scatters for pre-mathmodel
This commit is contained in:
61
new_plots.py
61
new_plots.py
@@ -394,8 +394,69 @@ def generate_category_plots() -> None:
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def generate_basic_scatters() -> None:
    """Render the three baseline scatter plots for ``orders_amt_total``.

    Repeats the set from best_model_and_plots: all points, points without
    outliers, and points without outliers plus a trend line. Each chart is
    saved as HTML under ``OUTPUT_DIR / "orders_amt_total"`` and the saved
    file is then passed to ``inject_font_css``.
    """
    df = prepare_client_data()
    y_col = "orders_amt_total"
    x_col = bmp.X_COL
    x_max = bmp.DEFAULT_X_MAX
    y_max = bmp.DEFAULT_Y_MAX
    out_dir = OUTPUT_DIR / y_col
    # main() creates only OUTPUT_DIR and calls this function first, so the
    # per-metric subdirectory has to be created here before anything is saved.
    out_dir.mkdir(parents=True, exist_ok=True)

    base = df[[x_col, y_col]].dropna()
    base = bmp.filter_x_range(base, x_col, x_max)
    # Work on a copy so that adding the helper column never touches a view of df.
    base = base.copy()
    base["alpha"] = compute_density_alpha(base, x_col, y_col, x_max, y_max)

    def scatter_chart(data: pd.DataFrame, title: str, trend: Tuple[np.ndarray, np.ndarray] | None = None) -> alt.Chart:
        """Build the scatter layer and, when ``trend`` is given, a trend line on top.

        ``data`` must carry an ``alpha`` column, which drives point opacity.
        ``trend`` is an ``(x, y)`` pair; it is ignored when it is ``None`` or
        when its x component is ``None``.
        """
        # Both layers share the same fixed, clamped scales.
        x_scale = alt.Scale(domain=(0, x_max), clamp=True, nice=False, domainMin=0, domainMax=x_max)
        y_scale = alt.Scale(domain=(bmp.DEFAULT_Y_MIN, y_max), clamp=True, nice=False)
        points = alt.Chart(data).mark_circle(size=40).encode(
            x=alt.X(x_col, title="Среднее число показов в день", scale=x_scale),
            y=alt.Y(y_col, title=y_col, scale=y_scale),
            opacity=alt.Opacity("alpha:Q", scale=alt.Scale(domain=(0, 1), clamp=True)),
            color=alt.value(bmp.DEFAULT_SCATTER_COLOR),
            tooltip=[x_col, y_col],
        )
        layers = [points]
        if trend is not None and trend[0] is not None:
            trend_df = pd.DataFrame({x_col: trend[0], "trend": trend[1]})
            layers.append(
                alt.Chart(trend_df).mark_line(color=bmp.DEFAULT_TREND_COLOR, strokeWidth=2.5).encode(
                    x=alt.X(x_col, scale=x_scale),
                    y=alt.Y("trend", scale=y_scale),
                )
            )
        chart = alt.layer(*layers).resolve_scale(opacity="independent")
        return configure_chart(chart, title, width=800, height=600)

    def save_chart(chart: alt.Chart, filename: str) -> None:
        """Save ``chart`` into ``out_dir`` and run ``inject_font_css`` on the file."""
        target = out_dir / filename
        chart.save(target)
        inject_font_css(target)

    # 1) all points
    save_chart(scatter_chart(base, "Облако: все точки"), "scatter_all.html")

    # 2) without outliers
    cleaned = bmp.remove_outliers(
        base,
        y_col=y_col,
        x_col=x_col,
        iqr_k=bmp.DEFAULT_IQR_K,
        q_low=bmp.DEFAULT_Q_LOW,
        q_high=bmp.DEFAULT_Q_HIGH,
    )
    cleaned = cleaned.copy()
    # Recompute opacity on the filtered set instead of reusing the values
    # computed for the full cloud.
    cleaned["alpha"] = compute_density_alpha(cleaned, x_col, y_col, x_max, y_max)
    save_chart(scatter_chart(cleaned, "Облако: без выбросов"), "scatter_clean.html")

    # 3) without outliers + trend
    tx, ty = bmp.compute_trend(
        cleaned,
        y_col=y_col,
        x_col=x_col,
        method=bmp.DEFAULT_TREND_METHOD,
        lowess_frac=bmp.DEFAULT_TREND_FRAC,
        savgol_window=bmp.DEFAULT_SAVGOL_WINDOW,
    )
    save_chart(
        scatter_chart(cleaned, "Облако: без выбросов + тренд", trend=(tx, ty)),
        "scatter_clean_trend.html",
    )
|
||||||
|
|
||||||
|
|
||||||
def main() -> None:
|
def main() -> None:
|
||||||
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
|
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
|
||||||
|
generate_basic_scatters()
|
||||||
generate_total_plots()
|
generate_total_plots()
|
||||||
generate_category_plots()
|
generate_category_plots()
|
||||||
|
|
||||||
|
|||||||
44
new_plots/orders_amt_total/scatter_all.html
Normal file
44
new_plots/orders_amt_total/scatter_all.html
Normal file
File diff suppressed because one or more lines are too long
44
new_plots/orders_amt_total/scatter_clean.html
Normal file
44
new_plots/orders_amt_total/scatter_clean.html
Normal file
File diff suppressed because one or more lines are too long
44
new_plots/orders_amt_total/scatter_clean_trend.html
Normal file
44
new_plots/orders_amt_total/scatter_clean_trend.html
Normal file
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user