def generate_basic_scatters() -> None:
    """Render the three basic scatter plots for ``orders_amt_total``.

    Repeats the set produced by ``best_model_and_plots``: all points, points
    with outliers removed, and outlier-free points with a trend line on top.
    Each chart is written as an HTML file into ``OUTPUT_DIR / "orders_amt_total"``
    and then passed through ``inject_font_css``.
    """
    df = prepare_client_data()
    y_col = "orders_amt_total"
    x_col = bmp.X_COL
    x_max = bmp.DEFAULT_X_MAX
    y_max = bmp.DEFAULT_Y_MAX
    out_dir = OUTPUT_DIR / y_col
    # main() only creates OUTPUT_DIR itself, and this function runs before the
    # other generators, so make sure the per-metric sub-directory exists
    # before the first chart is saved into it.
    out_dir.mkdir(parents=True, exist_ok=True)

    base = df[[x_col, y_col]].dropna()
    base = bmp.filter_x_range(base, x_col, x_max)
    # Work on a copy so that adding the helper column below does not write
    # into the frame returned by the filter.
    base = base.copy()
    base["alpha"] = compute_density_alpha(base, x_col, y_col, x_max, y_max)

    def scatter_chart(
        data: pd.DataFrame,
        title: str,
        trend: Tuple[np.ndarray, np.ndarray] | None = None,
    ) -> alt.Chart:
        """Build a scatter layer for *data*, optionally with a trend line.

        *data* must contain ``x_col``, ``y_col`` and an ``alpha`` column that
        drives per-point opacity. *trend* is an ``(x, y)`` pair; it is drawn
        only when it is given and its x component is not ``None``.
        """
        x_scale = alt.Scale(domain=(0, x_max), clamp=True, nice=False, domainMin=0, domainMax=x_max)
        y_scale = alt.Scale(domain=(bmp.DEFAULT_Y_MIN, y_max), clamp=True, nice=False)
        points = alt.Chart(data).mark_circle(size=40).encode(
            x=alt.X(x_col, title="Среднее число показов в день", scale=x_scale),
            y=alt.Y(y_col, title=y_col, scale=y_scale),
            opacity=alt.Opacity("alpha:Q", scale=alt.Scale(domain=(0, 1), clamp=True)),
            color=alt.value(bmp.DEFAULT_SCATTER_COLOR),
            tooltip=[x_col, y_col],
        )
        layers = [points]
        if trend is not None and trend[0] is not None:
            trend_df = pd.DataFrame({x_col: trend[0], "trend": trend[1]})
            # The trend layer reuses the same scale objects as the points.
            layers.append(
                alt.Chart(trend_df).mark_line(color=bmp.DEFAULT_TREND_COLOR, strokeWidth=2.5).encode(
                    x=alt.X(x_col, scale=x_scale),
                    y=alt.Y("trend", scale=y_scale),
                )
            )
        chart = alt.layer(*layers).resolve_scale(opacity="independent")
        return configure_chart(chart, title, width=800, height=600)

    def save_chart(chart: alt.Chart, filename: str) -> None:
        """Save *chart* as ``out_dir / filename`` and inject the font CSS."""
        target = out_dir / filename
        chart.save(target)
        inject_font_css(target)

    # 1) all points
    save_chart(scatter_chart(base, "Облако: все точки"), "scatter_all.html")

    # 2) outliers removed
    cleaned = bmp.remove_outliers(
        base,
        y_col=y_col,
        x_col=x_col,
        iqr_k=bmp.DEFAULT_IQR_K,
        q_low=bmp.DEFAULT_Q_LOW,
        q_high=bmp.DEFAULT_Q_HIGH,
    )
    cleaned = cleaned.copy()
    # Opacity is recomputed on the cleaned frame rather than reused from `base`.
    cleaned["alpha"] = compute_density_alpha(cleaned, x_col, y_col, x_max, y_max)
    save_chart(scatter_chart(cleaned, "Облако: без выбросов"), "scatter_clean.html")

    # 3) outliers removed + trend line
    tx, ty = bmp.compute_trend(
        cleaned,
        y_col=y_col,
        x_col=x_col,
        method=bmp.DEFAULT_TREND_METHOD,
        lowess_frac=bmp.DEFAULT_TREND_FRAC,
        savgol_window=bmp.DEFAULT_SAVGOL_WINDOW,
    )
    save_chart(
        scatter_chart(cleaned, "Облако: без выбросов + тренд", trend=(tx, ty)),
        "scatter_clean_trend.html",
    )


def main() -> None:
    """Create the output directory and generate every plot set."""
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    generate_basic_scatters()
    generate_total_plots()
    generate_category_plots()
+ + + \ No newline at end of file diff --git a/new_plots/orders_amt_total/scatter_clean.html b/new_plots/orders_amt_total/scatter_clean.html new file mode 100644 index 0000000..54116dc --- /dev/null +++ b/new_plots/orders_amt_total/scatter_clean.html @@ -0,0 +1,44 @@ + + + + + + + + + + + +
+ + + \ No newline at end of file diff --git a/new_plots/orders_amt_total/scatter_clean_trend.html b/new_plots/orders_amt_total/scatter_clean_trend.html new file mode 100644 index 0000000..0fff0f9 --- /dev/null +++ b/new_plots/orders_amt_total/scatter_clean_trend.html @@ -0,0 +1,44 @@ + + + + + + + + + + + +
+ + + \ No newline at end of file