scatters for pre-mathmodel

This commit is contained in:
dan
2025-12-15 18:51:27 +03:00
parent e2a36c74a3
commit 74a99ba229
4 changed files with 193 additions and 0 deletions

View File

@@ -394,8 +394,69 @@ def generate_category_plots() -> None:
)
def generate_basic_scatters() -> None:
    """Render the basic scatter-plot set for ``orders_amt_total``.

    Repeats the set of plots from best_model_and_plots: all points, points
    without outliers, and points without outliers plus a trend line. Each
    chart is saved as an HTML file under ``OUTPUT_DIR / "orders_amt_total"``
    and then post-processed with ``inject_font_css``.
    """
    df = prepare_client_data()
    y_col = "orders_amt_total"
    x_col = bmp.X_COL
    x_max = bmp.DEFAULT_X_MAX
    y_max = bmp.DEFAULT_Y_MAX
    out_dir = OUTPUT_DIR / y_col
    # main() only creates OUTPUT_DIR itself and calls this function first, so
    # the per-metric subdirectory must be created here before any chart is saved.
    out_dir.mkdir(parents=True, exist_ok=True)

    base = df[[x_col, y_col]].dropna()
    base = bmp.filter_x_range(base, x_col, x_max)
    # Work on a copy so that adding the "alpha" column never writes into a view.
    base = base.copy()
    base["alpha"] = compute_density_alpha(base, x_col, y_col, x_max, y_max)

    def scatter_chart(
        data: pd.DataFrame,
        title: str,
        trend: Tuple[np.ndarray, np.ndarray] | None = None,
    ) -> alt.Chart:
        """Build a scatter layer from *data*, optionally overlaid with a trend line.

        *data* must contain the x/y columns and an "alpha" column used as the
        per-point opacity. *trend* is an ``(x_values, y_values)`` pair; the
        line is drawn only when the pair and its x component are not None.
        """
        x_scale = alt.Scale(domain=(0, x_max), clamp=True, nice=False, domainMin=0, domainMax=x_max)
        y_scale = alt.Scale(domain=(bmp.DEFAULT_Y_MIN, y_max), clamp=True, nice=False)
        points = alt.Chart(data).mark_circle(size=40).encode(
            x=alt.X(x_col, title="Среднее число показов в день", scale=x_scale),
            y=alt.Y(y_col, title=y_col, scale=y_scale),
            opacity=alt.Opacity("alpha:Q", scale=alt.Scale(domain=(0, 1), clamp=True)),
            color=alt.value(bmp.DEFAULT_SCATTER_COLOR),
            tooltip=[x_col, y_col],
        )
        layers = [points]
        if trend is not None and trend[0] is not None:
            trend_df = pd.DataFrame({x_col: trend[0], "trend": trend[1]})
            layers.append(
                alt.Chart(trend_df).mark_line(color=bmp.DEFAULT_TREND_COLOR, strokeWidth=2.5).encode(
                    x=alt.X(x_col, scale=x_scale),
                    y=alt.Y("trend", scale=y_scale),
                )
            )
        chart = alt.layer(*layers).resolve_scale(opacity="independent")
        return configure_chart(chart, title, width=800, height=600)

    def save_chart(chart: alt.Chart, file_name: str) -> None:
        """Write *chart* into ``out_dir`` and run ``inject_font_css`` on the file."""
        target = out_dir / file_name
        chart.save(target)
        inject_font_css(target)

    # 1) all points
    save_chart(scatter_chart(base, "Облако: все точки"), "scatter_all.html")

    # 2) without outliers
    cleaned = bmp.remove_outliers(
        base,
        y_col=y_col,
        x_col=x_col,
        iqr_k=bmp.DEFAULT_IQR_K,
        q_low=bmp.DEFAULT_Q_LOW,
        q_high=bmp.DEFAULT_Q_HIGH,
    )
    cleaned = cleaned.copy()
    # Recompute opacity on the filtered frame rather than reusing the values
    # that were computed for the full point set.
    cleaned["alpha"] = compute_density_alpha(cleaned, x_col, y_col, x_max, y_max)
    save_chart(scatter_chart(cleaned, "Облако: без выбросов"), "scatter_clean.html")

    # 3) without outliers + trend
    tx, ty = bmp.compute_trend(
        cleaned,
        y_col=y_col,
        x_col=x_col,
        method=bmp.DEFAULT_TREND_METHOD,
        lowess_frac=bmp.DEFAULT_TREND_FRAC,
        savgol_window=bmp.DEFAULT_SAVGOL_WINDOW,
    )
    save_chart(
        scatter_chart(cleaned, "Облако: без выбросов + тренд", trend=(tx, ty)),
        "scatter_clean_trend.html",
    )
def main() -> None:
    """Create the output root directory and run every plot generator in order."""
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    # Order matters only in that it matches the original call sequence.
    plot_generators = (
        generate_basic_scatters,
        generate_total_plots,
        generate_category_plots,
    )
    for build_plots in plot_generators:
        build_plots()

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long