scatters for pre-mathmodel
This commit is contained in:
61
new_plots.py
61
new_plots.py
@@ -394,8 +394,69 @@ def generate_category_plots() -> None:
|
||||
)
|
||||
|
||||
|
||||
def generate_basic_scatters() -> None:
    """Render the three basic scatter plots for ``orders_amt_total``.

    Repeats the chart set from best_model_and_plots: all points, points with
    outliers removed, and outlier-free points with a trend line on top.

    Each chart is written as an HTML file into ``OUTPUT_DIR / "orders_amt_total"``
    and then passed to ``inject_font_css``. The target directory is created if
    it does not exist yet, so the function does not depend on another
    generator having created it beforehand.
    """
    df = prepare_client_data()
    y_col = "orders_amt_total"
    x_col = bmp.X_COL
    x_max = bmp.DEFAULT_X_MAX
    y_max = bmp.DEFAULT_Y_MAX
    out_dir = OUTPUT_DIR / y_col
    # main() only creates OUTPUT_DIR itself; make sure the per-metric
    # sub-directory exists before anything is saved into it.
    out_dir.mkdir(parents=True, exist_ok=True)

    base = df[[x_col, y_col]].dropna()
    base = bmp.filter_x_range(base, x_col, x_max)
    # Copy before adding a column so the frame returned by the filter is not mutated.
    base = base.copy()
    base["alpha"] = compute_density_alpha(base, x_col, y_col, x_max, y_max)

    def scatter_chart(
        data: pd.DataFrame,
        title: str,
        trend: Tuple[np.ndarray, np.ndarray] | None = None,
    ) -> alt.Chart:
        """Build a scatter layer for ``data`` and, when given, a trend line layer.

        ``data`` must contain ``x_col``, ``y_col`` and an ``alpha`` column that
        drives per-point opacity. ``trend`` is an ``(x, y)`` pair; it is ignored
        when it is ``None`` or when its x component is ``None``.
        """
        x_scale = alt.Scale(domain=(0, x_max), clamp=True, nice=False, domainMin=0, domainMax=x_max)
        y_scale = alt.Scale(domain=(bmp.DEFAULT_Y_MIN, y_max), clamp=True, nice=False)
        points = alt.Chart(data).mark_circle(size=40).encode(
            x=alt.X(x_col, title="Среднее число показов в день", scale=x_scale),
            y=alt.Y(y_col, title=y_col, scale=y_scale),
            opacity=alt.Opacity("alpha:Q", scale=alt.Scale(domain=(0, 1), clamp=True)),
            color=alt.value(bmp.DEFAULT_SCATTER_COLOR),
            tooltip=[x_col, y_col],
        )
        layers = [points]
        if trend is not None and trend[0] is not None:
            trend_df = pd.DataFrame({x_col: trend[0], "trend": trend[1]})
            layers.append(
                alt.Chart(trend_df).mark_line(color=bmp.DEFAULT_TREND_COLOR, strokeWidth=2.5).encode(
                    x=alt.X(x_col, scale=x_scale),
                    y=alt.Y("trend", scale=y_scale),
                )
            )
        chart = alt.layer(*layers).resolve_scale(opacity="independent")
        return configure_chart(chart, title, width=800, height=600)

    def save_chart(chart: alt.Chart, filename: str) -> None:
        """Save ``chart`` as ``out_dir / filename`` and post-process it with ``inject_font_css``."""
        target = out_dir / filename
        chart.save(target)
        inject_font_css(target)

    # 1) all points
    save_chart(scatter_chart(base, "Облако: все точки"), "scatter_all.html")

    # 2) outliers removed
    cleaned = bmp.remove_outliers(
        base,
        y_col=y_col,
        x_col=x_col,
        iqr_k=bmp.DEFAULT_IQR_K,
        q_low=bmp.DEFAULT_Q_LOW,
        q_high=bmp.DEFAULT_Q_HIGH,
    )
    cleaned = cleaned.copy()
    # Recompute the density-based alpha on the cleaned subset.
    cleaned["alpha"] = compute_density_alpha(cleaned, x_col, y_col, x_max, y_max)
    save_chart(scatter_chart(cleaned, "Облако: без выбросов"), "scatter_clean.html")

    # 3) outliers removed + trend line
    tx, ty = bmp.compute_trend(
        cleaned,
        y_col=y_col,
        x_col=x_col,
        method=bmp.DEFAULT_TREND_METHOD,
        lowess_frac=bmp.DEFAULT_TREND_FRAC,
        savgol_window=bmp.DEFAULT_SAVGOL_WINDOW,
    )
    save_chart(
        scatter_chart(cleaned, "Облако: без выбросов + тренд", trend=(tx, ty)),
        "scatter_clean_trend.html",
    )
|
||||
|
||||
|
||||
def main() -> None:
    """Create the output directory and run every plot generator in order."""
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    # Order matters for the run log only: basic scatters first, then the
    # total plots, then the per-category plots.
    pipeline = (
        generate_basic_scatters,
        generate_total_plots,
        generate_category_plots,
    )
    for build_plots in pipeline:
        build_plots()
|
||||
|
||||
|
||||
44
new_plots/orders_amt_total/scatter_all.html
Normal file
44
new_plots/orders_amt_total/scatter_all.html
Normal file
File diff suppressed because one or more lines are too long
44
new_plots/orders_amt_total/scatter_clean.html
Normal file
44
new_plots/orders_amt_total/scatter_clean.html
Normal file
File diff suppressed because one or more lines are too long
44
new_plots/orders_amt_total/scatter_clean_trend.html
Normal file
44
new_plots/orders_amt_total/scatter_clean_trend.html
Normal file
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user