# Setup cell: imports, plotting defaults, data load, and per-row derived metrics.
# Everything below is intentionally module-level so later cells can use `df`,
# `eda`, `project_root`, `db_path`, `age_bins` and `age_labels`.
import sqlite3
import sys
from contextlib import closing
from pathlib import Path

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Notebook-wide plotting defaults.
sns.set_theme(style="whitegrid")
plt.rcParams["figure.figsize"] = (10, 5)

# The notebook may be started either from the project root or from a
# sub-directory one level below it; fall back to the parent when the
# `preanalysis` folder is not found next to the current working directory.
project_root = Path.cwd().resolve()
if not (project_root / "preanalysis").exists():
    project_root = project_root.parent

# Make the local helper module importable. Guard against duplicates so that
# re-running this cell does not keep growing sys.path.
preanalysis_dir = str(project_root / "preanalysis")
if preanalysis_dir not in sys.path:
    sys.path.append(preanalysis_dir)
import eda_utils as eda  # noqa: E402  (must follow the sys.path tweak)

db_path = project_root / "dataset" / "ds.sqlite"
# sqlite3.connect() silently creates an empty database when the file is
# missing, which would later fail with a misleading "no such table" error.
# Fail fast with a clear message instead.
if not db_path.is_file():
    raise FileNotFoundError(f"SQLite dataset not found: {db_path}")

# `closing` guarantees the connection is released even if the query raises.
# (A bare `with sqlite3.connect(...)` only manages the transaction; it does
# not close the connection.)
with closing(sqlite3.connect(db_path)) as conn:
    df = pd.read_sql_query(
        "select * from communications",
        conn,
        parse_dates=["business_dt"],
    )

# Left-closed age buckets: [15, 25), [25, 35), ..., [55, 120).
# NOTE: rows whose age is missing or falls outside [15, 120) get a NaN
# segment and therefore drop out of any groupby on "age_segment".
age_bins = [15, 25, 35, 45, 55, 120]
age_labels = ["15-24", "25-34", "35-44", "45-54", "55+"]
df["age_segment"] = pd.cut(
    df["age"],
    bins=age_bins,
    labels=age_labels,
    right=False,
    include_lowest=True,
)

# Row-wise totals over the column groups declared in eda_utils.
for cols, name in [
    (eda.ACTIVE_IMP_COLS, "active_imp_total"),
    (eda.PASSIVE_IMP_COLS, "passive_imp_total"),
    (eda.ACTIVE_CLICK_COLS, "active_click_total"),
    (eda.PASSIVE_CLICK_COLS, "passive_click_total"),
    (eda.ORDER_COLS, "orders_amt_total"),
]:
    df[name] = df[cols].sum(axis=1)

# Combined (active + passive) impressions and clicks.
df["imp_total"] = df["active_imp_total"] + df["passive_imp_total"]
df["click_total"] = df["active_click_total"] + df["passive_click_total"]

# Per-row ratios: clicks / impressions per channel type and overall, plus
# orders / clicks. `eda.safe_divide` presumably handles zero denominators;
# confirm its exact behaviour in eda_utils.
df["ctr_active"] = eda.safe_divide(df["active_click_total"], df["active_imp_total"])
df["ctr_passive"] = eda.safe_divide(df["passive_click_total"], df["passive_imp_total"])
df["ctr_all"] = eda.safe_divide(df["click_total"], df["imp_total"])
df["cr_click2order"] = eda.safe_divide(df["orders_amt_total"], df["click_total"])
| \n", " | ctr_active | \n", "ctr_passive | \n", "cr_active | \n", "cr_passive | \n", "
|---|---|---|---|---|
| age_segment | \n", "\n", " | \n", " | \n", " | \n", " |
| 15-24 | \n", "0.669682 | \n", "0.026672 | \n", "0.057846 | \n", "0.627820 | \n", "
| 25-34 | \n", "0.675315 | \n", "0.035316 | \n", "0.087974 | \n", "0.707119 | \n", "
| 35-44 | \n", "0.672207 | \n", "0.037188 | \n", "0.090557 | \n", "0.741876 | \n", "
| 45-54 | \n", "0.668323 | \n", "0.040513 | \n", "0.077198 | \n", "0.631981 | \n", "
| 55+ | \n", "0.668371 | \n", "0.045245 | \n", "0.077028 | \n", "0.594488 | \n", "