# Setup cell: imports, plotting defaults, project-root discovery and data load.
# Produces `project_root`, `db_path`, `eda` and the raw `df` used by later cells.
import sqlite3
import sys
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler

sns.set_theme(style="whitegrid")
plt.rcParams["figure.figsize"] = (10, 5)

# Walk up from the working directory ONE level at a time until a directory
# containing "preanalysis" is found. (Climbing two levels per iteration can
# step over the project root when the notebook sits at an odd depth below it.)
project_root = Path.cwd().resolve()
while not (project_root / "preanalysis").exists() and project_root.parent != project_root:
    project_root = project_root.parent

# Fail early with a clear message instead of an opaque ModuleNotFoundError below.
if not (project_root / "preanalysis").exists():
    raise FileNotFoundError(
        f"Could not find a 'preanalysis' directory in {Path.cwd().resolve()} or any of its parents"
    )

# Make the project helpers importable; guard against duplicate entries so the
# cell stays idempotent when re-run in the same kernel.
preanalysis_dir = str(project_root / "preanalysis")
if preanalysis_dir not in sys.path:
    sys.path.append(preanalysis_dir)
import eda_utils as eda

db_path = project_root / "dataset" / "ds.sqlite"
# sqlite3.connect() would silently create an empty database at a wrong path,
# so check for the file explicitly first.
if not db_path.is_file():
    raise FileNotFoundError(f"SQLite database not found: {db_path}")

conn = sqlite3.connect(db_path)
try:
    df = pd.read_sql_query("select * from communications", conn, parse_dates=["business_dt"])
finally:
    # Always release the connection, even if the query fails.
    conn.close()
# Feature-engineering cell: row-level totals, then one row per client (`id`)
# with summed counters, demographic attributes and derived ratios.


def _first_mode(values):
    """Return the most frequent non-null value of a Series, or NaN if there is none.

    `Series.mode()` drops missing values by default, so it is empty for an
    all-missing group; indexing it with `.iat[0]` would raise IndexError.
    """
    modes = values.mode()
    return modes.iat[0] if not modes.empty else np.nan


# Row-wise sums over the column groups declared in eda_utils.
for cols, name in [
    (eda.ACTIVE_IMP_COLS, "active_imp_total"),
    (eda.PASSIVE_IMP_COLS, "passive_imp_total"),
    (eda.ACTIVE_CLICK_COLS, "active_click_total"),
    (eda.PASSIVE_CLICK_COLS, "passive_click_total"),
    (eda.ORDER_COLS, "orders_amt_total"),
]:
    df[name] = df[cols].sum(axis=1)

df["imp_total"] = df["active_imp_total"] + df["passive_imp_total"]
df["click_total"] = df["active_click_total"] + df["passive_click_total"]

# Collapse to one row per client: counters are summed, age takes the median,
# categorical attributes take their most frequent value.
client = df.groupby("id").agg(
    {
        "active_imp_total": "sum",
        "passive_imp_total": "sum",
        "active_click_total": "sum",
        "passive_click_total": "sum",
        "orders_amt_total": "sum",
        "imp_total": "sum",
        "click_total": "sum",
        "age": "median",
        "gender_cd": _first_mode,
        "device_platform_cd": _first_mode,
    }
)

# NOTE(review): eda.safe_divide presumably guards against a zero denominator —
# confirm what it returns for clients with imp_total == 0.
client["passive_share"] = eda.safe_divide(client["passive_imp_total"], client["imp_total"])
client["ctr_all"] = eda.safe_divide(client["click_total"], client["imp_total"])
# Binary target: 1 if the summed order columns are positive for the client.
client["has_order"] = (client["orders_amt_total"] > 0).astype(int)
client.head()
0 57.0 15.0 43.0 \n", "5 3.0 1 43.0 16.0 46.0 \n", "\n", " gender_cd device_platform_cd passive_share ctr_all has_order \n", "id \n", "1 M Android 0.514706 0.250000 0 \n", "2 M Android 0.767241 0.198276 1 \n", "3 F Android 0.805461 0.126280 1 \n", "4 F Android 0.649123 0.263158 0 \n", "5 M Android 0.465116 0.372093 1 " ], "text/html": [ "
| \n", " | active_imp_total | \n", "passive_imp_total | \n", "active_click_total | \n", "passive_click_total | \n", "orders_amt_total | \n", "imp_total | \n", "click_total | \n", "age | \n", "gender_cd | \n", "device_platform_cd | \n", "passive_share | \n", "ctr_all | \n", "has_order | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| id | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
| 1 | \n", "33.0 | \n", "35.0 | \n", "14.0 | \n", "3.0 | \n", "0 | \n", "68.0 | \n", "17.0 | \n", "58.0 | \n", "M | \n", "Android | \n", "0.514706 | \n", "0.250000 | \n", "0 | \n", "
| 2 | \n", "27.0 | \n", "89.0 | \n", "19.0 | \n", "4.0 | \n", "3 | \n", "116.0 | \n", "23.0 | \n", "54.0 | \n", "M | \n", "Android | \n", "0.767241 | \n", "0.198276 | \n", "1 | \n", "
| 3 | \n", "57.0 | \n", "236.0 | \n", "37.0 | \n", "0.0 | \n", "2 | \n", "293.0 | \n", "37.0 | \n", "70.0 | \n", "F | \n", "Android | \n", "0.805461 | \n", "0.126280 | \n", "1 | \n", "
| 4 | \n", "20.0 | \n", "37.0 | \n", "14.0 | \n", "1.0 | \n", "0 | \n", "57.0 | \n", "15.0 | \n", "43.0 | \n", "F | \n", "Android | \n", "0.649123 | \n", "0.263158 | \n", "0 | \n", "
| 5 | \n", "23.0 | \n", "20.0 | \n", "13.0 | \n", "3.0 | \n", "1 | \n", "43.0 | \n", "16.0 | \n", "46.0 | \n", "M | \n", "Android | \n", "0.465116 | \n", "0.372093 | \n", "1 | \n", "