{
"cells": [
{
"cell_type": "markdown",
"id": "b62313a3",
"metadata": {},
"source": [
"# Платформа и вероятность заказа\n",
"\n",
"**Вопрос:** даёт ли платформа (Android vs iOS) прирост заказа при одинаковом объёме коммуникаций?\n",
"\n",
"**Гипотеза:** при контроле показов/кликов Android-клиенты конвертируются выше."
]
},
{
"cell_type": "code",
"id": "8c8f09b1",
"metadata": {
"execution": {
"iopub.execute_input": "2025-12-12T19:12:03.874747Z",
"iopub.status.busy": "2025-12-12T19:12:03.874144Z",
"iopub.status.idle": "2025-12-12T19:12:10.515786Z",
"shell.execute_reply": "2025-12-12T19:12:10.513552Z"
},
"ExecuteTime": {
"end_time": "2025-12-12T19:27:18.761737Z",
"start_time": "2025-12-12T19:27:17.400625Z"
}
},
"source": [
"import sqlite3\n",
"from pathlib import Path\n",
"import sys\n",
"import numpy as np\n",
"import pandas as pd\n",
"import seaborn as sns\n",
"import matplotlib.pyplot as plt\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.preprocessing import StandardScaler, OneHotEncoder\n",
"from sklearn.compose import ColumnTransformer\n",
"from sklearn.pipeline import Pipeline\n",
"from sklearn.linear_model import LogisticRegression\n",
"from sklearn.metrics import roc_auc_score\n",
"\n",
"sns.set_theme(style=\"whitegrid\")\n",
"plt.rcParams[\"figure.figsize\"] = (10, 5)\n",
"\n",
"# Locate the project root: the nearest directory, starting at the working\n",
"# directory and moving up one level at a time, that contains `preanalysis/`.\n",
"start_dir = Path.cwd().resolve()\n",
"project_root = next(\n",
"    (p for p in (start_dir, *start_dir.parents) if (p / \"preanalysis\").exists()),\n",
"    None,\n",
")\n",
"if project_root is None:\n",
"    # Fail here with a clear message instead of at the import below.\n",
"    raise FileNotFoundError(f\"No 'preanalysis' directory found at or above {start_dir}\")\n",
"\n",
"# Make the helper module importable; skip the append on cell re-runs.\n",
"preanalysis_dir = str(project_root / \"preanalysis\")\n",
"if preanalysis_dir not in sys.path:\n",
"    sys.path.append(preanalysis_dir)\n",
"import eda_utils as eda\n",
"\n",
"# Read the whole `communications` table, parsing `business_dt` as datetime.\n",
"db_path = project_root / \"dataset\" / \"ds.sqlite\"\n",
"conn = sqlite3.connect(db_path)\n",
"try:\n",
"    df = pd.read_sql_query(\"select * from communications\", conn, parse_dates=[\"business_dt\"])\n",
"finally:\n",
"    # Release the SQLite handle even when the query raises.\n",
"    conn.close()\n"
],
"outputs": [],
"execution_count": 1
},
{
"cell_type": "code",
"id": "67ed5210",
"metadata": {
"execution": {
"iopub.execute_input": "2025-12-12T19:12:10.521535Z",
"iopub.status.busy": "2025-12-12T19:12:10.521072Z",
"iopub.status.idle": "2025-12-12T19:12:13.018480Z",
"shell.execute_reply": "2025-12-12T19:12:13.016893Z"
},
"ExecuteTime": {
"end_time": "2025-12-12T19:27:19.344169Z",
"start_time": "2025-12-12T19:27:18.770497Z"
}
},
"source": [
"def _first_mode(values):\n",
"    \"\"\"Return the most frequent non-null value, or NaN when every value is missing.\n",
"\n",
"    `Series.mode()` drops NaN by default, so an all-missing group yields an empty\n",
"    result; indexing it directly would raise IndexError. Ties resolve to the first\n",
"    value returned by `mode()`, as before.\n",
"    \"\"\"\n",
"    modes = values.mode()\n",
"    return modes.iat[0] if not modes.empty else np.nan\n",
"\n",
"\n",
"# Row-level totals: sum each group of raw columns listed in eda_utils.\n",
"for cols, name in [\n",
"    (eda.ACTIVE_IMP_COLS, \"active_imp_total\"),\n",
"    (eda.PASSIVE_IMP_COLS, \"passive_imp_total\"),\n",
"    (eda.ACTIVE_CLICK_COLS, \"active_click_total\"),\n",
"    (eda.PASSIVE_CLICK_COLS, \"passive_click_total\"),\n",
"    (eda.ORDER_COLS, \"orders_amt_total\"),\n",
"]:\n",
"    df[name] = df[cols].sum(axis=1)\n",
"\n",
"df[\"imp_total\"] = df[\"active_imp_total\"] + df[\"passive_imp_total\"]\n",
"df[\"click_total\"] = df[\"active_click_total\"] + df[\"passive_click_total\"]\n",
"\n",
"# One row per client: volumes are summed, age takes the median, and the\n",
"# categorical attributes take their most frequent value.\n",
"client = df.groupby(\"id\").agg(\n",
"    {\n",
"        \"active_imp_total\": \"sum\",\n",
"        \"passive_imp_total\": \"sum\",\n",
"        \"active_click_total\": \"sum\",\n",
"        \"passive_click_total\": \"sum\",\n",
"        \"orders_amt_total\": \"sum\",\n",
"        \"imp_total\": \"sum\",\n",
"        \"click_total\": \"sum\",\n",
"        \"age\": \"median\",\n",
"        \"gender_cd\": _first_mode,\n",
"        \"device_platform_cd\": _first_mode,\n",
"    }\n",
")\n",
"\n",
"# Binary target plus two ratio features computed with eda.safe_divide.\n",
"client[\"has_order\"] = (client[\"orders_amt_total\"] > 0).astype(int)\n",
"client[\"ctr_all\"] = eda.safe_divide(client[\"click_total\"], client[\"imp_total\"])\n",
"client[\"cr_click2order\"] = eda.safe_divide(client[\"orders_amt_total\"], client[\"click_total\"])\n",
"client.head()\n"
],
"outputs": [
{
"data": {
"text/plain": [
" active_imp_total passive_imp_total active_click_total \\\n",
"id \n",
"1 33.0 35.0 14.0 \n",
"2 27.0 89.0 19.0 \n",
"3 57.0 236.0 37.0 \n",
"4 20.0 37.0 14.0 \n",
"5 23.0 20.0 13.0 \n",
"\n",
" passive_click_total orders_amt_total imp_total click_total age \\\n",
"id \n",
"1 3.0 0 68.0 17.0 58.0 \n",
"2 4.0 3 116.0 23.0 54.0 \n",
"3 0.0 2 293.0 37.0 70.0 \n",
"4 1.0 0 57.0 15.0 43.0 \n",
"5 3.0 1 43.0 16.0 46.0 \n",
"\n",
" gender_cd device_platform_cd has_order ctr_all cr_click2order \n",
"id \n",
"1 M Android 0 0.250000 0.000000 \n",
"2 M Android 1 0.198276 0.130435 \n",
"3 F Android 1 0.126280 0.054054 \n",
"4 F Android 0 0.263158 0.000000 \n",
"5 M Android 1 0.372093 0.062500 "
],
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" active_imp_total | \n",
" passive_imp_total | \n",
" active_click_total | \n",
" passive_click_total | \n",
" orders_amt_total | \n",
" imp_total | \n",
" click_total | \n",
" age | \n",
" gender_cd | \n",
" device_platform_cd | \n",
" has_order | \n",
" ctr_all | \n",
" cr_click2order | \n",
"
\n",
" \n",
" | id | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" | 1 | \n",
" 33.0 | \n",
" 35.0 | \n",
" 14.0 | \n",
" 3.0 | \n",
" 0 | \n",
" 68.0 | \n",
" 17.0 | \n",
" 58.0 | \n",
" M | \n",
" Android | \n",
" 0 | \n",
" 0.250000 | \n",
" 0.000000 | \n",
"
\n",
" \n",
" | 2 | \n",
" 27.0 | \n",
" 89.0 | \n",
" 19.0 | \n",
" 4.0 | \n",
" 3 | \n",
" 116.0 | \n",
" 23.0 | \n",
" 54.0 | \n",
" M | \n",
" Android | \n",
" 1 | \n",
" 0.198276 | \n",
" 0.130435 | \n",
"
\n",
" \n",
" | 3 | \n",
" 57.0 | \n",
" 236.0 | \n",
" 37.0 | \n",
" 0.0 | \n",
" 2 | \n",
" 293.0 | \n",
" 37.0 | \n",
" 70.0 | \n",
" F | \n",
" Android | \n",
" 1 | \n",
" 0.126280 | \n",
" 0.054054 | \n",
"
\n",
" \n",
" | 4 | \n",
" 20.0 | \n",
" 37.0 | \n",
" 14.0 | \n",
" 1.0 | \n",
" 0 | \n",
" 57.0 | \n",
" 15.0 | \n",
" 43.0 | \n",
" F | \n",
" Android | \n",
" 0 | \n",
" 0.263158 | \n",
" 0.000000 | \n",
"
\n",
" \n",
" | 5 | \n",
" 23.0 | \n",
" 20.0 | \n",
" 13.0 | \n",
" 3.0 | \n",
" 1 | \n",
" 43.0 | \n",
" 16.0 | \n",
" 46.0 | \n",
" M | \n",
" Android | \n",
" 1 | \n",
" 0.372093 | \n",
" 0.062500 | \n",
"
\n",
" \n",
"
\n",
"
"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 2
},
{
"cell_type": "markdown",
"id": "ee977b3f",
"metadata": {},
"source": [
"## Заказы по платформам"
]
},
{
"cell_type": "code",
"id": "3cb9ed5d",
"metadata": {
"execution": {
"iopub.execute_input": "2025-12-12T19:12:13.024492Z",
"iopub.status.busy": "2025-12-12T19:12:13.024166Z",
"iopub.status.idle": "2025-12-12T19:12:13.288887Z",
"shell.execute_reply": "2025-12-12T19:12:13.287256Z"
},
"ExecuteTime": {
"end_time": "2025-12-12T19:27:19.479169Z",
"start_time": "2025-12-12T19:27:19.376099Z"
}
},
"source": [
"# Normalise platform labels before grouping so spelling variants such as\n",
"# \"IOS\" and \"iOS\" are not reported as separate platforms. This is the same\n",
"# eda.normalize_device call the model cell applies to its features.\n",
"platform_rate = (\n",
"    client.assign(device_platform_cd=eda.normalize_device(client[\"device_platform_cd\"]))\n",
"    .groupby(\"device_platform_cd\")[\"has_order\"]\n",
"    .mean()\n",
"    .reset_index()\n",
")\n",
"\n",
"fig, ax = plt.subplots(figsize=(8, 4))\n",
"sns.barplot(data=platform_rate, x=\"device_platform_cd\", y=\"has_order\", ax=ax)\n",
"ax.set_title(\"Доля клиентов с заказом по платформам\")\n",
"ax.set_xlabel(\"Платформа\")\n",
"ax.set_ylabel(\"Доля клиентов с заказом\")\n",
"fig.tight_layout()\n",
"plt.show()\n",
"platform_rate\n"
],
"outputs": [
{
"data": {
"text/plain": [
""
],
"image/png": ""
},
"metadata": {},
"output_type": "display_data",
"jetTransient": {
"display_id": null
}
},
{
"data": {
"text/plain": [
" device_platform_cd has_order\n",
"0 Android 0.587575\n",
"1 IOS 0.545270\n",
"2 iOS 0.542612\n",
"3 iPadOS 0.569767"
],
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" device_platform_cd | \n",
" has_order | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" Android | \n",
" 0.587575 | \n",
"
\n",
" \n",
" | 1 | \n",
" IOS | \n",
" 0.545270 | \n",
"
\n",
" \n",
" | 2 | \n",
" iOS | \n",
" 0.542612 | \n",
"
\n",
" \n",
" | 3 | \n",
" iPadOS | \n",
" 0.569767 | \n",
"
\n",
" \n",
"
\n",
"
"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 3
},
{
"cell_type": "markdown",
"id": "f65ad022",
"metadata": {},
"source": [
"## ML-модель с контролем объёма"
]
},
{
"cell_type": "code",
"id": "eaa4b459",
"metadata": {
"execution": {
"iopub.execute_input": "2025-12-12T19:12:13.294736Z",
"iopub.status.busy": "2025-12-12T19:12:13.294463Z",
"iopub.status.idle": "2025-12-12T19:12:13.423902Z",
"shell.execute_reply": "2025-12-12T19:12:13.421985Z"
},
"ExecuteTime": {
"end_time": "2025-12-12T19:27:19.655814Z",
"start_time": "2025-12-12T19:27:19.623730Z"
}
},
"source": [
"# Feature groups: numeric columns are standardised, categorical ones one-hot encoded.\n",
"numeric_cols = [\n",
"    \"active_imp_total\",\n",
"    \"passive_imp_total\",\n",
"    \"active_click_total\",\n",
"    \"passive_click_total\",\n",
"    \"ctr_all\",\n",
"    \"age\",\n",
"]\n",
"cat_cols = [\"gender_cd\", \"device_platform_cd\"]\n",
"\n",
"# Design matrix (numeric columns first, then categorical); category labels are\n",
"# passed through the eda_utils normalisers before encoding.\n",
"X = client[numeric_cols + cat_cols].copy()\n",
"X[\"gender_cd\"] = eda.normalize_gender(X[\"gender_cd\"])\n",
"X[\"device_platform_cd\"] = eda.normalize_device(X[\"device_platform_cd\"])\n",
"y = client[\"has_order\"]\n",
"\n",
"scale_numeric = Pipeline([(\"scaler\", StandardScaler())])\n",
"encode_categorical = OneHotEncoder(handle_unknown=\"ignore\")\n",
"preprocess = ColumnTransformer(\n",
"    [\n",
"        (\"num\", scale_numeric, numeric_cols),\n",
"        (\"cat\", encode_categorical, cat_cols),\n",
"    ]\n",
")\n",
"model = Pipeline([(\"pre\", preprocess), (\"clf\", LogisticRegression(max_iter=1000))])\n",
"\n",
"# Stratified 80/20 hold-out split with a fixed seed.\n",
"X_train, X_test, y_train, y_test = train_test_split(\n",
"    X, y, test_size=0.2, random_state=42, stratify=y\n",
")\n",
"model.fit(X_train, y_train)\n",
"\n",
"# Hold-out ROC AUC from the predicted probability of class 1 (has_order).\n",
"proba = model.predict_proba(X_test)[:, 1]\n",
"auc = roc_auc_score(y_test, proba)\n",
"\n",
"# Coefficients labelled with the transformed feature names, largest magnitude first.\n",
"features = model.named_steps[\"pre\"].get_feature_names_out()\n",
"coef = model.named_steps[\"clf\"].coef_[0]\n",
"coef_series = pd.Series(coef, index=features).sort_values(key=abs, ascending=False)\n",
"auc, coef_series.head(10)\n"
],
"outputs": [
{
"data": {
"text/plain": [
"(0.681635404420581,\n",
" num__passive_click_total 0.757779\n",
" num__ctr_all -0.257144\n",
" cat__device_platform_cd_Android 0.182476\n",
" cat__gender_cd_M 0.133747\n",
" num__active_click_total 0.119761\n",
" cat__device_platform_cd_iPadOS -0.100109\n",
" num__age -0.071048\n",
" num__passive_imp_total -0.050535\n",
" cat__device_platform_cd_iOS 0.040232\n",
" num__active_imp_total -0.019038\n",
" dtype: float64)"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 4
},
{
"cell_type": "markdown",
"id": "ce032735",
"metadata": {},
"source": [
"## Вывод по гипотезе\n",
"- В сырой агрегированной доле заказов Android выше iOS.\n",
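"- В модели при контроле объёма коммуникаций и CTR коэффициент при `device_platform_cd_Android` положительный (≈0.18, третий по модулю среди признаков), AUC ≈ 0.68 на отложенной выборке. Гипотеза подтверждается: при прочих равных платформа связана с вероятностью заказа, у Android она выше. Статистическая значимость коэффициента в этом ноутбуке не проверялась, поэтому вывод описывает ассоциацию, а не причинный эффект."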
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.5"
}
},
"nbformat": 4,
"nbformat_minor": 5
}