In [1]:
import warnings

import pandas as pd
import pandas_gbq as gbq
import seaborn as sns
from google.cloud import bigquery

client = bigquery.Client()

warnings.filterwarnings("ignore")

# %matplotlib inline
# %config InlineBackend.figure_format = 'retina'

pal = [
    "#be0707",
    "#dc3248",
    "#ef5a80",
    "#f982b2",
    "#fca9dd",
    "#ffcfff",
    "#eebdfb",
    "#daacf9",
    "#c19cf8",
    "#a48ef7",
    "#7e82f7",
]

sns.set_theme(style="white", palette=pal, rc={"figure.figsize": (6, 4)})

from google.cloud import bigquery

project = "analytics-147612"
location = "EU"
client = bigquery.Client(project=project, location=location)


def table_reference(dataset, table):
    return f"{project}.{dataset}.{table}"


def get_table_fields(dataset, table):
    table_ref = client.get_table(table_reference(dataset, table))
    with open(f"/Users/gsokolov/Documents/{table}_fields.txt", "w") as f:
        data = {
            "table_name": table_reference(dataset, table),
            "fields": [field.name for field in table_ref.schema],
        }
        f.write(f"{data}")
    return print(data)

In [2]:
user_ids = pd.read_csv(
    "/Users/gsokolov/Library/CloudStorage/GoogleDrive-gsokolov@ourgapps.com/My Drive/Exported Data/ETH_recom_280224.csv"
)
user_ids.to_gbq(
    "dbt_gsokolov.eth_recom", location="EU", if_exists="append", chunksize=10
)

GenericGBQException: Reason: 404 POST https://bigquery.googleapis.com/upload/bigquery/v2/projects/analytics-147612/jobs?uploadType=multipart: Not found: Dataset analytics-147612:dbt_gsokolov

In [20]:
random_user_ids = pd.read_csv(
    "/Users/gsokolov/Library/CloudStorage/GoogleDrive-gsokolov@ourgapps.com/My Drive/Exported Data/Random_users_280224.csv"
)
random_user_ids.to_gbq(
    "dev_gsokolov.random_users", location="EU", if_exists="append", chunksize=10
)

In [3]:
sql = """
SELECT * FROM dev_gsokolov.user_deals
LIMIT 1000
"""
deals = gbq.read_gbq(sql, project_id="analytics-147612", location="EU")

Downloading: 100%|[32m██████████[0m|


In [8]:
deals["deals"].apply(lambda x: [i for i in x if "ETH" in i["symbol_name"]]).apply(
    lambda x: len(x)
).sum()

578

In [12]:
user_deals_cnt = pd.read_gbq(
    """
SELECT
    ud.user_id
    , SUM(ud.deals_cnt) AS total_deals_cnt
    , SUM(COALESCE(uc.eth_deals_cnt, 0)) AS eth_deals_cnt
FROM dev_gsokolov.user_conversion AS uc
    RIGHT JOIN dev_gsokolov.user_deals AS ud
        ON uc.user_id = ud.user_id
GROUP BY ud.user_id
ORDER BY eth_deals_cnt DESC;
"""
)

In [8]:
user_eth_stats = pd.read_gbq(
    """
SELECT * FROM dev_gsokolov.user_conversion
"""
)

In [10]:
user_stats = pd.read_gbq(
    """
SELECT
    ud.user_id
    , ud.trade_day
    , COUNT(DISTINCT ud.operation_id) AS deals_cnt
    , SUM(ud.volume) AS sum_vol
    , SUM(ud.profit) AS sum_profit
    , AVG(
        CAST(TIMESTAMP_DIFF(ud.close_time_dt, ud.open_time_dt, SECOND) / 60 AS INT64)
    ) AS avg_deal_duration
FROM dev_gsokolov.user_deals_flat AS ud
    LEFT JOIN dev_gsokolov.user_conversion AS uc
        ON ud.user_id = uc.user_id
GROUP BY
    1, 2
"""
)

In [14]:
print(user_eth_stats["user_id"].nunique())
print(user_stats["user_id"].nunique())

1537
17027


In [15]:
avg_eth_vol = (
    user_eth_stats.groupby("user_id")["eth_sum_vol"].sum()
    / user_eth_stats.groupby("user_id")["eth_deals_cnt"].sum()
)

In [5]:
%%sql
select * from dev_gsokolov.user_deals_flat

Unnamed: 0,user_id,operation_id,symbol_name,cmd,volume,open_price,close_price,profit,open_time_dt,close_time_dt,trade_day
0,37951711,17279954,XAUUSD,1,0.01,2035.05000,2035.42000,0.37,2024-02-07 13:53:03.783400,2024-02-07 14:23:05.148900,2024-02-07
1,37951711,17274652,XAUUSD,0,0.01,2034.17000,2035.26000,-1.09,2024-02-07 05:51:01.569100,2024-02-07 13:32:26.187400,2024-02-07
2,37951711,17243910,XAUUSD,1,0.01,2034.61000,2034.23000,-0.38,2024-02-07 05:35:38.351200,2024-02-07 05:51:21.805200,2024-02-07
3,37951711,17242607,USDJPY,0,0.01,147.94900,147.99600,-0.32,2024-02-07 04:57:26.806800,2024-02-07 05:15:53.754800,2024-02-07
4,37951711,17295112,XAUUSD,0,0.02,2033.84000,2039.14000,-10.60,2024-02-07 06:06:25.117200,2024-02-07 16:00:37.773100,2024-02-07
...,...,...,...,...,...,...,...,...,...,...,...
222300,35675515,21552862,XAGUSD,1,0.01,22.42700,22.48800,3.05,2024-02-28 04:58:50.060700,2024-02-28 13:32:10.849800,2024-02-28
222301,35675515,21602543,XAGUSD,1,0.01,22.43000,22.44300,0.65,2024-02-28 14:00:57.206700,2024-02-28 17:06:56.691100,2024-02-28
222302,31518527,21526927,EURUSD,1,0.01,1.08149,1.08157,0.08,2024-02-28 08:25:27.837700,2024-02-28 11:01:21.297400,2024-02-28
222303,31518527,21547000,EURUSD,1,0.01,1.08049,1.08131,0.82,2024-02-28 09:16:01.371500,2024-02-28 13:16:11.879300,2024-02-28


In [ ]:
random_user_deals = pd.read_gbq()

In [6]:
# Calculate the total count of deals and sum of volume of deals per day for every distinct user_id
grouped_by_user_and_day = random_user_deals.groupby(["user_id", "trade_day"])
deals_count = (
    grouped_by_user_and_day["operation_id"].nunique().reset_index(name="total_deals")
)
volume_sum = grouped_by_user_and_day["volume"].sum().reset_index(name="total_volume")

result = pd.merge(deals_count, volume_sum, on=["user_id", "trade_day"])
result

Unnamed: 0,user_id,trade_day,total_deals,total_volume
0,8362,2024-02-01,4,0.04
1,8362,2024-02-02,3,0.03
2,8362,2024-02-05,6,0.06
3,8362,2024-02-08,3,0.03
4,8362,2024-02-12,8,0.08
...,...,...,...,...
34178,38617918,2024-02-27,2,0.02
34179,38619263,2024-02-27,15,0.15
34180,38619263,2024-02-28,13,0.17
34181,38624766,2024-02-27,8,0.12


In [7]:
ethusd_df = random_user_deals[random_user_deals["symbol_name"] == "ETHUSD"]

ethusd_grouped_by_user_and_day = ethusd_df.groupby(["user_id", "trade_day"])
ethusd_deals_count = (
    ethusd_grouped_by_user_and_day["operation_id"]
    .nunique()
    .reset_index(name="ethusd_total_deals")
)
ethusd_volume_sum = (
    ethusd_grouped_by_user_and_day["volume"]
    .sum()
    .reset_index(name="ethusd_total_volume")
)

ethusd_result = pd.merge(
    ethusd_deals_count, ethusd_volume_sum, on=["user_id", "trade_day"]
)
ethusd_result

Unnamed: 0,user_id,trade_day,ethusd_total_deals,ethusd_total_volume
0,206905,2024-02-13,1,0.05
1,325218,2024-02-21,1,0.02
2,325218,2024-02-25,2,0.06
3,462714,2024-02-27,1,0.01
4,713382,2024-02-23,1,0.01
...,...,...,...,...
241,38512775,2024-02-28,3,0.03
242,38528448,2024-02-27,3,0.27
243,38528448,2024-02-28,11,0.41
244,38596586,2024-02-27,1,0.01


In [8]:
total_deals = random_user_deals["operation_id"].nunique()
ethusd_deals = random_user_deals[random_user_deals["symbol_name"] == "ETHUSD"][
    "operation_id"
].nunique()

percent_ethusd_deals = (ethusd_deals / total_deals) * 100

unique_ethusd_users = random_user_deals[random_user_deals["symbol_name"] == "ETHUSD"][
    "user_id"
].nunique()

percent_unique_ethusd_users = (
    unique_ethusd_users / random_user_deals["user_id"].nunique()
) * 100

print("Percentage of all deals that were ETHUSD deals:", percent_ethusd_deals)
print(
    "Percentage of unique users who made at least one ETHUSD deal:",
    percent_unique_ethusd_users,
)

Percentage of all deals that were ETHUSD deals: 0.22360695566103528
Percentage of unique users who made at least one ETHUSD deal: 3.068592057761733


In [19]:
%%sql
WITH users_all AS (
    SELECT DISTINCT user_id
    FROM (
        SELECT *
        FROM dev_gsokolov.eth_recom
        UNION ALL
        SELECT * FROM
            dev_gsokolov.random_users
    )
)

SELECT
    DATE(DATE_TRUNC(
        open_time_dt,
        WEEK (MONDAY)
    )) AS trade_week,
    COUNT(DISTINCT user_id) AS user_cnt,
    COUNT(DISTINCT operation_id) AS deal_cnt
FROM wh_raw.trading_real_raw
WHERE
    DATE(open_time_dt) BETWEEN '2024-01-01' AND '2024-03-01'
    AND user_id IN (SELECT user_id FROM users_all)
GROUP BY 1
ORDER BY 1 DESC

Unnamed: 0,trade_week,user_cnt,deal_cnt
0,2024-02-26,18341,867175
1,2024-02-19,16974,640927
2,2024-02-12,15724,647488
3,2024-02-05,14602,635742
4,2024-01-29,13946,714015
5,2024-01-22,11734,567715
6,2024-01-15,11101,530086
7,2024-01-08,10605,590048
8,2024-01-01,9789,490581


In [39]:
%%sql
SELECT
    properties.variant AS variant
    , SAFE_CAST(c.user_id AS INT64) AS user_id
FROM `analytics-147612`.`bloomreach_raw`.`campaign` c
WHERE campaign_id = '65cdead01d9a4d20fdcccd69'
AND action_id = 74

Unnamed: 0,variant,user_id
0,Control Group,21342381.0
1,Variant A,32832460.0
2,Variant A,16268949.0
3,Control Group,33554437.0
4,Variant A,36236757.0
...,...,...
17099,Variant A,22930674.0
17100,Variant A,36799598.0
17101,Variant A,26937569.0
17102,Variant A,193816.0


In [11]:
%%sql
select *
from dev_gsokolov.ab_users

Unnamed: 0,variant,user_id
0,Variant A,3264150.0
1,Variant A,13080286.0
2,Variant A,24112335.0
3,Variant A,6295244.0
4,Variant A,37842776.0
...,...,...
21954,Control Group,9707224.0
21955,Control Group,34078324.0
21956,Control Group,30302042.0
21957,Control Group,33408883.0


In [12]:
ab_users.to_csv("ab_users.csv", index=False)

In [73]:
%%sql
select * from 
             dev_gsokolov.user_deals

Unnamed: 0,user_id,variant,operation_id,symbol_name,cmd,volume,open_price,close_price,profit,open_time_dt,close_time_dt,trade_day
0,37884737,Variant A,19379176,FILUSD,0,1.00,8.79500,,,2024-03-05 22:18:32.934600,,2024-03-05
1,19402383,Variant B,19139857,XAUUSD,0,0.05,2085.40000,,,2024-03-04 07:24:24.868100,,2024-03-04
2,6402253,Variant A,19411324,EURUSD,1,0.01,1.08630,,,2024-03-06 07:13:28.286300,,2024-03-06
3,6402253,Variant B,19411324,EURUSD,1,0.01,1.08630,,,2024-03-06 07:13:28.286300,,2024-03-06
4,422466,Control Group,19525372,USDJPY,0,0.03,148.59500,,,2024-03-07 05:14:32.882600,,2024-03-07
...,...,...,...,...,...,...,...,...,...,...,...,...
809395,19475916,Variant A,4113166725,ETHUSD,0,0.03,3795.64000,3789.9900,1.70,2024-03-06 15:33:01.000000,2024-03-06 15:48:35.000000,2024-03-06
809396,19475916,Variant A,4113087463,ETHUSD,1,0.03,3829.49000,3830.2100,0.22,2024-03-06 09:01:17.000000,2024-03-06 10:19:31.000000,2024-03-06
809397,19475916,Variant A,4113166740,ETHUSD,0,0.03,3804.93000,3788.3400,4.98,2024-03-06 15:35:04.000000,2024-03-06 15:48:40.000000,2024-03-06
809398,35336659,Control Group,4113210856,BTCUSD,0,0.01,66755.32000,67355.6200,-6.00,2024-03-06 19:50:20.000000,2024-03-06 20:16:04.000000,2024-03-06


In [3]:
%%sql
SELECT
    ab.user_id
     , ab.variant
--      , DATE_TRUNC(d.open_time_dt, DAY) AS deal_day
--      , d.symbol_name
     , COUNT(DISTINCT d.operation_id) AS deals_cnt
     , SUM(d.volume) AS symbol_volume
     , SUM(CASE WHEN d.symbol_name = 'ETHUSD' THEN d.volume ELSE 0 END) as eth_vol
     , MIN(CASE WHEN d.symbol_name = 'ETHUSD' THEN d.open_time_dt ELSE NULL END) as first_eth_date
     , SUM(CASE WHEN d.symbol_name = 'ETHUSD' THEN 1 ELSE 0 END) as eth_deals_cnt
     , IF(SUM(CASE WHEN d.symbol_name = 'ETHUSD' THEN 1 ELSE 0 END) > 0, 1, 0) AS converted
FROM
    dev_gsokolov.ab_users ab
LEFT JOIN dev_gsokolov.user_deals d
ON ab.user_id = d.user_id
AND DATE(d.close_time_dt) BETWEEN '2024-03-05' AND '2024-03-10'
-- AND DATE(d.close_time_dt) BETWEEN '2024-02-26' AND '2024-03-04'
AND d.variant is not null
GROUP BY
    ab.user_id, ab.variant

Unnamed: 0,user_id,variant,deals_cnt,symbol_volume,eth_vol,first_eth_date,eth_deals_cnt,converted
0,3264150.0,Variant A,7,0.17,0.0,,0,0
1,13080286.0,Variant A,5,0.05,0.0,,0,0
2,24112335.0,Variant A,23,0.23,0.0,,0,0
3,6295244.0,Variant A,18,0.18,0.0,,0,0
4,10041125.0,Variant A,8,0.08,0.0,,0,0
...,...,...,...,...,...,...,...,...
21950,2470105.0,Control Group,0,,0.0,,0,0
21951,37273673.0,Control Group,0,,0.0,,0,0
21952,37236953.0,Control Group,0,,0.0,,0,0
21953,34664939.0,Control Group,0,,0.0,,0,0


In [4]:
ab_stats.to_csv("eth_deals_stats.csv", index=False)
ab_stats_filtered = ab_stats[ab_stats["deals_cnt"] > 0]

In [8]:
# Calculations grouped by variant
grouped_stats = (
    ab_stats.groupby("variant")
    .agg(
        user_count=pd.NamedAgg(column="user_id", aggfunc="nunique"),
        avg_vol_ethusd=pd.NamedAgg(column="eth_vol", aggfunc="mean"),
        std_volume_ethusd=pd.NamedAgg(column="eth_vol", aggfunc=lambda x: x.std()),
        total_deals_eth=pd.NamedAgg(column="eth_deals_cnt", aggfunc="sum"),
        vol_eth=pd.NamedAgg(column="eth_vol", aggfunc="sum"),
        total_vol=pd.NamedAgg(column="symbol_volume", aggfunc="sum"),
        total_converted=pd.NamedAgg(column="converted", aggfunc="sum"),
        total_deals=pd.NamedAgg(column="deals_cnt", aggfunc="sum"),
    )
    .round(4)
)
grouped_stats_filtered = (
    ab_stats_filtered.groupby("variant")
    .agg(
        user_count=pd.NamedAgg(column="user_id", aggfunc="nunique"),
        avg_vol_ethusd=pd.NamedAgg(column="eth_vol", aggfunc="mean"),
        std_volume_ethusd=pd.NamedAgg(column="eth_vol", aggfunc=lambda x: x.std()),
        total_deals_eth=pd.NamedAgg(column="eth_deals_cnt", aggfunc="sum"),
        vol_eth=pd.NamedAgg(column="eth_vol", aggfunc="sum"),
        total_vol=pd.NamedAgg(column="symbol_volume", aggfunc="sum"),
        total_converted=pd.NamedAgg(column="converted", aggfunc="sum"),
        total_deals=pd.NamedAgg(column="deals_cnt", aggfunc="sum"),
    )
    .round(4)
)

# display(grouped_stats.reset_index())
display(grouped_stats)
display(grouped_stats_filtered)

Unnamed: 0_level_0,user_count,avg_vol_ethusd,std_volume_ethusd,total_deals_eth,vol_eth,total_vol,total_converted,total_deals
variant,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Control Group,8668,0.0527,1.2762,9518,456.43,8372.03,432,268100
Variant A,8430,0.0477,0.6766,13674,401.9,7739.66,449,260050
Variant B,4855,0.0435,0.6271,6500,211.07,4411.28,218,131449


Unnamed: 0_level_0,user_count,avg_vol_ethusd,std_volume_ethusd,total_deals_eth,vol_eth,total_vol,total_converted,total_deals
variant,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Control Group,5807,0.0786,1.5587,9518,456.43,8372.03,432,268100
Variant A,5676,0.0708,0.8236,13674,401.9,7739.66,449,260050
Variant B,3344,0.0631,0.7548,6500,211.07,4411.28,218,131449


In [9]:
grouped_stats.reset_index("variant").to_csv("eth_promo.csv", index=False)
grouped_stats_filtered.reset_index("variant").to_csv(
    "eth_promo_filtered.csv", index=False
)

| variant       | user_count | avg_vol_ethusd | std_volume_ethusd | total_deals_eth | vol_eth | total_vol | total_converted | total_deals |
| ------------- | ---------- | -------------- | ----------------- | --------------- | ------- | --------- | --------------- | ----------- |
| Control Group | 8668       | 0.06           | 1.37              | 10709           | 517.33  | 10081.71  | 468             | 324543      |
| Variant A     | 8430       | 0.05           | 0.78              | 15369           | 457.84  | 9260.24   | 482             | 315362      |
| Variant B     | 4855       | 0.05           | 0.71              | 7447            | 238.41  | 5295.92   | 224             | 156530      |


In [34]:
%%bash
pip install nbconvert
pip install ipython
pip install ipynb

jupyter nbconvert --to script "/Users/gsokolov/Library/CloudStorage/GoogleDrive-gsokolov@ourgapps.com/My Drive/Code/ETH Promo Design.ipynb" \ --clear-output --allow-errors



[NbConvertApp] Converting notebook /Users/gsokolov/Library/CloudStorage/GoogleDrive-gsokolov@ourgapps.com/My Drive/Code/ETH Promo Design.ipynb to script
[NbConvertApp] Writing 9044 bytes to /Users/gsokolov/Library/CloudStorage/GoogleDrive-gsokolov@ourgapps.com/My Drive/Code/ETH Promo Design.py
