In [None]:
import pandas as pd
import altair as alt

import bok.parsers

# Conversion factor derived from the global market exchange rate at the time
# of writing: the number of IDR that one USD buys.
IDR_PER_USD = 14150
# Multiply an amount in IDR by this rate to express it in USD.
IDR_TO_USD_RATE = 1.0 / IDR_PER_USD

## Extract data from the transactions file
# Location of the cleaned transaction log, relative to the notebook's working directory.
TRANSACTIONS_LOG_PATH = "data/clean/transactions.log"
transactions = bok.parsers.parse_transactions_log(TRANSACTIONS_LOG_PATH)

## Distribution of transfer sizes

In [None]:
# Scatter every user-to-user transfer by recipient and amount.
# Transfers from the reseller to their partner are treated as outliers and
# removed; this is the destination account that gets excluded.
EXCLUDED_DEST_USER = "ff26563a118d01972ef7ac443b65a562d7f19cab327a0115f5c42660c58ce2b8"

is_user_transfer = transactions["kind"] == "user_transfer"
not_to_excluded_dest = transactions["dest_user"] != EXCLUDED_DEST_USER

transfers = transactions.loc[is_user_transfer & not_to_excluded_dest]
transfers = transfers.reset_index()

# Lift altair's default row limit so the full transfer table can be charted.
alt.data_transformers.disable_max_rows()

recipient_encoding = alt.X('dest_user', type="nominal")
transfer_amount_encoding = alt.Y(
    'amount_idr',
    type="quantitative",
    scale=alt.Scale(type="linear"),
)

transfer_scatter = alt.Chart(transfers).mark_point(opacity=0.2).encode(
    x=recipient_encoding,
    y=transfer_amount_encoding,
)
transfer_scatter.display()


In [None]:
# Count how many transfers were made at each distinct IDR amount.
# Counting the "timestamp" column yields one tally per amount (rows with a
# missing timestamp are not counted), which is then exposed as a "count" column.
binned_transfers = (
    transfers.groupby("amount_idr")["timestamp"]
    .count()
    .rename("count")
    .reset_index()
)

# Each distinct amount is drawn as its own (nominal) bar.
transfer_count_bars = alt.Chart(binned_transfers).mark_bar().encode(
    x=alt.X("amount_idr", type="nominal"),
    y=alt.Y("count", type="quantitative"),
)
transfer_count_bars.display()


## Track each user's IDR balance vs. time

In [None]:
# Build a ledger of signed IDR balance changes, one row per (timestamp, user):
# incoming transfers and admin topups are positive, outgoing transfers and
# purchases are negative. The result is indexed by timestamp.

# Split transfers into positive components for the dest and negative for the source
transfers = transactions.loc[
    transactions["kind"].isin(["user_transfer", "admin_transfer"])
].reset_index(drop=True).drop(["amount_bytes", "kind"], axis="columns")

# Credit side: the destination user receives the transferred amount.
transfer_credits = transfers[["timestamp", "dest_user", "amount_idr"]].rename(
    {"dest_user": "user"}, axis="columns"
)

# Debit side: the source user loses the same amount. Take an explicit copy so
# the negation below writes to an independent frame rather than a slice of
# `transfers` (which would raise SettingWithCopyWarning).
temp_ledger = transfers[["timestamp", "user", "amount_idr"]].copy()
temp_ledger["amount_idr"] = -temp_ledger["amount_idr"]

# Add topups from the admin as positive user balance
topups = transactions.loc[
    transactions["kind"] == "admin_topup"
].reset_index(drop=True).drop(["amount_bytes", "user", "kind"], axis="columns")
topups = topups.rename({"dest_user": "user"}, axis="columns")

# Add purchases as negative balance
purchases = transactions.loc[
    transactions["kind"] == "purchase"
].reset_index(drop=True).drop(["amount_bytes", "dest_user", "kind"], axis="columns")
purchases["amount_idr"] = -purchases["amount_idr"]

# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0; a single
# pd.concat produces the same rows in the same order without repeated copying.
user_ledger = pd.concat(
    [transfer_credits, temp_ledger, topups, purchases],
    ignore_index=True,
).set_index("timestamp")

In [None]:
# Compute every user's running IDR balance over time, then chart the balance
# history of a single user as a step function.

# User whose balance history is charted. Other user IDs kept here for quick
# swapping while exploring:
# aaa16576d20325cbb47b9fc03d431f0728e51265aa2369385ea060e1b5524988
# ff26563a118d01972ef7ac443b65a562d7f19cab327a0115f5c42660c58ce2b8
# 5759d99492dc4aace702a0d340eef1d605ba0da32a526667149ba059305a4ccb
BALANCE_USER_ID = "5759d99492dc4aace702a0d340eef1d605ba0da32a526667149ba059305a4ccb"

# sort_values returns a new frame, so user_ledger itself is left untouched.
# A stable sort keeps ledger order for entries sharing a timestamp, so the
# cumulative balance is accumulated in a well-defined order.
running_user_balance = user_ledger.sort_values("timestamp", kind="mergesort")
running_user_balance["balance"] = (
    running_user_balance.groupby("user")["amount_idr"].cumsum()
)

# Boolean filtering preserves the timestamp ordering established above, so the
# rows are not re-sorted; an extra unstable sort could reorder same-timestamp
# rows relative to the order in which their balances were accumulated.
test = running_user_balance.loc[
    running_user_balance["user"] == BALANCE_USER_ID
].reset_index()

# NOTE: this changes pandas display settings globally, for every later cell.
pd.set_option('display.max_rows', None)

alt.Chart(test).mark_line(interpolate='step-after').encode(
    x="timestamp",
    y="balance",
    tooltip=["balance", "amount_idr"],
).display()

In [None]:
# Time from purchase to time to zero, compared to 

# How do we characterize the curve between different types of re-up?
# Graph a linear fit versus a higher-order polynomial.