In [None]:
from tmlt.analytics.query_builder import QueryBuilder
from tmlt.analytics.privacy_budget import PureDPBudget
from tmlt.analytics.session import Session
from tmlt.analytics.keyset import KeySet

In [None]:
from pyspark.sql import SparkSession
import pandas as pd
# Obtain the notebook's Spark session, creating one if none is active yet.
spark = SparkSession.builder.getOrCreate()

# Two-row toy table that will be registered as the private source.
private_data = spark.createDataFrame(
    pd.DataFrame({"A": ["0", "1"], "B": [1, 0], "X": [0, 1]})
)

# Two-row toy table that will be registered as a public source; it shares
# column "A" with the private table.
public_data = spark.createDataFrame(
    pd.DataFrame({"A": ["0", "1"], "Y": [1, 0]})
)

# Overall privacy budget handed to the session; later cells spend parts of it.
total_budget = 10

### Create Session

In [None]:
# Build the session around the private table, registered under the ID
# "my_private_data", with the full pure-DP budget.
sess = Session.from_dataframe(
    source_id="my_private_data",
    dataframe=private_data,
    privacy_budget=PureDPBudget(total_budget),
)

In [None]:
# Show the session's private sources; presumably this lists "my_private_data"
# at this point -- confirm against the cell output.
sess.private_sources

In [None]:
# Register the public table with the session under the ID "my_public_data".
sess.add_public_dataframe("my_public_data", public_data)

In [None]:
# Show the session's public sources; presumably this now includes
# "my_public_data" -- confirm against the cell output.
sess.public_sources

### Create View

In [None]:
# Query joining the private source with the public source.
join_query = (
    QueryBuilder("my_private_data")
    .join_public("my_public_data")
)

# Store the join result as a cached view named "joined_data".
sess.create_view(
    join_query,
    "joined_data",
    cache=True,
)

In [None]:
# Re-check the private sources after create_view; presumably the
# "joined_data" view now appears alongside "my_private_data" -- confirm
# against the cell output.
sess.private_sources

In [None]:
# Look up the schema of the "joined_data" view.
sess.get_schema("joined_data")

In [None]:
# Group the joined view by the public column "Y" (keys 0 and 1) and count
# the rows in each group.
count_query = (
    QueryBuilder("joined_data")
    .groupby(KeySet.from_dict({"Y": [0, 1]}))
    .count()
)

# Evaluate the count with one fifth of the total budget and print the result.
sess.evaluate(
    count_query,
    privacy_budget=PureDPBudget(total_budget / 5),
).show()

In [None]:
# Check the budget left on the session after the evaluate call, which was
# given PureDPBudget(total_budget/5).
sess.remaining_privacy_budget

### Partition the Session

In [None]:
# Split the "joined_data" view on column "Y" into two named partitions
# (value 0 -> "partition_0", value 1 -> "partition_1"), passing
# PureDPBudget(3) as the budget for the partitioning.
new_sessions = sess.partition_and_create(
    "joined_data",
    column="Y",
    splits={"partition_0": 0, "partition_1": 1},
    privacy_budget=PureDPBudget(3),
)

In [None]:
# Check the parent session's remaining budget after partition_and_create
# was given PureDPBudget(3).
sess.remaining_privacy_budget

In [None]:
# Budget available to the "partition_0" session; presumably the
# PureDPBudget(3) passed to partition_and_create -- confirm against output.
new_sessions["partition_0"].remaining_privacy_budget

In [None]:
# Private sources visible inside the "partition_0" session.
new_sessions["partition_0"].private_sources

In [None]:
# Schema of the source named "partition_0" inside the "partition_0" session
# (the source ID here matches the split name used as the dictionary key).
new_sessions["partition_0"].get_schema("partition_0")

In [None]:
# Budget available to the "partition_1" session; presumably the
# PureDPBudget(3) passed to partition_and_create -- confirm against output.
new_sessions["partition_1"].remaining_privacy_budget

In [None]:
# Private sources visible inside the "partition_1" session.
new_sessions["partition_1"].private_sources