<div style="background-color:#1e0d15; padding:20px; border-bottom:3px solid #cc3d56; text-align:center;"><a href="https://www.tmlt.io/"><img src="https://s3.amazonaws.com/tumult.demos/data/images/header.svg" width=200px></a><div style="background-color:#1e0d15; text-align:center; padding-top:15px;  color:white;">copyright 2022 Tumult Labs</div></div>

## Execute Simple Private Quantile Calculation with Tumult Core

### Import the necessary libraries

In [None]:
import pandas as pd
from pyspark.sql import SparkSession

from tmlt.core.privacy_framework.components import (
    PandasDataFrameAggregationByColumn,
    SparkGroupByApplyUDF,
    VectorQuantile,
)
from tmlt.core.privacy_framework.domains import (
    NumpyIntegerDomain,
    PandasDataFrameDomain,
    PandasSeriesDomain,
    SparkDataFrameDomain,
    SparkIntegerColumnDescriptor,
    SparkStringColumnDescriptor,
)
from tmlt.core.privacy_framework.metrics import SymmetricDifference

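# This notebook expects Java 1.8. If that is not your default JVM, uncomment the
# line below and point JAVA_HOME at your own Java 1.8 installation (the path
# shown is only an example of a macOS install location).
#%env JAVA_HOME=/Library/Java/JavaVirtualMachines/jdk1.8.0_281.jdk/Contents/Home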

### Perform the quantile calculation and print the results

In [None]:
"""Main function."""
# Get (or create) the Spark session, with the console progress display disabled.
spark = (
    SparkSession.builder
    .config("spark.ui.showConsoleProgress", "false")
    .getOrCreate()
)

# Set Spark's log level to ERROR for the rest of the notebook.
spark_context = spark.sparkContext
spark_context.setLogLevel("ERROR")

# Toy input table: one [Sex, Age] row per individual.
people_rows = [
    ["F", 28],
    ["F", 26],
    ["F", 27],
    ["M", 23],
    ["F", 29],
    ["M", 22],
    ["M", 24],
    ["M", 25],
]
people_pdf = pd.DataFrame(people_rows, columns=["Sex", "Age"])

# Hand the pandas frame to Spark; `sdf` is the input of the private query below.
sdf = spark.createDataFrame(people_pdf)  # pylint: disable=no-member

print("Dataframe:")
sdf.show()

# Step 1: quantile aggregation over a single integer pandas Series.
# quantile=0.5 requests the median; lower/upper are the bounds handed to the
# aggregation (22 and 29 are the min and max ages in the toy data), and
# epsilon=1 is its privacy parameter.
age_series_domain = PandasSeriesDomain(NumpyIntegerDomain())
vector_quantile = VectorQuantile(
    input_domain=age_series_domain,
    quantile=0.5,
    lower=22,
    upper=29,
    epsilon=1,
)

# Step 2: apply that aggregation to the "Age" column of a pandas DataFrame.
age_frame_domain = PandasDataFrameDomain(
    {"Age": PandasSeriesDomain(NumpyIntegerDomain())}
)
df_aggregation_function = PandasDataFrameAggregationByColumn(
    input_domain=age_frame_domain,
    input_metric=SymmetricDifference(),
    column_to_aggregation={"Age": vector_quantile},
)

# Step 3: wrap the pandas-level aggregation in a Spark group-by on "Sex",
# with the group keys fixed to "M" and "F".
people_domain = SparkDataFrameDomain(
    {
        "Sex": SparkStringColumnDescriptor(),
        "Age": SparkIntegerColumnDescriptor(),
    }
)
measure = SparkGroupByApplyUDF(
    input_domain=people_domain,
    input_metric=SymmetricDifference(),
    groupby_domains={"Sex": ["M", "F"]},
    aggregation_function=df_aggregation_function,
)

# Run the measurement on the input data and display the per-group result,
# relabelling the aggregated "Age" column for readability.
print("Output:")
noisy_result = measure(sdf)
noisy_result.withColumnRenamed("Age", "Noisy Median Age").show()

# Query the measurement's privacy relation for two candidate epsilon values.
# NOTE(review): the second value is passed as the string '0.9' rather than a
# float — presumably so the library can interpret it exactly; confirm against
# the Tumult Core documentation before changing it.
holds_at_one = measure.privacy_relation(1, 1)
print(f"Privacy relation with epsilon=1: {holds_at_one}")
holds_at_point_nine = measure.privacy_relation(1, '0.9')
print(f"Privacy relation with epsilon=.9: {holds_at_point_nine}")

<div style="background-color:#1e0d15; padding:20px; border-top:3px solid #cc3d56; text-align:center;"><a href="https://www.tmlt.io/"><img src="https://s3.amazonaws.com/tumult.demos/data/images/footer.svg" width=200px></a></div><div style="background-color:#1e0d15; text-align:center; padding-bottom:20px;  color:white;">Privacy protection, redefined</div>