In [0]:

from pyspark.sql.functions import count
from pyspark.sql import Row
from pyspark.sql import functions
import datetime
from dateutil.relativedelta import relativedelta


# Collected check outcomes; the check function in this cell appends one tuple per date.
test_result = list()

# Roll-up of per-form-factor device codes to their platform-wide "-all" code.
device_code_agg_mapping = {
    'android-phone': 'android-all',
    'android-tablet': 'android-all',
    'ios-phone': 'ios-all',
    'ios-tablet': 'ios-all',
}


def last_day_of_month(check_month):
    """Return the final calendar day of the month that ``check_month`` falls in."""
    # Every month has a 28th; four days later is always inside the following month.
    into_next_month = check_month.replace(day=28) + datetime.timedelta(days=4)
    # Stepping back by that date's day-of-month lands on the last day of the original month.
    days_past_month_end = datetime.timedelta(days=into_next_month.day)
    return into_next_month - days_past_month_end


def get_monthly_date_list():
    """Build one Row per month-end date from 2013-01-31 through 2019-10-31.

    Returns:
        list of single-field ``Row`` objects, each wrapping a 'YYYY-MM-DD'
        string for the last day of a month, in ascending order.
    """
    result = []
    end = datetime.date(2019, 10, 31)
    # Plain decimal 1: the former ``01`` spelling is a syntax error on Python 3.
    start = datetime.date(2013, 1, 31)
    while start <= end:
        # Adding a month may not land on a month end, so snap to it before recording.
        start = last_day_of_month(start)
        result.append(Row(start.strftime('%Y-%m-%d')))
        start += relativedelta(months=1)
    return result


def get_weekly_date_list():
    """Build one Row per week, stepping 7 days from 2013-01-12 up to 2019-10-31.

    Returns:
        list of single-field ``Row`` objects, each wrapping a 'YYYY-MM-DD'
        string, in ascending order.
    """
    result = []
    end = datetime.date(2019, 10, 31)
    # Plain decimal 1: the former ``01`` spelling is a syntax error on Python 3.
    start = datetime.date(2013, 1, 12)
    while start <= end:
        result.append(Row(start.strftime('%Y-%m-%d')))
        start += relativedelta(weeks=1)
    return result


def get_daily_date_list():
    """Build one Row per calendar day from 2015-12-27 through 2019-10-31.

    Returns:
        list of single-field ``Row`` objects, each wrapping a 'YYYY-MM-DD'
        string, in ascending order.
    """
    first_day = datetime.date(2015, 12, 27)
    final_day = datetime.date(2019, 10, 31)
    one_day = relativedelta(days=1)

    rows = []
    cursor = first_day
    while not cursor > final_day:
        rows.append(Row(cursor.strftime('%Y-%m-%d')))
        cursor = cursor + one_day
    return rows


def get_path_date_list(granularity):
    """Return ``(month, date)`` string pairs for every date to check at ``granularity``.

    Args:
        granularity: one of 'daily', 'weekly' or 'monthly'.

    Returns:
        list of ``('YYYY-MM', 'YYYY-MM-DD')`` tuples, in the order produced by
        the matching ``get_*_date_list`` helper.

    Raises:
        ValueError: if ``granularity`` is not one of the supported values.
    """
    if granularity == 'daily':
        collect_date = get_daily_date_list()
    elif granularity == 'weekly':
        collect_date = get_weekly_date_list()
    elif granularity == 'monthly':
        collect_date = get_monthly_date_list()
    else:
        # Previously this fell through and died with an UnboundLocalError on collect_date.
        raise ValueError('Unsupported granularity: {!r}'.format(granularity))
    # x[0] is the 'YYYY-MM-DD' string; its first 7 characters are the 'YYYY-MM' month.
    return [(x[0][:7], x[0]) for x in collect_date]


def check_usage_unified_v1_v3_count(_granularity, date_list):
    """Check, per date, that v1 rows plus their aggregated rows match the "v3" row count.

    For each entry of ``date_list`` the function:
      1. counts the rows of the v1 dataset for that date;
      2. maps ``device_code`` through ``device_code_agg_mapping`` and counts the
         distinct ``(app_id, device_code, country_code)`` combinations;
      3. compares the sum of both counts with the row count of the "v3" dataset;
      4. prints a PASS/FAIL line and records
         ``(_granularity, v1_total, v3_count, date)`` in the module-level
         ``test_result`` list.
    The rows gathered by this call are then appended to the QA result Delta
    table, partitioned by ``type``.

    Args:
        _granularity: value substituted into the ``granularity=`` path segment
            and stored in the ``type`` column.
        date_list: sequence of ``(month, date)`` string pairs; only the second
            element (the 'YYYY-MM-DD' date) is used.

    Relies on a ``spark`` session being available in the notebook namespace.
    """
    # Imported first so a missing helper fails before any Spark job is started.
    from aadatapipelinecore.core.utils.retry import retry

    unified_v1_path = 's3://b2c-prod-data-pipeline-unified-usage/unified/' \
                      'usage.basic-kpi.v1/fact/granularity={v1_granularity}/date={v1_date}/'
    # NOTE(review): the "v3" side actually reads usage.basic-kpi.v5 -- confirm this is intended.
    unified_v3_path = 's3://b2c-prod-data-pipeline-unified-usage/unified/' \
                      'usage.basic-kpi.v5/fact/granularity={v3_granularity}/date={v3_date}/'
    # Built once instead of once per date.
    # NOTE(review): a device_code missing from the mapping raises KeyError inside the UDF.
    to_agg_device_code = functions.UserDefinedFunction(lambda x: device_code_agg_mapping[x])

    # Rows produced by this call only; writing the whole global ``test_result``
    # would re-append rows from earlier calls to the result table.
    run_result = []
    for date in date_list:
        unified_v1_path_parse = unified_v1_path.format(v1_granularity=_granularity, v1_date=date[1])
        unified_v3_path_parse = unified_v3_path.format(v3_granularity=_granularity, v3_date=date[1])

        unified_v1 = spark.read.parquet(unified_v1_path_parse)
        unified_v1_count = unified_v1.count()

        unified_v1 = unified_v1.withColumn('device_code', to_agg_device_code(unified_v1['device_code']))
        unified_v1_agg_count = unified_v1.select('app_id', 'device_code', 'country_code').distinct().count()

        unified_v1_total_count = unified_v1_count + unified_v1_agg_count

        unified_v3_count = spark.read.parquet(unified_v3_path_parse).count()

        # Single-argument print(...) behaves the same on Python 2 and Python 3.
        if unified_v1_total_count != unified_v3_count:
            print('Count Test FAIL!!!! unified_v1 data: {}, unified_v3 data: {}, date: {}'.format(
                unified_v1_total_count, unified_v3_count, date[1]))
        else:
            print('date: {} test PASS, unified_v1 data: {}, unified_v3 data: {}'.format(
                date[1], unified_v1_total_count, unified_v3_count))
        row = (_granularity, unified_v1_total_count, unified_v3_count, date[1])
        test_result.append(row)
        run_result.append(row)

    if not run_result:
        # createDataFrame cannot infer column types from an empty list; nothing to persist.
        print('No dates to check for granularity: {}'.format(_granularity))
        return

    df_write_result = spark.createDataFrame(run_result, schema=['type', 'raw_count', 'unified_count', 'date'])

    def write_test_result(df_write_result):
        # NOTE(review): the output prefix is fixed to .../daily/ regardless of _granularity.
        df_write_result.write.format("delta").save(
            "s3://b2c-prod-data-pipeline-qa/aa.usage/result_usage_unified_v1_v3_count_0514/daily/",
            mode="append",
            partitionBy=["type"])
    # presumably re-invokes the writer on failure with a 10-unit interval -- confirm against the retry helper
    retry(write_test_result, (df_write_result,), {}, interval=10)


# Run the count check for every granularity listed here.
granularity_list = ["daily"]
for granularity in granularity_list:
    check_usage_unified_v1_v3_count(granularity, get_path_date_list(granularity))
# NOTE: printed unconditionally -- it marks the end of the run, not that every date matched.
print('pass')

In [0]:
%%sh
# List the objects under the 0514 daily result prefix.
aws s3 ls s3://b2c-prod-data-pipeline-qa/aa.usage/result_usage_unified_v1_v3_count_0514/daily/

In [0]:

# The results are written with format("delta"), so load them through the Delta
# reader; a plain Parquet read of the same directory bypasses the transaction log.
df = spark.read.format("delta").load(
    's3://b2c-prod-data-pipeline-qa/aa.usage/result_usage_unified_v1_v3_count_0514/daily/')
# distinct() collapses rows that were appended more than once.
result = df.distinct().orderBy('date').collect()
for row in result:
    # One line per date: date, raw count, unified count.
    print('{} \t{} \t{}'.format(row['date'], row['raw_count'], row['unified_count']))

In [0]:

from pyspark.sql.functions import count
from pyspark.sql import Row
from pyspark.sql import functions
import datetime
from dateutil.relativedelta import relativedelta


# Collected check outcomes; the check function in this cell appends one tuple per date.
test_result = list()

# Roll-up of per-form-factor device codes to their platform-wide "-all" code.
# NOTE(review): nothing else in this cell appears to read this mapping.
device_code_agg_mapping = {
    'android-phone': 'android-all',
    'android-tablet': 'android-all',
    'ios-phone': 'ios-all',
    'ios-tablet': 'ios-all',
}


def last_day_of_month(check_month):
    """Return the final calendar day of the month that ``check_month`` falls in."""
    # Every month has a 28th; four days later is always inside the following month.
    into_next_month = check_month.replace(day=28) + datetime.timedelta(days=4)
    # Stepping back by that date's day-of-month lands on the last day of the original month.
    days_past_month_end = datetime.timedelta(days=into_next_month.day)
    return into_next_month - days_past_month_end


def get_monthly_date_list():
    """Build one Row per month-end date from 2013-01-31 through 2019-10-31.

    Returns:
        list of single-field ``Row`` objects, each wrapping a 'YYYY-MM-DD'
        string for the last day of a month, in ascending order.
    """
    result = []
    end = datetime.date(2019, 10, 31)
    # Plain decimal 1: the former ``01`` spelling is a syntax error on Python 3.
    start = datetime.date(2013, 1, 31)
    while start <= end:
        # Adding a month may not land on a month end, so snap to it before recording.
        start = last_day_of_month(start)
        result.append(Row(start.strftime('%Y-%m-%d')))
        start += relativedelta(months=1)
    return result


def get_weekly_date_list():
    """Build one Row per week, stepping 7 days from 2013-01-12 up to 2019-10-31.

    Returns:
        list of single-field ``Row`` objects, each wrapping a 'YYYY-MM-DD'
        string, in ascending order.
    """
    result = []
    end = datetime.date(2019, 10, 31)
    # Plain decimal 1: the former ``01`` spelling is a syntax error on Python 3.
    start = datetime.date(2013, 1, 12)
    while start <= end:
        result.append(Row(start.strftime('%Y-%m-%d')))
        start += relativedelta(weeks=1)
    return result


def get_daily_date_list():
    """Build one Row per calendar day from 2020-01-01 through 2020-05-23.

    Returns:
        list of single-field ``Row`` objects, each wrapping a 'YYYY-MM-DD'
        string, in ascending order.
    """
    first_day = datetime.date(2020, 1, 1)
    final_day = datetime.date(2020, 5, 23)
    one_day = relativedelta(days=1)

    rows = []
    cursor = first_day
    while not cursor > final_day:
        rows.append(Row(cursor.strftime('%Y-%m-%d')))
        cursor = cursor + one_day
    return rows


def get_path_date_list(granularity):
    """Return ``(month, date)`` string pairs for every date to check at ``granularity``.

    Args:
        granularity: one of 'daily', 'weekly' or 'monthly'.

    Returns:
        list of ``('YYYY-MM', 'YYYY-MM-DD')`` tuples, in the order produced by
        the matching ``get_*_date_list`` helper.

    Raises:
        ValueError: if ``granularity`` is not one of the supported values.
    """
    if granularity == 'daily':
        collect_date = get_daily_date_list()
    elif granularity == 'weekly':
        collect_date = get_weekly_date_list()
    elif granularity == 'monthly':
        collect_date = get_monthly_date_list()
    else:
        # Previously this fell through and died with an UnboundLocalError on collect_date.
        raise ValueError('Unsupported granularity: {!r}'.format(granularity))
    # x[0] is the 'YYYY-MM-DD' string; its first 7 characters are the 'YYYY-MM' month.
    return [(x[0][:7], x[0]) for x in collect_date]


def check_usage_unified_v1_v3_completeness(_granularity, date_list):
    """Compare per-date row counts between two unified usage datasets and persist the outcome.

    For every entry of ``date_list`` the function counts the rows under the
    "v1" path (read as Parquet) and under the "v3" path (read as Delta),
    prints a Pass/FAIL line, and records
    ``(_granularity, v1_count, v3_count, date)`` in the module-level
    ``test_result`` list. The rows gathered by this call are then appended to
    the QA result Delta table, partitioned by ``type``.

    Args:
        _granularity: value substituted into the ``granularity=`` path segment
            and stored in the ``type`` column.
        date_list: sequence of ``(month, day)`` string pairs; only the second
            element is used here, e.g.
            [('2015-12', '2015-12-27'), ('2015-12', '2015-12-28'),
             ('2016-12', '2016-12-27'), ('2016-12', '2016-12-28')]

    Relies on a ``spark`` session being available in the notebook namespace.
    """
    # Imported first so a missing helper fails before any Spark job is started.
    from aadatapipelinecore.core.utils.retry import retry

    # Path templates do not change between dates, so they are built once.
    # NOTE(review): the "v1" side reads app-tech.usage.basic-kpi.v3 here -- confirm the naming.
    unified_v1_path = 's3://b2c-prod-data-pipeline-unified-usage/unified/app-tech.usage.basic-kpi.v3/fact/' \
                      'granularity={v1_granularity}/date={v1_date}/'
    unified_v3_path = 's3://b2c-prod-data-pipeline-unified-usage/unified/usage.basic-kpi.v3/fact' \
                      '/granularity={v3_granularity}/date={v3_date}/'

    # Rows produced by this call only; writing the whole global ``test_result``
    # would re-append rows from earlier calls to the result table.
    run_result = []
    for date in date_list:
        unified_v1_path_parse = unified_v1_path.format(v1_granularity=_granularity, v1_date=date[1])
        unified_v3_path_parse = unified_v3_path.format(v3_granularity=_granularity, v3_date=date[1])

        unified_v1 = spark.read.parquet(unified_v1_path_parse)
        unified_v1_count = unified_v1.count()
        unified_v3_count = spark.read.format("delta").load(unified_v3_path_parse).count()

        # Single-argument print(...) behaves the same on Python 2 and Python 3.
        if unified_v1_count != unified_v3_count:
            print('Completeness Test FAIL!!!! unified_v1 data: {}, unified_v3 data: {}, date: {}'.format(
                unified_v1_count, unified_v3_count, date[1]))
        else:
            print('Completeness Test Pass! unified_v1 data: {}, unified_v3 data: {}, date: {}'.format(
                unified_v1_count, unified_v3_count, date[1]))
        row = (_granularity, unified_v1_count, unified_v3_count, date[1])
        test_result.append(row)
        run_result.append(row)

    if not run_result:
        # createDataFrame cannot infer column types from an empty list; nothing to persist.
        print('No dates to check for granularity: {}'.format(_granularity))
        return

    df_write_result = spark.createDataFrame(run_result, schema=['type', 'raw_count', 'unified_count', 'date'])

    def write_test_result(df_write_result):
        # NOTE(review): the output prefix is fixed to .../daily/ regardless of _granularity.
        df_write_result.write.format("delta").save(
            "s3://b2c-prod-data-pipeline-qa/aa.usage/result_usage_unified_v1_v3_count_0609/daily/",
            mode="append",
            partitionBy=["type"])
    # presumably re-invokes the writer on failure with a 10-unit interval -- confirm against the retry helper
    retry(write_test_result, (df_write_result,), {}, interval=10)


# Run the completeness check for every granularity listed here.
granularity_list = ["daily"]
for granularity in granularity_list:
    check_usage_unified_v1_v3_completeness(granularity, get_path_date_list(granularity))
# NOTE: printed unconditionally -- it marks the end of the run, not that every date matched.
print('pass')

In [0]:

from pyspark.sql.functions import count
from pyspark.sql import Row
from pyspark.sql import functions
import datetime
from dateutil.relativedelta import relativedelta


# Collected check outcomes; the check function in this cell appends one tuple per date.
test_result = list()

# Roll-up of per-form-factor device codes to their platform-wide "-all" code.
# NOTE(review): nothing else in this cell appears to read this mapping.
device_code_agg_mapping = {
    'android-phone': 'android-all',
    'android-tablet': 'android-all',
    'ios-phone': 'ios-all',
    'ios-tablet': 'ios-all',
}


def last_day_of_month(check_month):
    """Return the final calendar day of the month that ``check_month`` falls in."""
    # Every month has a 28th; four days later is always inside the following month.
    into_next_month = check_month.replace(day=28) + datetime.timedelta(days=4)
    # Stepping back by that date's day-of-month lands on the last day of the original month.
    days_past_month_end = datetime.timedelta(days=into_next_month.day)
    return into_next_month - days_past_month_end


def get_monthly_date_list():
    """Build one Row per month-end date from 2020-01-31 through 2020-04-30.

    Returns:
        list of single-field ``Row`` objects, each wrapping a 'YYYY-MM-DD'
        string for the last day of a month, in ascending order.
    """
    first_month_end = datetime.date(2020, 1, 31)
    final_month_end = datetime.date(2020, 4, 30)
    one_month = relativedelta(months=1)

    rows = []
    cursor = first_month_end
    while not cursor > final_month_end:
        # Adding a month may not land on a month end, so snap to it before recording.
        cursor = last_day_of_month(cursor)
        rows.append(Row(cursor.strftime('%Y-%m-%d')))
        cursor = cursor + one_month
    return rows


def get_weekly_date_list():
    """Build one Row per week, stepping 7 days from 2020-01-04 up to 2020-05-23.

    Returns:
        list of single-field ``Row`` objects, each wrapping a 'YYYY-MM-DD'
        string, in ascending order.
    """
    first_week = datetime.date(2020, 1, 4)
    final_week = datetime.date(2020, 5, 23)
    one_week = relativedelta(weeks=1)

    rows = []
    cursor = first_week
    while not cursor > final_week:
        rows.append(Row(cursor.strftime('%Y-%m-%d')))
        cursor = cursor + one_week
    return rows


def get_daily_date_list():
    """Build one Row per calendar day from 2020-01-01 through 2020-05-23.

    Returns:
        list of single-field ``Row`` objects, each wrapping a 'YYYY-MM-DD'
        string, in ascending order.
    """
    first_day = datetime.date(2020, 1, 1)
    final_day = datetime.date(2020, 5, 23)
    one_day = relativedelta(days=1)

    rows = []
    cursor = first_day
    while not cursor > final_day:
        rows.append(Row(cursor.strftime('%Y-%m-%d')))
        cursor = cursor + one_day
    return rows


def get_path_date_list(granularity):
    """Return ``(month, date)`` string pairs for every date to check at ``granularity``.

    Args:
        granularity: one of 'daily', 'weekly' or 'monthly'.

    Returns:
        list of ``('YYYY-MM', 'YYYY-MM-DD')`` tuples, in the order produced by
        the matching ``get_*_date_list`` helper.

    Raises:
        ValueError: if ``granularity`` is not one of the supported values.
    """
    if granularity == 'daily':
        collect_date = get_daily_date_list()
    elif granularity == 'weekly':
        collect_date = get_weekly_date_list()
    elif granularity == 'monthly':
        collect_date = get_monthly_date_list()
    else:
        # Previously this fell through and died with an UnboundLocalError on collect_date.
        raise ValueError('Unsupported granularity: {!r}'.format(granularity))
    # x[0] is the 'YYYY-MM-DD' string; its first 7 characters are the 'YYYY-MM' month.
    return [(x[0][:7], x[0]) for x in collect_date]


def check_usage_unified_v1_v3_completeness(_granularity, date_list):
    """Compare per-date row counts between two unified usage datasets and persist the outcome.

    For every entry of ``date_list`` the function counts the rows under the
    "v1" path (read as Parquet) and under the "v3" path (read as Delta),
    prints a Pass/FAIL line, and records
    ``(_granularity, v1_count, v3_count, date)`` in the module-level
    ``test_result`` list. The rows gathered by this call are then appended to
    the QA result Delta table, partitioned by ``type``.

    Args:
        _granularity: value substituted into the ``granularity=`` path segment
            and stored in the ``type`` column.
        date_list: sequence of ``(month, day)`` string pairs; only the second
            element is used here, e.g.
            [('2015-12', '2015-12-27'), ('2015-12', '2015-12-28'),
             ('2016-12', '2016-12-27'), ('2016-12', '2016-12-28')]

    Relies on a ``spark`` session being available in the notebook namespace.
    """
    # Imported first so a missing helper fails before any Spark job is started.
    from aadatapipelinecore.core.utils.retry import retry

    # Path templates do not change between dates, so they are built once.
    # NOTE(review): the "v1" side reads app-tech.usage.basic-kpi.v3 here -- confirm the naming.
    unified_v1_path = 's3://b2c-prod-data-pipeline-unified-usage/unified/app-tech.usage.basic-kpi.v3/fact/' \
                      'granularity={v1_granularity}/date={v1_date}/'
    unified_v3_path = 's3://b2c-prod-data-pipeline-unified-usage/unified/usage.basic-kpi.v3/fact' \
                      '/granularity={v3_granularity}/date={v3_date}/'

    # Rows produced by this call only; writing the whole global ``test_result``
    # would re-append rows from earlier calls to the result table.
    run_result = []
    for date in date_list:
        unified_v1_path_parse = unified_v1_path.format(v1_granularity=_granularity, v1_date=date[1])
        unified_v3_path_parse = unified_v3_path.format(v3_granularity=_granularity, v3_date=date[1])

        unified_v1 = spark.read.parquet(unified_v1_path_parse)
        unified_v1_count = unified_v1.count()
        unified_v3_count = spark.read.format("delta").load(unified_v3_path_parse).count()

        # Single-argument print(...) behaves the same on Python 2 and Python 3.
        if unified_v1_count != unified_v3_count:
            print('Completeness Test FAIL!!!! unified_v1 data: {}, unified_v3 data: {}, date: {}'.format(
                unified_v1_count, unified_v3_count, date[1]))
        else:
            print('Completeness Test Pass! unified_v1 data: {}, unified_v3 data: {}, date: {}'.format(
                unified_v1_count, unified_v3_count, date[1]))
        row = (_granularity, unified_v1_count, unified_v3_count, date[1])
        test_result.append(row)
        run_result.append(row)

    if not run_result:
        # createDataFrame cannot infer column types from an empty list; nothing to persist.
        print('No dates to check for granularity: {}'.format(_granularity))
        return

    df_write_result = spark.createDataFrame(run_result, schema=['type', 'raw_count', 'unified_count', 'date'])

    def write_test_result(df_write_result):
        # NOTE(review): the output prefix is fixed to .../weekly/ regardless of _granularity.
        df_write_result.write.format("delta").save(
            "s3://b2c-prod-data-pipeline-qa/aa.usage/result_usage_unified_v1_v3_count_0609/weekly/",
            mode="append",
            partitionBy=["type"])
    # presumably re-invokes the writer on failure with a 10-unit interval -- confirm against the retry helper
    retry(write_test_result, (df_write_result,), {}, interval=10)


# Run the completeness check for every granularity listed here.
granularity_list = ["weekly"]
for granularity in granularity_list:
    check_usage_unified_v1_v3_completeness(granularity, get_path_date_list(granularity))
# NOTE: printed unconditionally -- it marks the end of the run, not that every date matched.
print('pass')

In [0]:

from pyspark.sql.functions import count
from pyspark.sql import Row
from pyspark.sql import functions
import datetime
from dateutil.relativedelta import relativedelta


# Collected check outcomes; the check function in this cell appends one tuple per date.
test_result = list()

# Roll-up of per-form-factor device codes to their platform-wide "-all" code.
# NOTE(review): nothing else in this cell appears to read this mapping.
device_code_agg_mapping = {
    'android-phone': 'android-all',
    'android-tablet': 'android-all',
    'ios-phone': 'ios-all',
    'ios-tablet': 'ios-all',
}


def last_day_of_month(check_month):
    """Return the final calendar day of the month that ``check_month`` falls in."""
    # Every month has a 28th; four days later is always inside the following month.
    into_next_month = check_month.replace(day=28) + datetime.timedelta(days=4)
    # Stepping back by that date's day-of-month lands on the last day of the original month.
    days_past_month_end = datetime.timedelta(days=into_next_month.day)
    return into_next_month - days_past_month_end


def get_monthly_date_list():
    """Build one Row per month-end date from 2020-01-31 through 2020-04-30.

    Returns:
        list of single-field ``Row`` objects, each wrapping a 'YYYY-MM-DD'
        string for the last day of a month, in ascending order.
    """
    first_month_end = datetime.date(2020, 1, 31)
    final_month_end = datetime.date(2020, 4, 30)
    one_month = relativedelta(months=1)

    rows = []
    cursor = first_month_end
    while not cursor > final_month_end:
        # Adding a month may not land on a month end, so snap to it before recording.
        cursor = last_day_of_month(cursor)
        rows.append(Row(cursor.strftime('%Y-%m-%d')))
        cursor = cursor + one_month
    return rows


def get_weekly_date_list():
    """Build one Row per week, stepping 7 days from 2020-01-04 up to 2020-05-23.

    Returns:
        list of single-field ``Row`` objects, each wrapping a 'YYYY-MM-DD'
        string, in ascending order.
    """
    first_week = datetime.date(2020, 1, 4)
    final_week = datetime.date(2020, 5, 23)
    one_week = relativedelta(weeks=1)

    rows = []
    cursor = first_week
    while not cursor > final_week:
        rows.append(Row(cursor.strftime('%Y-%m-%d')))
        cursor = cursor + one_week
    return rows


def get_daily_date_list():
    """Build one Row per calendar day from 2018-02-01 through 2018-03-31.

    Returns:
        list of single-field ``Row`` objects, each wrapping a 'YYYY-MM-DD'
        string, in ascending order.
    """
    first_day = datetime.date(2018, 2, 1)
    final_day = datetime.date(2018, 3, 31)
    one_day = relativedelta(days=1)

    rows = []
    cursor = first_day
    while not cursor > final_day:
        rows.append(Row(cursor.strftime('%Y-%m-%d')))
        cursor = cursor + one_day
    return rows


def get_path_date_list(granularity):
    """Return ``(month, date)`` string pairs for every date to check at ``granularity``.

    Args:
        granularity: one of 'daily', 'weekly' or 'monthly'.

    Returns:
        list of ``('YYYY-MM', 'YYYY-MM-DD')`` tuples, in the order produced by
        the matching ``get_*_date_list`` helper.

    Raises:
        ValueError: if ``granularity`` is not one of the supported values.
    """
    if granularity == 'daily':
        collect_date = get_daily_date_list()
    elif granularity == 'weekly':
        collect_date = get_weekly_date_list()
    elif granularity == 'monthly':
        collect_date = get_monthly_date_list()
    else:
        # Previously this fell through and died with an UnboundLocalError on collect_date.
        raise ValueError('Unsupported granularity: {!r}'.format(granularity))
    # x[0] is the 'YYYY-MM-DD' string; its first 7 characters are the 'YYYY-MM' month.
    return [(x[0][:7], x[0]) for x in collect_date]


def check_usage_unified_v1_v3_completeness(_granularity, date_list):
    """Compare per-date row counts between the unified v1 and v3 datasets and persist the outcome.

    For every entry of ``date_list`` the function counts the rows under the
    v1 path (read as Parquet) and under the v3 path (read as Delta), prints a
    Pass/FAIL line, and records ``(_granularity, v1_count, v3_count, date)``
    in the module-level ``test_result`` list. The rows gathered by this call
    are then appended to the QA result Delta table, partitioned by ``type``.

    Args:
        _granularity: value substituted into the ``granularity=`` path segment
            and stored in the ``type`` column.
        date_list: sequence of ``(month, day)`` string pairs; only the second
            element is used here, e.g.
            [('2015-12', '2015-12-27'), ('2015-12', '2015-12-28'),
             ('2016-12', '2016-12-27'), ('2016-12', '2016-12-28')]

    Relies on a ``spark`` session being available in the notebook namespace.
    """
    # Imported first so a missing helper fails before any Spark job is started.
    from aadatapipelinecore.core.utils.retry import retry

    # Path templates do not change between dates, so they are built once.
    unified_v1_path = 's3://b2c-prod-data-pipeline-unified-usage/unified/usage.basic-kpi.v1/fact/' \
                      'granularity={v1_granularity}/date={v1_date}/'
    unified_v3_path = 's3://b2c-prod-data-pipeline-unified-usage/unified/usage.basic-kpi.v3/fact' \
                      '/granularity={v3_granularity}/date={v3_date}/'

    # Rows produced by this call only; writing the whole global ``test_result``
    # would re-append rows from earlier calls to the result table.
    run_result = []
    for date in date_list:
        unified_v1_path_parse = unified_v1_path.format(v1_granularity=_granularity, v1_date=date[1])
        unified_v3_path_parse = unified_v3_path.format(v3_granularity=_granularity, v3_date=date[1])

        unified_v1 = spark.read.parquet(unified_v1_path_parse)
        unified_v1_count = unified_v1.count()
        unified_v3_count = spark.read.format("delta").load(unified_v3_path_parse).count()

        # Single-argument print(...) behaves the same on Python 2 and Python 3.
        if unified_v1_count != unified_v3_count:
            print('Completeness Test FAIL!!!! unified_v1 data: {}, unified_v3 data: {}, date: {}'.format(
                unified_v1_count, unified_v3_count, date[1]))
        else:
            print('Completeness Test Pass! unified_v1 data: {}, unified_v3 data: {}, date: {}'.format(
                unified_v1_count, unified_v3_count, date[1]))
        row = (_granularity, unified_v1_count, unified_v3_count, date[1])
        test_result.append(row)
        run_result.append(row)

    if not run_result:
        # createDataFrame cannot infer column types from an empty list; nothing to persist.
        print('No dates to check for granularity: {}'.format(_granularity))
        return

    df_write_result = spark.createDataFrame(run_result, schema=['type', 'raw_count', 'unified_count', 'date'])

    def write_test_result(df_write_result):
        # NOTE(review): the output prefix is fixed to .../daily/ regardless of _granularity.
        df_write_result.write.format("delta").save(
            "s3://b2c-prod-data-pipeline-qa/aa.usage/result_usage_unified_v1_v3_count_0616/daily/",
            mode="append",
            partitionBy=["type"])
    # presumably re-invokes the writer on failure with a 10-unit interval -- confirm against the retry helper
    retry(write_test_result, (df_write_result,), {}, interval=10)


# Run the completeness check for every granularity listed here.
granularity_list = ["daily"]
for granularity in granularity_list:
    check_usage_unified_v1_v3_completeness(granularity, get_path_date_list(granularity))
# NOTE: printed unconditionally -- it marks the end of the run, not that every date matched.
print('pass')



In [0]:

from pyspark.sql.functions import count
from pyspark.sql import Row
from pyspark.sql import functions
import datetime
from dateutil.relativedelta import relativedelta


# Collected check outcomes; the check function in this cell appends one tuple per date.
test_result = list()

# Roll-up of per-form-factor device codes to their platform-wide "-all" code.
# NOTE(review): nothing else in this cell appears to read this mapping.
device_code_agg_mapping = {
    'android-phone': 'android-all',
    'android-tablet': 'android-all',
    'ios-phone': 'ios-all',
    'ios-tablet': 'ios-all',
}


def last_day_of_month(check_month):
    """Return the final calendar day of the month that ``check_month`` falls in."""
    # Every month has a 28th; four days later is always inside the following month.
    into_next_month = check_month.replace(day=28) + datetime.timedelta(days=4)
    # Stepping back by that date's day-of-month lands on the last day of the original month.
    days_past_month_end = datetime.timedelta(days=into_next_month.day)
    return into_next_month - days_past_month_end


def get_monthly_date_list():
    """Build one Row per month-end date from 2020-01-31 through 2020-04-30.

    Returns:
        list of single-field ``Row`` objects, each wrapping a 'YYYY-MM-DD'
        string for the last day of a month, in ascending order.
    """
    first_month_end = datetime.date(2020, 1, 31)
    final_month_end = datetime.date(2020, 4, 30)
    one_month = relativedelta(months=1)

    rows = []
    cursor = first_month_end
    while not cursor > final_month_end:
        # Adding a month may not land on a month end, so snap to it before recording.
        cursor = last_day_of_month(cursor)
        rows.append(Row(cursor.strftime('%Y-%m-%d')))
        cursor = cursor + one_month
    return rows


def get_weekly_date_list():
    """Build one Row per week, stepping 7 days from 2020-01-04 up to 2020-05-23.

    Returns:
        list of single-field ``Row`` objects, each wrapping a 'YYYY-MM-DD'
        string, in ascending order.
    """
    first_week = datetime.date(2020, 1, 4)
    final_week = datetime.date(2020, 5, 23)
    one_week = relativedelta(weeks=1)

    rows = []
    cursor = first_week
    while not cursor > final_week:
        rows.append(Row(cursor.strftime('%Y-%m-%d')))
        cursor = cursor + one_week
    return rows


def get_daily_date_list():
    """Build one Row per calendar day from 2020-01-01 through 2020-05-23.

    Returns:
        list of single-field ``Row`` objects, each wrapping a 'YYYY-MM-DD'
        string, in ascending order.
    """
    first_day = datetime.date(2020, 1, 1)
    final_day = datetime.date(2020, 5, 23)
    one_day = relativedelta(days=1)

    rows = []
    cursor = first_day
    while not cursor > final_day:
        rows.append(Row(cursor.strftime('%Y-%m-%d')))
        cursor = cursor + one_day
    return rows


def get_path_date_list(granularity):
    """Return ``(month, date)`` string pairs for every date to check at ``granularity``.

    Args:
        granularity: one of 'daily', 'weekly' or 'monthly'.

    Returns:
        list of ``('YYYY-MM', 'YYYY-MM-DD')`` tuples, in the order produced by
        the matching ``get_*_date_list`` helper.

    Raises:
        ValueError: if ``granularity`` is not one of the supported values.
    """
    if granularity == 'daily':
        collect_date = get_daily_date_list()
    elif granularity == 'weekly':
        collect_date = get_weekly_date_list()
    elif granularity == 'monthly':
        collect_date = get_monthly_date_list()
    else:
        # Previously this fell through and died with an UnboundLocalError on collect_date.
        raise ValueError('Unsupported granularity: {!r}'.format(granularity))
    # x[0] is the 'YYYY-MM-DD' string; its first 7 characters are the 'YYYY-MM' month.
    return [(x[0][:7], x[0]) for x in collect_date]


def check_usage_unified_v1_v3_completeness(_granularity, date_list):
    """Compare per-date row counts between the unified v1 and v3 datasets and persist the outcome.

    For every entry of ``date_list`` the function counts the rows under the
    v1 path (read as Parquet) and under the v3 path (read as Delta), prints a
    Pass/FAIL line, and records ``(_granularity, v1_count, v3_count, date)``
    in the module-level ``test_result`` list. The rows gathered by this call
    are then appended to the QA result Delta table, partitioned by ``type``.

    Args:
        _granularity: value substituted into the ``granularity=`` path segment
            and stored in the ``type`` column.
        date_list: sequence of ``(month, day)`` string pairs; only the second
            element is used here, e.g.
            [('2015-12', '2015-12-27'), ('2015-12', '2015-12-28'),
             ('2016-12', '2016-12-27'), ('2016-12', '2016-12-28')]

    Relies on a ``spark`` session being available in the notebook namespace.
    """
    # Imported first so a missing helper fails before any Spark job is started.
    from aadatapipelinecore.core.utils.retry import retry

    # Path templates do not change between dates, so they are built once.
    unified_v1_path = 's3://b2c-prod-data-pipeline-unified-usage/unified/usage.basic-kpi.v1/fact/' \
                      'granularity={v1_granularity}/date={v1_date}/'
    unified_v3_path = 's3://b2c-prod-data-pipeline-unified-usage/unified/usage.basic-kpi.v3/fact' \
                      '/granularity={v3_granularity}/date={v3_date}/'

    # Rows produced by this call only; writing the whole global ``test_result``
    # would re-append rows from earlier calls to the result table.
    run_result = []
    for date in date_list:
        unified_v1_path_parse = unified_v1_path.format(v1_granularity=_granularity, v1_date=date[1])
        unified_v3_path_parse = unified_v3_path.format(v3_granularity=_granularity, v3_date=date[1])

        unified_v1 = spark.read.parquet(unified_v1_path_parse)
        unified_v1_count = unified_v1.count()
        unified_v3_count = spark.read.format("delta").load(unified_v3_path_parse).count()

        # Single-argument print(...) behaves the same on Python 2 and Python 3.
        if unified_v1_count != unified_v3_count:
            print('Completeness Test FAIL!!!! unified_v1 data: {}, unified_v3 data: {}, date: {}'.format(
                unified_v1_count, unified_v3_count, date[1]))
        else:
            print('Completeness Test Pass! unified_v1 data: {}, unified_v3 data: {}, date: {}'.format(
                unified_v1_count, unified_v3_count, date[1]))
        row = (_granularity, unified_v1_count, unified_v3_count, date[1])
        test_result.append(row)
        run_result.append(row)

    if not run_result:
        # createDataFrame cannot infer column types from an empty list; nothing to persist.
        print('No dates to check for granularity: {}'.format(_granularity))
        return

    df_write_result = spark.createDataFrame(run_result, schema=['type', 'raw_count', 'unified_count', 'date'])

    def write_test_result(df_write_result):
        # NOTE(review): the output prefix is fixed to .../daily/ regardless of _granularity.
        df_write_result.write.format("delta").save(
            "s3://b2c-prod-data-pipeline-qa/aa.usage/result_usage_unified_v1_v3_count_0616/daily/",
            mode="append",
            partitionBy=["type"])
    # presumably re-invokes the writer on failure with a 10-unit interval -- confirm against the retry helper
    retry(write_test_result, (df_write_result,), {}, interval=10)


# Run the completeness check for every granularity listed here.
granularity_list = ["daily"]
for granularity in granularity_list:
    check_usage_unified_v1_v3_completeness(granularity, get_path_date_list(granularity))
# NOTE: printed unconditionally -- it marks the end of the run, not that every date matched.
print('pass')

In [0]:
%%sh
# List what exists under the 0616 result prefix.
aws s3 ls s3://b2c-prod-data-pipeline-qa/aa.usage/result_usage_unified_v1_v3_count_0616/

In [0]:

from pyspark.sql.functions import count
from pyspark.sql import Row
from pyspark.sql import functions
import datetime
from dateutil.relativedelta import relativedelta


# Collected check outcomes; the check function in this cell appends one tuple per date.
test_result = list()

# Roll-up of per-form-factor device codes to their platform-wide "-all" code.
# NOTE(review): nothing else in this cell appears to read this mapping.
device_code_agg_mapping = {
    'android-phone': 'android-all',
    'android-tablet': 'android-all',
    'ios-phone': 'ios-all',
    'ios-tablet': 'ios-all',
}


def last_day_of_month(check_month):
    """Return the final calendar day of the month that ``check_month`` falls in."""
    # Every month has a 28th; four days later is always inside the following month.
    into_next_month = check_month.replace(day=28) + datetime.timedelta(days=4)
    # Stepping back by that date's day-of-month lands on the last day of the original month.
    days_past_month_end = datetime.timedelta(days=into_next_month.day)
    return into_next_month - days_past_month_end


def get_monthly_date_list():
    """Build one Row per month-end date from 2020-01-31 through 2020-04-30.

    Returns:
        list of single-field ``Row`` objects, each wrapping a 'YYYY-MM-DD'
        string for the last day of a month, in ascending order.
    """
    first_month_end = datetime.date(2020, 1, 31)
    final_month_end = datetime.date(2020, 4, 30)
    one_month = relativedelta(months=1)

    rows = []
    cursor = first_month_end
    while not cursor > final_month_end:
        # Adding a month may not land on a month end, so snap to it before recording.
        cursor = last_day_of_month(cursor)
        rows.append(Row(cursor.strftime('%Y-%m-%d')))
        cursor = cursor + one_month
    return rows


def get_weekly_date_list():
    """Build one Row per week, stepping 7 days from 2020-01-04 up to 2020-05-23.

    Returns:
        list of single-field ``Row`` objects, each wrapping a 'YYYY-MM-DD'
        string, in ascending order.
    """
    first_week = datetime.date(2020, 1, 4)
    final_week = datetime.date(2020, 5, 23)
    one_week = relativedelta(weeks=1)

    rows = []
    cursor = first_week
    while not cursor > final_week:
        rows.append(Row(cursor.strftime('%Y-%m-%d')))
        cursor = cursor + one_week
    return rows


def get_daily_date_list():
    """Build one Row per calendar day from 2015-12-27 through 2018-01-31.

    Returns:
        list of single-field ``Row`` objects, each wrapping a 'YYYY-MM-DD'
        string, in ascending order.
    """
    first_day = datetime.date(2015, 12, 27)
    final_day = datetime.date(2018, 1, 31)
    one_day = relativedelta(days=1)

    rows = []
    cursor = first_day
    while not cursor > final_day:
        rows.append(Row(cursor.strftime('%Y-%m-%d')))
        cursor = cursor + one_day
    return rows


def get_path_date_list(granularity):
    """Return ``(month, date)`` string pairs for every date to check at ``granularity``.

    Args:
        granularity: one of 'daily', 'weekly' or 'monthly'.

    Returns:
        list of ``('YYYY-MM', 'YYYY-MM-DD')`` tuples, in the order produced by
        the matching ``get_*_date_list`` helper.

    Raises:
        ValueError: if ``granularity`` is not one of the supported values.
    """
    if granularity == 'daily':
        collect_date = get_daily_date_list()
    elif granularity == 'weekly':
        collect_date = get_weekly_date_list()
    elif granularity == 'monthly':
        collect_date = get_monthly_date_list()
    else:
        # Previously this fell through and died with an UnboundLocalError on collect_date.
        raise ValueError('Unsupported granularity: {!r}'.format(granularity))
    # x[0] is the 'YYYY-MM-DD' string; its first 7 characters are the 'YYYY-MM' month.
    return [(x[0][:7], x[0]) for x in collect_date]


def check_usage_unified_v1_v3_completeness(_granularity, date_list):
    """Compare per-date row counts between the unified v1 and v3 datasets and persist the outcome.

    For every entry of ``date_list`` the function counts the rows under the
    v1 path (read as Parquet) and under the v3 path (read as Delta), prints a
    Pass/FAIL line, and records ``(_granularity, v1_count, v3_count, date)``
    in the module-level ``test_result`` list. The rows gathered by this call
    are then appended to the QA result Delta table, partitioned by ``type``.

    Args:
        _granularity: value substituted into the ``granularity=`` path segment
            and stored in the ``type`` column.
        date_list: sequence of ``(month, day)`` string pairs; only the second
            element is used here, e.g.
            [('2015-12', '2015-12-27'), ('2015-12', '2015-12-28'),
             ('2016-12', '2016-12-27'), ('2016-12', '2016-12-28')]

    Relies on a ``spark`` session being available in the notebook namespace.
    """
    # Imported first so a missing helper fails before any Spark job is started.
    from aadatapipelinecore.core.utils.retry import retry

    # Path templates do not change between dates, so they are built once.
    unified_v1_path = 's3://b2c-prod-data-pipeline-unified-usage/unified/usage.basic-kpi.v1/fact/' \
                      'granularity={v1_granularity}/date={v1_date}/'
    unified_v3_path = 's3://b2c-prod-data-pipeline-unified-usage/unified/usage.basic-kpi.v3/fact' \
                      '/granularity={v3_granularity}/date={v3_date}/'

    # Rows produced by this call only; writing the whole global ``test_result``
    # would re-append rows from earlier calls to the result table.
    run_result = []
    for date in date_list:
        unified_v1_path_parse = unified_v1_path.format(v1_granularity=_granularity, v1_date=date[1])
        unified_v3_path_parse = unified_v3_path.format(v3_granularity=_granularity, v3_date=date[1])

        unified_v1 = spark.read.parquet(unified_v1_path_parse)
        unified_v1_count = unified_v1.count()
        unified_v3_count = spark.read.format("delta").load(unified_v3_path_parse).count()

        # Single-argument print(...) behaves the same on Python 2 and Python 3.
        if unified_v1_count != unified_v3_count:
            print('Completeness Test FAIL!!!! unified_v1 data: {}, unified_v3 data: {}, date: {}'.format(
                unified_v1_count, unified_v3_count, date[1]))
        else:
            print('Completeness Test Pass! unified_v1 data: {}, unified_v3 data: {}, date: {}'.format(
                unified_v1_count, unified_v3_count, date[1]))
        row = (_granularity, unified_v1_count, unified_v3_count, date[1])
        test_result.append(row)
        run_result.append(row)

    if not run_result:
        # createDataFrame cannot infer column types from an empty list; nothing to persist.
        print('No dates to check for granularity: {}'.format(_granularity))
        return

    df_write_result = spark.createDataFrame(run_result, schema=['type', 'raw_count', 'unified_count', 'date'])

    def write_test_result(df_write_result):
        # NOTE(review): the output prefix is fixed to .../daily/ regardless of _granularity.
        df_write_result.write.format("delta").save(
            "s3://b2c-prod-data-pipeline-qa/aa.usage/result_usage_unified_v1_v3_count_0616/daily/",
            mode="append",
            partitionBy=["type"])
    # presumably re-invokes the writer on failure with a 10-unit interval -- confirm against the retry helper
    retry(write_test_result, (df_write_result,), {}, interval=10)


# Run the completeness check for every granularity listed here.
granularity_list = ["daily"]
for granularity in granularity_list:
    check_usage_unified_v1_v3_completeness(granularity, get_path_date_list(granularity))
# NOTE: printed unconditionally -- it marks the end of the run, not that every date matched.
print('pass')