In [0]:

from pyspark.sql import types as T
from pyspark.sql import functions as F
from aadatapipelinecore.core.urn import Urn
from aadatapipelinecore.core.pipeline import type_
from applications.common.parser import SqlParser
from applications.common.executor import SqlExecutor
from applications.auto_pipeline.transform import _view
spark.sparkContext.addPyFile("/home/hadoop/bdp/application/libs/python/dependencies.zip")
import aaplproxy
class DryRunSqlExecutor(SqlExecutor):
    """SqlExecutor whose `_verify_tasks` hook is a no-op.

    Defined in this cell so that `run(..., dry_run=True)` works on a fresh
    kernel; previously the name only existed once a later cell had been run.
    """

    def _verify_tasks(self):
        pass


def run(spark, raw_data, sql_text, dry_run=True):
    """Register the source views for `sql_text`, parse it into tasks and execute them.

    :param spark: active SparkSession.
    :param raw_data: pipeline message; must contain the keys "namespace",
        "source" and "options". It is NOT modified.
    :param sql_text: SQL script handed to `_view` and `SqlParser`.
    :param dry_run: when True the tasks run through `DryRunSqlExecutor`,
        otherwise through `SqlExecutor`.
    :raises KeyError: if "namespace", "source" or "options" is missing.
    """
    urn = Urn(namespace=raw_data["namespace"])
    # Work on a shallow copy: popping "source"/"options" from the caller's dict
    # made a second run() with the same message fail with KeyError.
    context = dict(raw_data)
    source_data_list = context.pop("source")
    # The option entries are flattened into the top level of the context.
    context.update(context.pop("options"))
    _view(spark, sql_text, None, source_data_list)
    tasks = SqlParser(spark, sql_text, context).parse()
    sql_executor = DryRunSqlExecutor if dry_run else SqlExecutor
    sql_executor(urn, spark, tasks, type_.EventType.TRANSFORM, context).run()
# SQL script with a single CTE, `domain_id_name_mapping`, declared through
# db_table(engine="plproxy", ...).
sql_text ="""
WITH domain_id_name_mapping AS (
    SELECT
        db_table(
            engine="plproxy",
            database="aa",
            sql="SELECT domain_id AS domain_id, name AS domain_name FROM aa_domain_metadata WHERE is_disabled IN ('f') and sensitive_status IN (0)"
        )
);
"""


test_date = '2020-03-24'
namespace = "aa.store.market-size.v1"

# Pipeline message handed to run(): no options and one empty source entry.
ingest_msg = dict(
    namespace=namespace,
    job_type="routine",
    options=dict(),
    source=[dict()],
)
run(spark, ingest_msg, sql_text)

# Show the first three rows of each view.
# NOTE(review): `rank_raw` is not created by `sql_text` above; presumably it is
# left over from another session or is a catalog table - confirm, otherwise the
# second query fails on a fresh kernel.
for preview_target in ("domain_id_name_mapping", "rank_raw"):
    spark.sql("select * from {} limit 3".format(preview_target)).show()


In [0]:
%md
#### download attribution raw data - download attribution share value from DF
```python
raw_path="s3://b2c-mktint-prod-dca-kpi/download_attribution/week_and_month_routine/v1.0.0/WEEK/2020-04-18/AU/part-00000-038bcf58-df5a-487d-b1c9-534b8e30cac4-c000.snappy.parquet"
spark.read.parquet(raw_path).where("product_id in (1093474684, 20600013167630)").show()
+-------------+------------+-----------+----------+--------------+------------------------------+
|  device_code|country_code|granularity|      date|    product_id|est_non_organic_download_share|
+-------------+------------+-----------+----------+--------------+------------------------------+
|    ios-phone|          au|     weekly|2020-04-18|    1093474684|                    0.48971623|
|android-phone|          au|     weekly|2020-04-18|20600013167630|                     0.3903653|
+-------------+------------+-----------+----------+--------------+------------------------------+
```

#### download attribution raw data - unified store est value from store unified data
```python
base_path="s3://b2c-prod-data-pipeline-unified-store-paid/unified/store.app-est.v1/fact/granularity=daily"
parquet_path="s3://b2c-prod-data-pipeline-unified-store-paid/unified/store.app-est.v1/fact/granularity=daily/date=2020-04-{17,18}"
sql_where="date in ('{}') and app_id in (1093474684 ,20600013167630 ) and country_code='AU' ".format("','".join(['2020-04-18','2020-04-17']))
spark.read.option("basePath", base_path).parquet(parquet_path).where(sql_where).show()
+------------------+--------------+------------+-----------------+-----------------+-------+-----------+---------------+----------+-----------+
|       _identifier|        app_id|country_code|free_app_download|paid_app_download|revenue|revenue_iap|revenue_non_iap|      date|device_code|
+------------------+--------------+------------+-----------------+-----------------+-------+-----------+---------------+----------+-----------+
|220200420045618974|    1093474684|          AU|               89|             null|   null|       null|           null|2020-04-18|    ios-all|
|220200420045717774|    1093474684|          AU|               60|             null|   null|       null|           null|2020-04-17|  ios-phone|
|220200420045618974|    1093474684|          AU|               89|             null|   null|       null|           null|2020-04-18|  ios-phone|
|220200420045717774|    1093474684|          AU|               63|             null|   null|       null|           null|2020-04-17|    ios-all|
|220200420045717774|    1093474684|          AU|                3|             null|   null|       null|           null|2020-04-17| ios-tablet|
|220200421062240465|20600013167630|          AU|             1388|             null|   null|       null|           null|2020-04-18|android-all|
|220200421062340592|20600013167630|          AU|             1234|             null|   null|       null|           null|2020-04-17|android-all|
+------------------+--------------+------------+-----------------+-----------------+-------+-----------+---------------+----------+-----------+
```

#### download attribution unified data:
```python
base_path="s3://b2c-prod-data-pipeline-unified-store-paid/unified/store.download-attribution.v3/fact/"
sql_where="granularity='daily' and  date in ('{}') and app_id in (1093474684 ,20600013167630 ) and country_code='AU' ".format("','".join(['2020-04-18','2020-04-17']))
spark.read.format("delta").load(base_path).where(sql_where).show()

+-----------+------------------+----------+----------+-----------+------------+--------------+-----------------+-----------------+-------+----------------------+
|granularity|       _identifier|data_stage|      date|device_code|country_code|        app_id|free_app_download|paid_app_download|revenue|organic_download_share|
+-----------+------------------+----------+----------+-----------+------------+--------------+-----------------+-----------------+-------+----------------------+
|      daily|220200423104651269|     final|2020-04-18|  ios-phone|          AU|    1093474684|               89|             null|   null|    0.5102837681770325|
|      daily|220200423104651269|     final|2020-04-17|  ios-phone|          AU|    1093474684|               60|             null|   null|    0.5102837681770325|
|      daily|220200423104651269|     final|2020-04-18|android-all|          AU|20600013167630|             1388|             null|   null|    0.6096346974372864|
|      daily|220200423104651269|     final|2020-04-17|android-all|          AU|20600013167630|             1234|             null|   null|    0.6096346974372864|
|      daily|220200423104651269|     final|2020-04-17| ios-tablet|          AU|    1093474684|                3|             null|   null|    0.5102837681770325|
+-----------+------------------+----------+----------+-----------+------------+--------------+-----------------+-----------------+-------+----------------------+

```

In [0]:
%md
Accuracy Test Steps:
1. Map the device codes of the DF raw data to the device codes used by the unified data
2. Join the DF raw data with the raw store est data
3. Drop rows where both free_app_download and paid_app_download are null, then coalesce the remaining null values to 0 in both data sets
4. Compare the data

In [0]:

import datetime
from pyspark.sql import types as T
from pyspark.sql import functions as F

from aadatapipelinecore.core.urn import Urn
from aadatapipelinecore.core.pipeline import type_
from applications.common.parser import SqlParser
from applications.common.executor import SqlExecutor
from applications.auto_pipeline.transform import _view
from aadatapipelinecore.core.utils.spark import eject_all_caches

spark.sparkContext.addPyFile("/home/hadoop/bdp/application/libs/python/dependencies.zip")
import aaplproxy

start = "2016-08-28"
end = "2016-09-05"

real_date1 = datetime.date(*(int(part) for part in start.split('-')))
real_date2 = datetime.date(*(int(part) for part in end.split('-')))
date_range = real_date2 - real_date1

# Walk every day in [start, end). Days are buffered in `dates` until a Saturday
# (weekday() == 5) is reached; the buffered days, newest first, are then filed
# under that Saturday and the buffer is emptied. Days after the last Saturday
# stay in `dates` and are not part of any week.
dates = []
sar_list = []
for days in xrange(date_range.days):
    current_day = real_date1 + datetime.timedelta(days)
    dates.append(current_day)
    if current_day.weekday() != 5:
        continue
    week_days = dates[::-1]
    del dates[:]
    sar_list.append({current_day: week_days})

# One tuple per week:
#   ([weekly raw path glob], [day strings of that week], [daily store est paths]).
weekly_raw_template = "s3://b2c-mktint-prod-dca-kpi/download_attribution/week_and_month_routine/v1.0.0/WEEK/{}/*/"
daily_est_template = "s3://b2c-prod-data-pipeline-unified-store-paid/unified/store.app-est.v1/fact/granularity=daily/date={}"

test_path = []
for week in sar_list:
    for week_end, week_days in week.items():
        weekly_raw_paths = [weekly_raw_template.format(week_end)]
        day_labels = [day.strftime("%Y-%m-%d") for day in week_days]
        daily_est_paths = [daily_est_template.format(day) for day in week_days]
        test_path.append((weekly_raw_paths, day_labels, daily_est_paths))


class DryRunSqlExecutor(SqlExecutor):
    """SqlExecutor variant whose `_verify_tasks` hook does nothing."""

    def _verify_tasks(self):
        # Intentionally empty.
        # NOTE(review): presumably this skips the base class's task
        # verification - confirm against SqlExecutor.
        return None


def run(spark, raw_data, sql_text, dry_run=True):
    """Register the source views for `sql_text`, parse it into tasks and execute them.

    :param spark: active SparkSession.
    :param raw_data: pipeline message; must contain the keys "namespace",
        "source" and "options". It is NOT modified.
    :param sql_text: SQL script handed to `_view` and `SqlParser`.
    :param dry_run: when True the tasks run through `DryRunSqlExecutor`,
        otherwise through `SqlExecutor`.
    :raises KeyError: if "namespace", "source" or "options" is missing.
    """
    urn = Urn(namespace=raw_data["namespace"])
    # Work on a shallow copy: popping "source"/"options" from the caller's dict
    # made a second run() with the same message fail with KeyError.
    context = dict(raw_data)
    source_data_list = context.pop("source")
    # The option entries are flattened into the top level of the context.
    context.update(context.pop("options"))
    _view(spark, sql_text, None, source_data_list)
    tasks = SqlParser(spark, sql_text, context).parse()
    sql_executor = DryRunSqlExecutor if dry_run else SqlExecutor
    sql_executor(urn, spark, tasks, type_.EventType.TRANSFORM, context).run()


def test_download_attribution(test_data):
    """Compare row counts of recomputed vs. unified download-attribution data for one week.

    :param test_data: tuple of three lists as built in `test_path`:
        ``test_data[0]`` weekly raw parquet paths (only the first is read),
        ``test_data[1]`` day strings used to filter the unified delta table,
        ``test_data[2]`` daily store est parquet paths (the `store_unified` source).

    Registers the temp views `download_attribution` and
    `test_unified_download_attribution`, runs the SQL script below through
    `run()`, prints 'failed!!!!!!!!!!!!!' when the row counts of
    `compare_data_raw` (app_id not null) and `compare_data_unified` differ,
    and finally calls `eject_all_caches(spark)`. Returns None.
    """
    weekly_raw_root = "s3://b2c-mktint-prod-dca-kpi/download_attribution/week_and_month_routine/v1.0.0/WEEK/"
    weekly_raw_df = spark.read.option("basePath", weekly_raw_root).parquet(test_data[0][0])
    weekly_raw_df.createOrReplaceTempView("download_attribution")

    print(test_data[1])
    unified_df = spark.read.format("delta").load(
        "s3://b2c-prod-data-pipeline-unified-store-paid/unified/store.download-attribution.v3/fact/")
    day_filter = "granularity='daily' and  date in ('{}') ".format("','".join(test_data[1]))
    unified_df.where(day_filter).createOrReplaceTempView("test_unified_download_attribution")

    sql_text = """

WITH download_attribution AS 
( 
       SELECT *, 
              Cast(est_non_organic_download_share AS DECIMAL(36,20)) AS new_est_non_organic_download_share
       FROM   download_attribution ); 
       
WITH download_attribution_1 AS 
( 
       SELECT device_code, 
              country_code, 
              granularity, 
              date, 
              product_id, 
              new_est_non_organic_download_share, 
              CASE 
                     WHEN device_code='android-phone' THEN 'android-all' 
                     WHEN device_code='ios-phone' THEN 'ios-tablet' 
              END AS new_device_code 
       FROM   download_attribution ); 
       
       
WITH download_attribution_2 AS 
( 
                SELECT DISTINCT * 
                FROM            ( 
                                       SELECT new_device_code AS device_code, 
                                              country_code, 
                                              granularity, 
                                              date, 
                                              product_id, 
                                              new_est_non_organic_download_share AS est_non_organic_download_share
                                       FROM   download_attribution_1 
                                       UNION ALL 
                                       SELECT device_code, 
                                              country_code, 
                                              granularity, 
                                              date, 
                                              product_id, 
                                              est_non_organic_download_share 
                                       FROM   download_attribution ) AS t1 
                WHERE           device_code!='android-phone' );WITH union_data AS 
( 
                SELECT          *, 
                                store_unified.device_code  AS unified_device_code , 
                                store_unified.country_code AS unified_country_code 
                FROM            store_unified 
                FULL OUTER JOIN download_attribution_2 
                ON              store_unified.device_code=download_attribution_2.device_code 
                AND             store_unified.country_code=Upper(download_attribution_2.country_code)
                AND             store_unified.app_id=download_attribution_2.product_id 
                WHERE           est_non_organic_download_share IS NOT NULL ); 
                
                
WITH calculate_data_prepare AS 
( 
       SELECT app_id, 
              COALESCE(free_app_download, 0)  AS free_app_download, 
              COALESCE(paid_app_download, 0 ) AS paid_app_download, 
              revenue, 
              unified_device_code, 
              unified_country_code, 
              est_non_organic_download_share 
       FROM   union_data 
       WHERE  NOT ( 
                     free_app_download IS NULL 
              AND    paid_app_download IS NULL) );WITH caculate_data AS 
( 
       SELECT app_id, 
              free_app_download, 
              paid_app_download, 
              revenue, 
              unified_device_code, 
              unified_country_code, 
              est_non_organic_download_share 
       FROM   calculate_data_prepare ); 
       
       
WITH compare_data_raw AS 
( 
       SELECT app_id, 
              free_app_download, 
              paid_app_download, 
              unified_device_code                  AS device_code, 
              unified_country_code                 AS country_code, 
              (1 - est_non_organic_download_share) AS organic_download_share 
       FROM   caculate_data ); 
       
       
WITH compare_data_unified AS 
( 
       SELECT app_id, 
              COALESCE(free_app_download, 0 ) AS free_app_download, 
              COALESCE(paid_app_download, 0 ) AS paid_app_download, 
              device_code, 
              country_code, 
              organic_download_share 
       FROM   test_unified_download_attribution );

    """

    namespace = "aa.store.market-size.v1"
    # The daily store est files of the week are exposed to the SQL as `store_unified`.
    store_unified_source = dict(
        data_encoding="parquet",
        compression="gzip",
        name="store_unified",
        path=test_data[2],
    )
    ingest_msg = dict(
        namespace="aa.store.market-size.v1",
        job_type="routine",
        options=dict(),
        source=[store_unified_source],
    )

    run(spark, ingest_msg, sql_text)

    # Both EXCEPT ALL frames are only constructed; no action is invoked on them.
    # NOTE(review): the row-level differences are therefore never inspected -
    # only the counts below decide pass/fail. Confirm whether that is intended.
    raw_minus_unified = spark.sql("SELECT * FROM compare_data_raw WHERE app_id IS NOT NULL EXCEPT ALL SELECT * FROM compare_data_unified WHERE app_id IS NOT NULL")
    spark.sql("SELECT * FROM compare_data_unified EXCEPT ALL SELECT * FROM compare_data_raw")

    raw_count_rows = spark.sql("SELECT Count(*) FROM compare_data_raw where app_id IS NOT NULL").take(1)
    unified_count_rows = spark.sql("SELECT Count(*) FROM compare_data_unified ").take(1)
    counts_match = raw_count_rows[0][0] == unified_count_rows[0][0]
    if not counts_match:
        print('failed!!!!!!!!!!!!!')

    eject_all_caches(spark)


# Run the accuracy check week by week on the driver.
# The previous `sc.parallelize(map(...), 1)` looked parallel but was not: on
# Python 2 `map` already ran every test eagerly on the driver, and the RDD of
# `None` results it was wrapped in was never used. The test function also
# needs the driver-side `spark` session, so it cannot run on executors.
for week_test_data in test_path:
    test_download_attribution(week_test_data)


In [0]:
%md
#### store est unified data:
```python
base_path="s3://b2c-prod-data-pipeline-unified-store-paid/unified/store.app-est.v1/fact/granularity=daily/date=2020-04-17/"
sql_where="app_id in (1093474684 ,20600013167630) and country_code='AU'"
spark.read.parquet(base_path).where(sql_where).show()
+------------------+--------------+------------+-----------------+-----------------+-------+-----------+---------------+-----------+
|       _identifier|        app_id|country_code|free_app_download|paid_app_download|revenue|revenue_iap|revenue_non_iap|device_code|
+------------------+--------------+------------+-----------------+-----------------+-------+-----------+---------------+-----------+
|220200420045717774|    1093474684|          AU|               60|             null|   null|       null|           null|  ios-phone|
|220200420045717774|    1093474684|          AU|               63|             null|   null|       null|           null|    ios-all|
|220200420045717774|    1093474684|          AU|                3|             null|   null|       null|           null| ios-tablet|
|220200421062340592|20600013167630|          AU|             1234|             null|   null|       null|           null|android-all|
+------------------+--------------+------------+-----------------+-----------------+-------+-----------+---------------+-----------+

```


#### store category unified data:
```python
base_path="s3://b2c-prod-data-pipeline-unified-store-paid/unified/store.app-est-category-rank.v1/fact/granularity=daily/date=2020-04-17"
sql_where="app_id in (1093474684 ,20600013167630) and country_code='AU'"
spark.read.parquet(base_path).where(sql_where).show()
+------------------+--------------+-----------+------------+-----------------+-----------------+-------+-----------+---------------+-----------+
|       _identifier|        app_id|category_id|country_code|free_app_download|paid_app_download|revenue|revenue_iap|revenue_non_iap|device_code|
+------------------+--------------+-----------+------------+-----------------+-----------------+-------+-----------+---------------+-----------+
|220200420045717774|    1093474684|     100000|          AU|             3157|             null|   null|       null|           null|  ios-phone|
|220200420045717774|    1093474684|     100000|          AU|             3898|             null|   null|       null|           null|    ios-all|
|220200420045717774|    1093474684|     100021|          AU|             2417|             null|   null|       null|           null|    ios-all|
|220200420045717774|    1093474684|     100034|          AU|              133|             null|   null|       null|           null|    ios-all|
|220200420045717774|    1093474684|     100021|          AU|             2139|             null|   null|       null|           null|  ios-phone|
|220200420045717774|    1093474684|     100034|          AU|              123|             null|   null|       null|           null|  ios-phone|
|220200420045717774|    1093474684|     100000|          AU|             8142|             null|   null|       null|           null| ios-tablet|
|220200420045717774|    1093474684|     100034|          AU|              219|             null|   null|       null|           null| ios-tablet|
|220200420045717774|    1093474684|     100021|          AU|             4398|             null|   null|       null|           null| ios-tablet|
|220200421062340592|20600013167630|     400000|          AU|              116|             null|   null|       null|           null|android-all|
|220200421062340592|20600013167630|     400001|          AU|               38|             null|   null|       null|           null|android-all|
|220200421062340592|20600013167630|     400002|          AU|                4|             null|   null|       null|           null|android-all|
+------------------+--------------+-----------+------------+-----------------+-----------------+-------+-----------+---------------+-----------+
```


In [0]:
%md

Count Test Steps:
1. Join the store category and est tables, keep the top N rows by category rank, and ignore ios-all data
2. Set a metric to null in the category temp table if its rank is not in the top N
3. filter data with both free_app_download and paid_app_download are not null, and coalesce the null value to 0 for both raw data
4. Use count to get est/category table count
5. Use the count to compare with data from DB
6. Save the test data to qa bucket



In [0]:

from pyspark.sql import types as T
from pyspark.sql import functions as F
import datetime

from aadatapipelinecore.core.urn import Urn
from aadatapipelinecore.core.pipeline import type_
from applications.common.parser import SqlParser
from applications.common.executor import SqlExecutor
from applications.auto_pipeline.transform import _view

spark.sparkContext.addPyFile("/home/hadoop/bdp/application/libs/python/dependencies.zip")
import aaplproxy

class DryRunSqlExecutor(SqlExecutor):
    """SqlExecutor subclass that overrides `_verify_tasks` with an empty body."""

    def _verify_tasks(self):
        # Deliberately does nothing.
        # NOTE(review): presumably disables the parent's task verification for
        # test runs - confirm against SqlExecutor.
        return None
def run(spark, raw_data, sql_text, dry_run=True):
    """Register the source views for `sql_text`, parse it into tasks and execute them.

    :param spark: active SparkSession.
    :param raw_data: pipeline message; must contain the keys "namespace",
        "source" and "options". It is NOT modified.
    :param sql_text: SQL script handed to `_view` and `SqlParser`.
    :param dry_run: when True the tasks run through `DryRunSqlExecutor`,
        otherwise through `SqlExecutor`.
    :raises KeyError: if "namespace", "source" or "options" is missing.
    """
    urn = Urn(namespace=raw_data["namespace"])
    # Work on a shallow copy: popping "source"/"options" from the caller's dict
    # made a second run() with the same message fail with KeyError.
    context = dict(raw_data)
    source_data_list = context.pop("source")
    # The option entries are flattened into the top level of the context.
    context.update(context.pop("options"))
    _view(spark, sql_text, None, source_data_list)
    tasks = SqlParser(spark, sql_text, context).parse()
    sql_executor = DryRunSqlExecutor if dry_run else SqlExecutor
    sql_executor(urn, spark, tasks, type_.EventType.TRANSFORM, context).run()


start = '2013-01-01'
end = '2013-01-02'
real_date1 = datetime.date(*(int(part) for part in start.split('-')))
real_date2 = datetime.date(*(int(part) for part in end.split('-')))
date_range = real_date2 - real_date1

# Every day in [start, end), newest first.
dates = [
    real_date1 + datetime.timedelta(days)
    for days in xrange(date_range.days - 1, -1, -1)
]


# Metric columns checked by the count test, in reporting order.
_COUNT_TEST_METRICS = ("est_free_app_download", "est_paid_app_download", "est_revenue")

_CITUS_EST_TABLE = "store.store_est_fact_v6"
_CITUS_CATEGORY_TABLE = "store.store_est_category_fact_v6"

_UNIFIED_EST_COUNT_SQL = (
    "SELECT Count(app_id) FROM (SELECT DISTINCT app_id, country_code, device_code, {metric} "
    "from unified_rank_filter_data_free_app_download WHERE {metric} IS NOT NULL ) AS test"
)
_UNIFIED_CATEGORY_COUNT_SQL = (
    "SELECT Count(*) FROM (SELECT app_id, country_code, device_code, category_id, {metric} "
    "FROM unified_category_filter_data_free_app_download WHERE {metric}_category IS NOT NULL ) AS test"
)

_COUNT_TEST_RESULT_PATH = "s3://b2c-prod-data-pipeline-qa/aa.store/result_store_unified_db_count_demo/"

_COUNT_TEST_SQL = """
    
    
    -- rank_unified,store_unified
    WITH unified_data_test AS 
    ( 
                    SELECT          store_unified.country_code, 
                                    store_unified.device_code, 
                                    store_unified.free_app_download AS est_free_app_download , 
                                    store_unified.paid_app_download AS est_paid_app_download, 
                                    store_unified.revenue           AS est_revenue, 
                                    store_unified.revenue_iap       AS est_revenue_iap, 
                                    store_unified.revenue_non_iap   AS est_revenue_non_iap, 
                                    rank_unified.category_id, 
                                    rank_unified.app_id, 
                                    rank_unified.free_app_download, 
                                    rank_unified.paid_app_download, 
                                    rank_unified.revenue, 
                                    rank_unified.revenue_iap, 
                                    rank_unified.revenue_non_iap, 
                                    rank_unified.granularity, 
                                    rank_unified.date 
                    FROM            rank_unified 
                    FULL OUTER JOIN store_unified 
                    ON              rank_unified.app_id = store_unified.app_id 
                    AND             rank_unified.country_code = store_unified.country_code 
                    AND             rank_unified.device_code = store_unified.device_code 
                    AND             rank_unified.date = store_unified.date );
    
    
    
    WITH unified_rank_filter_data_free_app_download AS 
    ( 
           SELECT * 
           FROM   unified_data_test 
           WHERE ( ( ( 
                                free_app_download<=1000 
                         AND    country_code!="WW" ) 
                  OR     ( 
                                free_app_download<=4000 
                         AND    country_code=="WW" ) ) 
           OR     ( ( 
                                paid_app_download<=1000 
                         AND    country_code!="WW" ) 
                  OR     ( 
                                paid_app_download<=4000 
                         AND    country_code=="WW" ) ) 
           OR     ( ( 
                                revenue<=1000 
                         AND    country_code!="WW" ) 
                  OR     ( 
                                revenue<=4000 
                         AND    country_code=="WW" ) ) )
           AND    device_code!='ios-all'
    );
    
    
    
           WITH unified_category_filter_data_free_app_download AS 
    ( 
           SELECT * ,
           CASE WHEN (free_app_download > 1000 and country_code !='WW') or (free_app_download > 4000 and country_code =='WW' ) or (free_app_download is null or free_app_download <= 0) Then null else est_free_app_download END AS est_free_app_download_category,
           CASE WHEN (paid_app_download > 1000 and country_code !='WW') or (paid_app_download > 4000 and country_code =='WW' ) or (paid_app_download is null or paid_app_download <= 0) Then null else est_paid_app_download END AS est_paid_app_download_category,
           CASE WHEN (revenue > 1000 and country_code !='WW') or (revenue > 4000 and country_code =='WW') or (revenue is null or revenue <= 0) Then null else est_revenue  END AS est_revenue_category
           FROM   unified_rank_filter_data_free_app_download 


    );    

    
    """


def _country_mapping_source(name, path):
    """Describe one tab-separated country-mapping CSV source for the ingest message."""
    return {
        "data_encoding": "csv",
        "compression": "gzip",
        "name": name,
        "data_schema": [
            {"name": "store_id", "type": "int", "nullable": False},
            {"name": "country_code", "type": "string", "nullable": False},
            {"name": "country_name", "type": "string", "nullable": False}
        ],
        "csv_options": {
            'header': True,
            'sep': '\t',
            'quote': '',
            'encoding': 'utf-8',
            'escape': ''
        },
        "path": [path],
    }


def _count_test_ingest_msg(test_date):
    """Build the pipeline message (namespace, options, sources) for one test date."""
    return {
        "namespace": "aa.store.market-size.v1",
        "job_type": "routine",
        "options": {},
        "source": [
            {
                "data_encoding": "parquet",
                "compression": "gzip",
                "name": "store_unified",
                "path": [
                    "s3://b2c-prod-data-pipeline-unified-store-paid/unified/store.app-est.v1/fact/granularity=daily/date={}".format(
                        test_date)],
            },
            {
                "data_encoding": "parquet",
                "compression": "gzip",
                "name": "rank_unified",
                "path": [
                    "s3://b2c-prod-data-pipeline-unified-store-paid/unified/store.app-est-category-rank.v1/fact/granularity=daily/date={}".format(
                        test_date)],
            },
            {
                "data_encoding": "parquet",
                "compression": "gzip",
                "name": "category_mapping_deminsion_service",
                "path": ["s3://b2c-prod-data-pipeline-qa/aa.store/store_cateogry_mapping"],
            },
            _country_mapping_source(
                "ios_country_mapping",
                "s3://b2c-prod-dca-store-estimates/store_back/dimension/IOS_COUNTRY_MAPPING"),
            _country_mapping_source(
                "android_country_mapping",
                "s3://b2c-prod-dca-store-estimates/store_back/dimension/ANDROID_COUNTRY_MAPPING"),
        ]
    }


def _citus_dsn():
    """Return the psycopg2 DSN for the Citus store database.

    The password is read from the CITUS_PASSWORD environment variable; it must
    not live in the notebook source.

    :raises RuntimeError: if CITUS_PASSWORD is unset or empty.
    """
    import os

    password = os.environ.get("CITUS_PASSWORD")
    if not password:
        raise RuntimeError(
            "CITUS_PASSWORD is not set; export the Citus QA password before running test_count")
    return (
        "dbname='{db}' user='{user}' password='{password}' "
        "host='{host}' port='{port}'".format(
            db="aa_store_db",
            user="citus_bdp_prod_app_int_qa",
            host="10.2.10.254",
            password=password,
            port=5432
        )
    )


def _fetch_citus_counts(test_date):
    """Query Citus for the daily counts of `test_date`.

    Returns a dict with:
      "est"                - non-null row counts per metric in the est fact table,
      "category"           - the same for the category fact table,
      "est_app_count"      - distinct app_id count in the est fact table,
      "category_app_count" - distinct app_id count in the category fact table.
    """
    import psycopg2
    from contextlib import closing

    metric_sql = "SELECT Count(*) FROM {table} WHERE date=%s and granularity='daily' and {metric} IS NOT NULL"
    app_sql = "SELECT Count (DISTINCT app_id) FROM {table} WHERE date=%s and granularity='daily'"
    # The date travels as a bound parameter; table and column names are
    # module constants and cannot be bound.
    params = (str(test_date),)

    def scalar(cur, sql):
        cur.execute(sql, params)
        return cur.fetchone()[0]

    # `with connection` in psycopg2 only ends the transaction; closing() makes
    # sure the connection itself is released. One connection serves all queries.
    with closing(psycopg2.connect(_citus_dsn())) as conn:
        conn.autocommit = True
        with conn.cursor() as cur:
            est = [scalar(cur, metric_sql.format(table=_CITUS_EST_TABLE, metric=metric))
                   for metric in _COUNT_TEST_METRICS]
            category = [scalar(cur, metric_sql.format(table=_CITUS_CATEGORY_TABLE, metric=metric))
                        for metric in _COUNT_TEST_METRICS]
            est_app_count = scalar(cur, app_sql.format(table=_CITUS_EST_TABLE))
            category_app_count = scalar(cur, app_sql.format(table=_CITUS_CATEGORY_TABLE))

    return {
        "est": est,
        "category": category,
        "est_app_count": est_app_count,
        "category_app_count": category_app_count,
    }


def _spark_scalar(sql):
    """Run `sql` on the notebook's `spark` session and return the first cell of the first row."""
    return spark.sql(sql).take(1)[0][0]


def test_count(test_date):
    """Compare per-metric row counts between the unified data and the Citus DB for one day.

    :param test_date: `datetime.date` of the day under test.

    Steps: run the count-test SQL over the unified est / category-rank data,
    count non-null rows per metric for the est and category views, fetch the
    corresponding counts from Citus, append both result sets to the QA delta
    table and print pass/fail lines for est, category and app_count.
    Requires the CITUS_PASSWORD environment variable. Returns None.
    """
    from aadatapipelinecore.core.utils.retry import retry

    run(spark, _count_test_ingest_msg(test_date), _COUNT_TEST_SQL)

    unified_result = [_spark_scalar(_UNIFIED_EST_COUNT_SQL.format(metric=metric))
                      for metric in _COUNT_TEST_METRICS]
    unified_category_result = [_spark_scalar(_UNIFIED_CATEGORY_COUNT_SQL.format(metric=metric))
                               for metric in _COUNT_TEST_METRICS]

    db_counts = _fetch_citus_counts(test_date)
    db_result = db_counts["est"]
    db_category_result = db_counts["category"]

    day_label = test_date.strftime("%Y-%m-%d")
    df_write_result = spark.createDataFrame(
        [("est", day_label, unified_result, db_result),
         ("category", day_label, unified_category_result, db_category_result)],
        schema=["type", "test_date", "unified", "db"])

    def write_test_result(df_write_result):
        df_write_result.write.format("delta").save(_COUNT_TEST_RESULT_PATH,
                                                   mode="append",
                                                   partitionBy=["type"])
    retry(write_test_result, (df_write_result,), {}, interval=10)

    checks = [
        ("est", unified_result == db_result),
        ("category", unified_category_result == db_category_result),
        # Both fact tables are expected to cover the same set of apps.
        ("app_count", db_counts["est_app_count"] == db_counts["category_app_count"]),
    ]
    for label, passed in checks:
        if passed:
            print("{} {} pass".format(test_date, label))
        else:
            print("{} {} failed!!!!!".format(test_date, label))



# Run the count check date by date on the driver.
# The previous `sc.parallelize(map(...), 1)` looked parallel but was not: on
# Python 2 `map` already ran every test eagerly on the driver, and the RDD of
# `None` results it was wrapped in was never used. The test function also
# needs the driver-side `spark` session, so it cannot run on executors.
for count_test_date in dates:
    test_count(count_test_date)
