In [0]:

from bdce.common.utils import update_application_code
# `spark` is not defined in this file; presumably it is the session object injected by
# the notebook runtime - TODO confirm.
# NOTE(review): update_application_code is opaque from here; by its name and arguments it
# looks like it refreshes the code of the named application for the given role - confirm
# against bdce.common.utils.
update_application_code(spark, role="BDP-PROD-APP-INT-QA", application_name="zidong-application-autopipeline")

# Register the dependency bundle from the temp location with the SparkContext.
# This must run before the import below, which presumably resolves from that bundle.
spark.sparkContext.addPyFile("/tmp/zeppelin_application_code/libs/python/dependencies.zip")
# Alternative bundle location, currently disabled:
# spark.sparkContext.addPyFile("/home/hadoop/bdp/application/libs/python/dependencies.zip")
import aaplprox.connection

In [0]:

import random
import datetime
import unittest
from applications.db_check_v1.common.db_check_utils import _get_date_from_refresh_routing_config, etl_skip
from applications.db_check_v1.common.constants import query
from applications.db_check_v1.common.db_check_utils import query_df
from applications.db_check_v1.common.base_test import PipelineTest
from applications.db_check_v1.common.utils import string_to_datetime, datetime_to_string
from pyspark.sql.types import DoubleType
from pyspark.sql import functions
from pyspark.sql.functions import lit, coalesce
from pyspark.sql.types import LongType
# Connection settings for the Citus database queried by the completeness tests.
# NOTE(review): the access id and secret key are hard-coded in source; consider loading
# them from a secret store or the environment instead.
CITUS_USAGE_HOSTS = [('10.2.10.254', 5432)]  # list of (host, port) pairs
CITUS_USAGE_NAME = 'aa_store_db'
CITUS_USAGE_ACCESS_ID = 'citus_bdp_prod_app_int_qa'
CITUS_USAGE_SECRET_KEY = 'wZw8cfBuuklIskVG'

# Numeric code -> {device code string -> device name}.
# NOTE(review): not referenced in the visible part of this file.
DEVICE_CODE_MAPPING = {
    1: {
        '1': 'android-phone',
        '2': 'android-tablet',
    },
    2: {
        '1': 'ios-phone',
        '2': 'ios-tablet',
    },
}

# Granularity name used by the tests -> value of the `range_type=` path segment.
GRANULARITY_IN_RAW_PATH_MAPPING = dict(daily="DAY", weekly="WEEK", monthly="MONTH")

# Key/value DSN string built from the first entry of CITUS_USAGE_HOSTS.
CITUS_DSN = (
    "dbname='{db}' user='{user}' password='{password}' "
    "host='{host}' port='{port}'"
).format(**{
    'db': CITUS_USAGE_NAME,
    'user': CITUS_USAGE_ACCESS_ID,
    'password': CITUS_USAGE_SECRET_KEY,
    'host': CITUS_USAGE_HOSTS[0][0],
    'port': CITUS_USAGE_HOSTS[0][1],
})


class UsageRoutineRawData(object):
    """
    Reader for the raw basic-KPI parquet data delivered by Data Foundation.
    """
    # S3 location template; `granularity` fills the range_type segment, `date` the date segment.
    _raw_s3_path = 's3://aardvark-prod-pdx-mdm-to-int/basic_kpi/version=1.0.0/range_type={granularity}/date={date}'

    def __init__(self, spark):
        # Object whose `.read.parquet(path)` is used to load the data.
        self.spark = spark

    def get(self, granularity, date):
        """
        Load the raw data for one granularity and date.

        :param granularity: key of GRANULARITY_IN_RAW_PATH_MAPPING
        :param date: value substituted into the `date=` path segment
        :return: Raw data from DF team
        :rtype: pyspark.sql.DataFrame
        """
        reader = self.spark.read
        range_type = GRANULARITY_IN_RAW_PATH_MAPPING[granularity]
        location = self._raw_s3_path.format(granularity=range_type, date=date)
        return reader.parquet(location)


class TestUsageRoutineRawCompleteness(PipelineTest):
    """
    Shared completeness check between the raw routine data and the citus table.

    For every checked date, the row count of the raw parquet data must equal the
    row count stored in ``table_name`` for the same date and granularity.
    Concrete test classes provide ``check_date`` and ``granularity``.
    ``self.spark`` is not set in this class; presumably it is supplied by
    ``PipelineTest`` - TODO confirm.
    """
    table_name = 'usage.usage_basic_kpi_fact_v6'
    db_name = 'usage'

    def setUp(self):
        # The parent setUp call is disabled:
        # super(PipelineTest, self).setUp()
        self.check_date = None
        self.granularity = None

    def check_routine_raw_completeness(self):
        """
        Assert that raw and citus row counts match for every date to check.

        For the 'daily' granularity the check covers ``check_date`` and the six
        days before it; for any other granularity only ``check_date`` is checked.

        :raises AssertionError: when the counts differ for any checked date
        """
        dates_to_check = [self.check_date]

        if self.granularity == 'daily':
            end_date = string_to_datetime(self.check_date)
            weekly_day_nums = 7
            dates_to_check = [
                datetime_to_string(end_date - datetime.timedelta(days=offset))
                for offset in range(weekly_day_nums)
            ]

        for date in dates_to_check:
            routine_df = UsageRoutineRawData(self.spark).get(self.granularity, date)
            routine_count = routine_df.count()

            citus_result = self.get_citus_db_count(date)
            # Prints the same text as the former `print a, b` statement, while also
            # being valid syntax on Python 3.
            print("{} {}".format(routine_count, citus_result))

            # The count is read from the first column of the first returned row.
            citus_count = citus_result[0][0]
            self.assertEqual(routine_count, citus_count,
                             msg="found count mismatch when comparing usage routine raw and citus db. "
                                 "granularity is {}, date is {}, raw count is:{}, citus db count is:{}".format(
                                    self.granularity, date, routine_count, citus_count))

    def get_citus_db_count(self, date):
        """
        Count the rows of ``table_name`` for ``date`` and the current granularity.

        :param date: value compared against the table's ``date`` column
        :return: whatever ``query`` returns for the count statement; callers index
            it as ``result[0][0]``
        """
        sql = """select count(1) as cnt from {table_name} where date='{date}' and granularity='{granularity}';
        """.format(table_name=self.table_name, date=date, granularity=self.granularity)
        return query(CITUS_DSN, sql)


class TestUsageRoutineRawCompletenessDaily(TestUsageRoutineRawCompleteness):
    """Completeness check for the "daily" granularity."""

    # NOTE(review): looks like (cron expression, integer); the meaning of both parts is
    # defined by _get_date_from_refresh_routing_config - confirm against that helper.
    trigger_date_config = ("0 12 * * 5", 6)
    # Evaluated once, when the class body is executed.
    check_date = _get_date_from_refresh_routing_config(trigger_date_config)

    def setUp(self):
        # The base setUp is not called here, so the class-level check_date stays
        # visible on the instance.
        self.granularity = "daily"

    def test_routine_raw_completeness_daily(self):
        self.check_routine_raw_completeness()


class TestUsageRoutineRawCompletenessWeekly(TestUsageRoutineRawCompleteness):
    """Completeness check for the "weekly" granularity."""

    # NOTE(review): looks like (cron expression, integer); the meaning of both parts is
    # defined by _get_date_from_refresh_routing_config - confirm against that helper.
    trigger_date_config = ("0 12 * * 5", 6)
    # Evaluated once, when the class body is executed.
    check_date = _get_date_from_refresh_routing_config(trigger_date_config)

    def setUp(self):
        # The base setUp is not called here, so the class-level check_date stays
        # visible on the instance.
        self.granularity = "weekly"

    # NOTE(review): etl_skip is opaque from here; presumably it skips the test under
    # some ETL-related condition - confirm against db_check_utils.
    @etl_skip()
    def test_routine_raw_completeness_weekly(self):
        self.check_routine_raw_completeness()


class TestUsageRoutineRawCompletenessMonthly(TestUsageRoutineRawCompleteness):
    """Completeness check for the "monthly" granularity."""

    # NOTE(review): looks like (cron expression, integer); the meaning of both parts is
    # defined by _get_date_from_refresh_routing_config - confirm against that helper.
    # NOTE(review): the expression ends with a trailing space, unlike the daily/weekly
    # configs - verify the helper tolerates it.
    trigger_date_config = ("0 12 6 * * ", 6)
    # Evaluated once, when the class body is executed.
    check_date = _get_date_from_refresh_routing_config(trigger_date_config)

    def setUp(self):
        # The base setUp is not called here, so the class-level check_date stays
        # visible on the instance.
        self.granularity = "monthly"

    # NOTE(review): etl_skip is opaque from here; presumably it skips the test under
    # some ETL-related condition - confirm against db_check_utils.
    @etl_skip()
    def test_routine_raw_completeness_monthly(self):
        self.check_routine_raw_completeness()


# Build the suite and run it with the text runner. Only the daily completeness case
# is enabled; the lines below are the disabled cases, kept for reference.
# suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestUsageRoutineRawCompletenessWeekly))
# suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestUsageRoutineRawCompletenessMonthly))
# suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestUsageRoutineRawAccuracyDaily))
# suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestUsageRoutineRawAccuracyWeekly))
# suite.addTests(unittest.TestLoader().loadTestsFromTestCase(TestUsageRoutineRawAccuracyMonthly))
suite = unittest.TestSuite(
    unittest.TestLoader().loadTestsFromTestCase(TestUsageRoutineRawCompletenessDaily)
)
runner = unittest.TextTestRunner()
runner.run(suite)