In [1]:
from snowflake.snowpark.session import Session
from snowflake.snowpark.types import *
from snowflake.snowpark.functions import col, lit
import getpass
import pandas as pd
import json
from get_secret import get_secret
from datetime import datetime
import snowflake.snowpark.functions as F

In [2]:
# Base connection settings come from the secret payload, which is parsed as JSON.
connection_parameters = json.loads(get_secret())

# Settings specific to this notebook's session; they override any same-named
# keys already present in the secret.
session_overrides = {
    'warehouse': 'COMPUTE_WH',
    'database': 'ANALYTICS_PROD',
    'schema': 'IOATAWARE',
    "loglevel": 'DEBUG',
}
connection_parameters.update(session_overrides)

# Open the Snowpark session used by the rest of the notebook.
session_builder = Session.builder.configs(connection_parameters)
session = session_builder.create()

In [3]:
# Sanity check: show which database/schema and warehouse the session reports.
current_schema = session.get_fully_qualified_current_schema()
print(f"Current Database and schema: {current_schema}")
current_warehouse = session.get_current_warehouse()
print(f"Current Warehouse: {current_warehouse}")

Current Database and schema: "ANALYTICS_PROD"."IOATAWARE"
Current Warehouse: "COMPUTE_WH"


# Import and clean Snowflake views

In [7]:
# Name of the Snowflake object that is loaded below via session.table().
view_name = "VW_MONTHLY_REVENUE"

In [8]:
# Reference the view through the Snowpark session opened earlier in the notebook.
snowpark_df = session.table(view_name)

In [9]:
# Convert the Snowpark DataFrame to a pandas DataFrame for the local comparison.
snowflake_pd_df = snowpark_df.to_pandas()

In [10]:
# Normalise dtypes so the Snowflake rows can be compared with the report rows:
# the flight date becomes a datetime and every revenue column becomes a float.
snowflake_pd_df['Flight Date'] = pd.to_datetime(snowflake_pd_df['Flight Date'])
for revenue_column in ('Daily Base Revenue', 'Daily Ancillary Revenue', 'Total'):
    snowflake_pd_df[revenue_column] = snowflake_pd_df[revenue_column].astype(float)

In [12]:
# Date window for the comparison: the start bound is exclusive, the end bound
# is inclusive.
start_date = '2022-07-31'
end_date = '2023-07-31'

flight_dates = snowflake_pd_df['Flight Date']
mask = flight_dates.gt(start_date) & flight_dates.le(end_date)

# Snowflake rows whose flight date falls inside the window.
snowflake_pd_df1 = snowflake_pd_df.loc[mask]

In [17]:
# Spot check: show the filtered Snowflake row(s) for a single flight date.
# The date literal is written without the stray trailing tab it used to carry,
# so the match does not depend on the date parser tolerating whitespace.
snowflake_pd_df1[snowflake_pd_df1["Flight Date"] == "2023-05-01"]

Unnamed: 0,Flight Date,Daily Base Revenue,Daily Ancillary Revenue,Total
1089,2023-05-01,732975.53,555411.12,1288386.65


In [18]:
# Spot check: show the report row(s) for the same flight date.
# NOTE(review): report_df is only created by the pd.read_csv cell further down,
# so on a fresh kernel run top to bottom this cell has no report_df yet.
report_on_2023_05_01 = report_df["FlightDate"] == "2023-05-01"
report_df.loc[report_on_2023_05_01]

Unnamed: 0,FlightDate,DailyBaseRevenue,DailyAncillaryRevenue,Total
274,2023-05-01,727066.95,722803.74,1449870.69


In [24]:
# Spot check: show the filtered Snowflake row(s) for a single flight date.
# The date literal is written without the stray trailing tab it used to carry,
# so the match does not depend on the date parser tolerating whitespace.
snowflake_pd_df1[snowflake_pd_df1["Flight Date"] == "2023-07-19"]

Unnamed: 0,Flight Date,Daily Base Revenue,Daily Ancillary Revenue,Total
8,2023-07-19,1131266.45,632493.4,1763759.85


In [25]:
# Spot check: show the report row(s) for the same flight date.
# NOTE(review): report_df is only created by the pd.read_csv cell further down,
# so on a fresh kernel run top to bottom this cell has no report_df yet.
report_on_2023_07_19 = report_df["FlightDate"] == "2023-07-19"
report_df.loc[report_on_2023_07_19]

Unnamed: 0,FlightDate,DailyBaseRevenue,DailyAncillaryRevenue,Total
353,2023-07-19,1105147.37,809052.79,1914200.16


In [None]:
# TODO: remove the column(s) we are not testing for now (this cell contains no code yet)

# Import and clean report data

In [21]:
# Load the exported report; the path is relative to the notebook's working directory.
report_csv_path = 'MonthlyRevenue/Monthly_Revenue_Summary.csv'
report_df = pd.read_csv(report_csv_path, skiprows=0)

In [22]:
# Strip the currency formatting ("$" and thousands separators) from the report's
# revenue columns so they can be converted to floats afterwards.
#
# regex=False is passed explicitly: "$" is a regular-expression anchor, and the
# default value of `regex` differs between pandas versions, so a literal
# replacement must be requested rather than assumed.
for currency_column in ('DailyBaseRevenue', 'DailyAncillaryRevenue', 'Total'):
    report_df[currency_column] = (
        report_df[currency_column]
        .str.replace('$', '', regex=False)
        .str.replace(',', '', regex=False)
    )

In [23]:
# Normalise the report's dtypes: the flight date becomes a datetime and every
# revenue column becomes a float.
report_df['FlightDate'] = pd.to_datetime(report_df['FlightDate'])
for revenue_column in ('DailyBaseRevenue', 'DailyAncillaryRevenue', 'Total'):
    report_df[revenue_column] = report_df[revenue_column].astype(float)

# Compare two dataframes

In [None]:
# Turn every row of each frame into a tuple so whole rows can be compared with
# set operations. The comparison is positional (column names are ignored) and
# uses exact equality on every value, including the float revenue figures.
# NOTE(review): this uses the full snowflake_pd_df, not the date-filtered
# snowflake_pd_df1 built earlier; confirm which frame is intended.
set_snowflake = {tuple(row) for row in snowflake_pd_df.to_numpy()}
set_report = {tuple(row) for row in report_df.to_numpy()}

# Rows present, value-for-value, in both sources.
common_rows = set_snowflake & set_report

In [None]:
# Flag each Snowflake row by whether its tuple of values is one of the common rows.
snowflake_row_tuples = snowflake_pd_df.apply(tuple, axis=1)
snowflake_is_common = snowflake_row_tuples.isin(common_rows)

in_both_df = snowflake_pd_df[snowflake_is_common]
only_in_snowflake_df = snowflake_pd_df[~snowflake_is_common]

# Same membership test for the report rows; only the unmatched ones are kept.
report_row_tuples = report_df.apply(tuple, axis=1)
report_is_common = report_row_tuples.isin(common_rows)
only_in_report_df = report_df[~report_is_common]


In [None]:
# Display the Snowflake rows that also appear, value-for-value, in the report.
in_both_df

# Compute accuracy and output the difference files

In [None]:
# Accuracy = matched rows as a percentage of the number of report rows.
matched_row_count = len(in_both_df)
report_row_count = len(report_df)
accuracy_percent = (matched_row_count / report_row_count) * 100
print("accuracy is: ", accuracy_percent)

In [None]:
# Write the rows found in only one source to their own CSV files.
#
# An empty frame is detected explicitly: to_csv() does not raise for an empty
# DataFrame, so the "No ... records" message could never be triggered that way.
# The file is still written in that case (header only), so a stale file from an
# earlier run is not left behind.
#
# Only file-system errors are caught, and the real error is reported. Any other
# exception (for example a missing variable because an earlier cell was not
# run) propagates instead of being reported as "no records".
difference_outputs = (
    (only_in_snowflake_df, "snowflake_only_record.csv", "No snowflake only records"),
    (only_in_report_df, "report_only_record.csv", "No report only records"),
)
for difference_df, csv_path, empty_message in difference_outputs:
    if difference_df.empty:
        print(empty_message)
    try:
        difference_df.to_csv(csv_path, index=False)
    except OSError as write_error:
        # Keep going so one failed write does not prevent the other file.
        print(f"Could not write {csv_path}: {write_error}")