## Accuracy: 98.33% 

Things to note:
1. Columns to drop (they differ between the two sources): purchase cnt, Ancillary Category, Resident Base Discount
2. "Sked Detail Id Nmbr" is null

In [29]:
from snowflake.snowpark.session import Session
from snowflake.snowpark.types import *
from snowflake.snowpark.functions import col, lit
import getpass
import pandas as pd
import json
from get_secret import get_secret
from datetime import datetime
import snowflake.snowpark.functions as F

In [30]:
# get_secret() is expected to return a JSON object of connection settings;
# the warehouse/database/schema/loglevel entries below are layered on top of it.
connection_parameters = {
    **json.loads(get_secret()),
    'warehouse': 'COMPUTE_WH',
    'database': 'ANALYTICS_PROD',
    'schema': 'IOATAWARE',
    "loglevel": 'DEBUG',
}
session = Session.builder.configs(connection_parameters).create()

In [31]:
# Sanity check: show which database/schema and warehouse the session resolved to.
current_schema = session.get_fully_qualified_current_schema()
print(f"Current Database and schema: {current_schema}")
current_warehouse = session.get_current_warehouse()
print(f"Current Warehouse: {current_warehouse}")

Current Database and schema: "ANALYTICS_PROD"."IOATAWARE"
Current Warehouse: "COMPUTE_WH"


# Import and clean snowflake views

In [46]:
# Name of the Snowflake object that is loaded with session.table() in the next cell.
view_name = "VW_BASIC_CHARGE_DATA"
# Value the 'Charge Date' column is compared against when filtering the view.
# NOTE(review): "08/01/2023" is ambiguous (Aug 1 vs Jan 8) — presumably MM/DD/YYYY; confirm.
charge_date = "08/01/2023"

In [47]:
# Snowpark DataFrame for the view; rows are only brought into pandas later via .to_pandas().
snowpark_df = session.table(view_name)

In [48]:
# Keep only the rows for the chosen charge date, then pull them into pandas.
charge_date_filter = F.col('Charge Date') == charge_date
snowflake_pd_df = snowpark_df.filter(charge_date_filter).to_pandas()

In [49]:
# Columns dropped before the comparison, and the amount columns that must be floats.
columns_to_remove = ['SOURCE','Charge Date']
float_columns = ['CHARGE_AMT', 'CHARGE TAXES', 'CHARGE TOTAL']

# One chained pass: drop, cast the amounts to float, parse the charge date.
snowflake_pd_df = (
    snowflake_pd_df
    .drop(columns=columns_to_remove)
    .astype({name: float for name in float_columns})
    .assign(CHARGE_DATE=lambda frame: pd.to_datetime(frame['CHARGE_DATE']))
)

  snowflake_pd_df['CHARGE_DATE'] = pd.to_datetime(snowflake_pd_df['CHARGE_DATE'])


In [66]:
#snowflake_pd_df.dtypes

# Import and clean report data

In [51]:
# Report extract that the Snowflake view is checked against.
report_csv_path = 'not_start_Basic_Charge/Basic_Charges_0801_Amelia.csv'
report_df = pd.read_csv(report_csv_path, skiprows=0)

In [52]:
def process_fee_column(value):
    """Convert an accounting-formatted currency cell to a float.

    Strips the dollar sign and thousands separators and treats a value wrapped
    in parentheses as negative, e.g. ``"($1,234.50)"`` -> ``-1234.5``.

    Args:
        value: A cell from a fee column. Usually a string such as ``"$5.00"``,
            but may already be numeric or missing (``None``/NaN) when pandas
            parsed the column itself.

    Returns:
        The amount as a float; NaN when the cell is missing.

    Raises:
        ValueError: If the cleaned text is not a valid number (including an
            empty string).
    """
    # Missing cells come through as None/NaN, not as strings.
    if value is None:
        return float("nan")
    # Already-numeric cells (including NaN) need no text clean-up.
    if isinstance(value, (int, float)):
        return float(value)

    cleaned = str(value).strip()
    cleaned = cleaned.replace('$', '')   # Remove $
    cleaned = cleaned.replace(',', '')   # Remove thousands separators
    cleaned = cleaned.replace(')', '')   # Remove )
    cleaned = cleaned.replace('(', '-')  # Replace ( with -
    return float(cleaned)

In [53]:
# Columns dropped from the report before it is compared with Snowflake.
columns_to_remove = ['Source','SALE_USERNAME']
report_df = report_df.drop(columns=columns_to_remove)

columns_to_process = ['CHARGE_AMT', 'CHARGE_TAXES','CHARGE_TOTAL']

# Apply the function to specified columns in the dataframe.
# The loop variable is deliberately not called `col`: that name is imported
# from snowflake.snowpark.functions and would be overwritten by a string.
for fee_column in columns_to_process:
    report_df[fee_column] = report_df[fee_column].apply(process_fee_column)

report_df['CHARGE_DATE'] = pd.to_datetime(report_df['CHARGE_DATE'])

  report_df['CHARGE_DATE'] = pd.to_datetime(report_df['CHARGE_DATE'])


In [54]:
#report_df.dtypes

# Compare two dataframes

In [55]:
# Relabel the Snowflake columns with the report's column names, purely by position
# (the first Snowflake column gets the first report header, and so on).
# NOTE(review): this assumes both frames hold the same columns in the same order — confirm.
snowflake_pd_df.columns = report_df.columns

In [56]:
def calculate_df2_accuracy(df1, df2):
    """Measure how many rows of each frame also appear in the other.

    The frames are outer-merged with no explicit key, so pandas joins on the
    columns they have in common; a row counts as matched only when all of
    those columns are equal.

    Args:
        df1: First DataFrame (the "left" side of the merge).
        df2: Second DataFrame (the "right" side of the merge).

    Returns:
        A tuple ``(accuracy_df1, accuracy_df2, df1_only, df2_only)``:
        the percentage of ``df1`` rows found in ``df2``, the percentage of
        ``df2`` rows found in ``df1``, and the merged rows present only in
        ``df1`` / only in ``df2`` (both still carry the ``_merge`` indicator
        column). A percentage is NaN when its frame has no rows.
    """
    merged = df1.merge(df2, how='outer', indicator=True)
    df1_only = merged[merged['_merge'] == "left_only"]
    df2_only = merged[merged['_merge'] == "right_only"]

    def _matched_percentage(total_rows, unmatched_rows):
        # An empty frame has no meaningful match rate; avoid dividing by zero.
        if total_rows == 0:
            return float("nan")
        return ((total_rows - unmatched_rows) / total_rows) * 100

    # Calculate accuracy as a percentage
    accuracy_df1 = _matched_percentage(len(df1), len(df1_only))
    accuracy_df2 = _matched_percentage(len(df2), len(df2_only))

    return accuracy_df1, accuracy_df2, df1_only, df2_only


In [57]:
# The report is passed as df1 and Snowflake as df2, so results unpack in (report, snowflake) order.
comparison = calculate_df2_accuracy(report_df, snowflake_pd_df)
report_accuracy, snowflake_accuracy, report_only, snowflake_only = comparison

In [58]:
# Share of Snowflake rows that also exist in the report, plus the unmatched count.
snowflake_summary = (
    f"Accuracy: {snowflake_accuracy:.2f}% snowflake records in report\n"
    "The number of records in snowflake but not in report is"
)
print(snowflake_summary, len(snowflake_only))

# Share of report rows that also exist in Snowflake, plus the unmatched count.
report_summary = (
    f"Accuracy: {report_accuracy:.2f}% report records in snowflake\n"
    "The number of records in report but not in snowflake is"
)
print(report_summary, len(report_only))

Accuracy: 60.03% snowflake records in report
The number of records in snowflake but not in report is 61783
Accuracy: 99.99% report records in snowflake
The number of records in report but not in snowflake is 7


# Output the difference file

In [60]:
# Write the unmatched rows from each side to CSV.
# to_csv() does not fail on an empty frame, so "no records" is detected
# explicitly instead of being inferred from an exception.
difference_exports = (
    ("snowflake", snowflake_only, "Basic_Charge/snowflake_only_record.csv"),
    ("report", report_only, "Basic_Charge/report_only_record.csv"),
)
for source_label, unmatched_rows, output_path in difference_exports:
    if unmatched_rows.empty:
        print(f"No {source_label} only records")
    try:
        # Written even when empty so a file left by an earlier run is not stale.
        unmatched_rows.to_csv(output_path, index=False)
    except OSError as exc:
        # Keep the notebook running, but report the real cause (e.g. missing folder).
        print(f"Could not write {output_path}: {exc}")

In [61]:
# Distinct reservation numbers seen on each side (Snowflake first, then the report).
snow_res, report_res = (
    list(frame['RES_NMBR'].unique())
    for frame in (snowflake_pd_df, report_df)
)

In [63]:
# Dump every report reservation number as one comma-terminated run of values.
report_file_path = "Basic_Charge/report_reservation.txt"
with open(report_file_path, "w") as file:
    file.writelines("%s," % item for item in report_res)

In [65]:
# Reservation numbers that appear on one side only.
elements_in_snow_not_in_report = list(set(snow_res) - set(report_res))
elements_in_report_not_in_snow = list(set(report_res) - set(snow_res))

snow_file_path = "Basic_Charge/snowonly_reservation.txt"
report_file_path = "Basic_Charge/reportonly_reservation.txt"

# Write each list to its own file as comma-terminated values.
with open(snow_file_path, "w") as file:
    for item in elements_in_snow_not_in_report:
        file.write("%s," % item)
# The report-only file must hold the report-only reservations
# (it previously received the Snowflake-only list by mistake).
with open(report_file_path, "w") as file:
    for item in elements_in_report_not_in_snow:
        file.write("%s," % item)