### Loading the json data

In [None]:
import pandas as pd
import json



#Loading the json files
def load_json_file(file_path): 
    try:
        with open(file_path, "r", encoding="utf-8") as file:
            data = [json.loads(line) for line in file]  
        print(f"Success: JSON Loaded from {file_path}")
        return data
    except json.JSONDecodeError as e:
        print(f"Failed: Error loading JSON from {file_path}: {e}")  # Handle JSON formatting errors
        return None
    except FileNotFoundError:
        print(f"File not found: {file_path}")   # Handle missing files
        return None
    except Exception as e:
        print(f"An unexpected error occurred: {e}") # Catch any other unexpected errors
        return None
    
# Load JSON files into Python lists
users_data = load_json_file("../data_folder/users.json")
brands_data = load_json_file("../data_folder/brands.json")
receipts_data = load_json_file("../data_folder/receipts.json")

Success: JSON Loaded from ../data_folder/users.json
Success: JSON Loaded from ../data_folder/brands.json
Success: JSON Loaded from ../data_folder/receipts.json


### Read and normalize the json data

In [3]:
import pandas as pd

def load_and_normalize_json(file_path, explode_column=None, sep='_', n_preview=5):
    try:
        #read and normalize the data 
        df = pd.read_json(file_path, lines=True)            
        df = pd.json_normalize(df.to_dict(orient='records'), sep=sep) 
        
        # Explode nested column if specified
        if explode_column:
            df = df.explode(explode_column)
            df = pd.json_normalize(df[explode_column].dropna(), sep=sep)

        # Preview the data
        print(f"\nPreview of {file_path}:")
        print(df.head(n_preview))
        
        return df
    
    except Exception as e:
        print(f"Error loading {file_path}: {e}")
        return pd.DataFrame()  # Return empty DataFrame on error
    
# Load and normalize data from JSON files
users_df = load_and_normalize_json('../data_folder/users.json')
brands_df = load_and_normalize_json('../data_folder/brands.json')
receipts_df = load_and_normalize_json('../data_folder/receipts.json', explode_column='rewardsReceiptItemList')



Preview of ../data_folder/users.json:
   active      role signUpSource state                  _id_$oid  \
0    True  consumer        Email    WI  5ff1e194b6a9d73a3a9f1052   
1    True  consumer        Email    WI  5ff1e194b6a9d73a3a9f1052   
2    True  consumer        Email    WI  5ff1e194b6a9d73a3a9f1052   
3    True  consumer        Email    WI  5ff1e1eacfcf6c399c274ae6   
4    True  consumer        Email    WI  5ff1e194b6a9d73a3a9f1052   

   createdDate_$date  lastLogin_$date  lastLogin  
0      1609687444800     1.609688e+12        NaN  
1      1609687444800     1.609688e+12        NaN  
2      1609687444800     1.609688e+12        NaN  
3      1609687530554     1.609688e+12        NaN  
4      1609687444800     1.609688e+12        NaN  

Preview of ../data_folder/brands.json:
        barcode        category      categoryCode                       name  \
0  511111019862          Baking            BAKING  test brand @1612366101024   
1  511111519928       Beverages         BEVERA