## Loading the JSON data

In [7]:
import pandas as pd
import json

#Loading the json files
def load_json_file(file_path):
    """Load a JSON Lines file (one JSON document per line) into a list.

    Blank and whitespace-only lines are skipped, so a trailing empty line or
    spacing between records does not make the whole file fail to load.

    Args:
        file_path: Path of the file to read; it is decoded as UTF-8.

    Returns:
        A list with one parsed object per non-blank line, in file order, or
        None when the file is missing, contains a malformed line, or any
        other error occurs. A status message is printed in every case.
    """
    try:
        with open(file_path, "r", encoding="utf-8") as file:
            # json.loads("") / json.loads("\n") raise JSONDecodeError, so
            # filter out lines that carry no content before parsing.
            data = [json.loads(line) for line in file if line.strip()]
        print(f"Success: JSON Loaded from {file_path}")
        return data
    except json.JSONDecodeError as e:
        print(f"Failed: Error loading JSON from {file_path}: {e}")  # Handle JSON formatting errors
        return None
    except FileNotFoundError:
        print(f"File not found: {file_path}")   # Handle missing files
        return None
    except Exception as e:
        print(f"An unexpected error occurred: {e}") # Catch any other unexpected errors
        return None
    
# Parse each raw JSON Lines file into a Python list (None when loading failed).
# The calls run left to right, so the status messages keep the same order.
users_data, brands_data, receipts_data = (
    load_json_file("users.json"),
    load_json_file("brands.json"),
    load_json_file("receipts.json"),
)

Success: JSON Loaded from users.json
Success: JSON Loaded from brands.json
Success: JSON Loaded from receipts.json


## Read and normalize the JSON data

In [8]:
import pandas as pd

def load_and_normalize_json(file_path, explode_column=None, sep='_', n_preview=5):
    """Read a JSON Lines file and flatten its nested objects into columns.

    Args:
        file_path: Path of the line-delimited JSON file to read.
        explode_column: Optional name of a column holding lists of objects.
            When given, the returned frame contains ONLY the flattened items
            of that column (one row per item); rows without a value are
            dropped and the other columns of the file are not carried over.
        sep: Separator placed between parent and child keys in the names of
            flattened columns.
        n_preview: Number of leading rows printed as a preview.

    Returns:
        The flattened DataFrame, or an empty DataFrame if anything fails
        (the error is printed rather than raised).
    """
    try:
        # Round-trip through plain records so json_normalize can expand nested dicts.
        records = pd.read_json(file_path, lines=True).to_dict(orient='records')
        df = pd.json_normalize(records, sep=sep)

        if explode_column:
            # One row per list element; missing lists become NaN and are discarded.
            nested_items = df.explode(explode_column)[explode_column].dropna()
            df = pd.json_normalize(nested_items, sep=sep)

        print(f"\nPreview of {file_path}:")
        print(df.head(n_preview))
        return df

    except Exception as e:
        print(f"Error loading {file_path}: {e}")
        return pd.DataFrame()  # Callers always receive a DataFrame, even on failure
    
# Build the flattened DataFrames; for receipts only the exploded line items are kept.
# The calls run left to right, so the previews print in the same order.
users_df, brands_df, receipts_df = (
    load_and_normalize_json('users.json'),
    load_and_normalize_json('brands.json'),
    load_and_normalize_json('receipts.json', explode_column='rewardsReceiptItemList'),
)



Preview of users.json:
   active      role signUpSource state                  _id_$oid  \
0    True  consumer        Email    WI  5ff1e194b6a9d73a3a9f1052   
1    True  consumer        Email    WI  5ff1e194b6a9d73a3a9f1052   
2    True  consumer        Email    WI  5ff1e194b6a9d73a3a9f1052   
3    True  consumer        Email    WI  5ff1e1eacfcf6c399c274ae6   
4    True  consumer        Email    WI  5ff1e194b6a9d73a3a9f1052   

   createdDate_$date  lastLogin_$date  lastLogin  
0      1609687444800     1.609688e+12        NaN  
1      1609687444800     1.609688e+12        NaN  
2      1609687444800     1.609688e+12        NaN  
3      1609687530554     1.609688e+12        NaN  
4      1609687444800     1.609688e+12        NaN  

Preview of brands.json:
        barcode        category      categoryCode                       name  \
0  511111019862          Baking            BAKING  test brand @1612366101024   
1  511111519928       Beverages         BEVERAGES                  Starbucks

## Convert JSON to CSV

In [None]:
import pandas as pd

def convert_json_to_csv():
    """Flatten the users, brands and receipts JSON files and export them as CSV.

    Reads 'users.json', 'brands.json' and 'receipts.json' (line-delimited
    JSON) from the current working directory and writes 'users.csv',
    'brands.csv' and 'receipts.csv' beside them. The items held in the
    receipts' 'rewardsReceiptItemList' column are additionally written, one
    row per item, to 'receipt_items.csv'.

    Any exception is caught and printed; files written before the failure
    are left in place. Returns None.
    """
    def _flatten(json_path):
        # Parse line-delimited JSON, then expand nested objects into
        # underscore-joined column names.
        parsed = pd.read_json(json_path, lines=True)
        return pd.json_normalize(parsed.to_dict(orient='records'), sep='_')

    exports = (
        ('users.json', 'users.csv'),
        ('brands.json', 'brands.csv'),
        ('receipts.json', 'receipts.csv'),
    )

    try:
        flattened = {}
        for json_path, csv_path in exports:
            flattened[json_path] = _flatten(json_path)
            flattened[json_path].to_csv(csv_path, index=False)
            print(f"{csv_path} created successfully!")

        # One row per purchased item; receipts without an item list are dropped.
        exploded = flattened['receipts.json'].explode('rewardsReceiptItemList')
        item_rows = pd.json_normalize(
            exploded['rewardsReceiptItemList'].dropna(), sep='_'
        )
        item_rows.to_csv('receipt_items.csv', index=False)
        print("receipt_items.csv created successfully!")

    except Exception as error:
        print(f"Error: {error}")

# Run the conversion when this cell executes: writes users.csv, brands.csv,
# receipts.csv and receipt_items.csv into the current working directory.
# Failures are printed by the function rather than raised.
convert_json_to_csv()


users.csv created successfully!
brands.csv created successfully!
receipts.csv created successfully!
receipt_items.csv created successfully!
