In [34]:
import pandas as pd
import os
import warnings
import requests
import time
import json
from dotenv import load_dotenv
import logging
# Silence every Python warning for the rest of the session (note: this also hides deprecation notices from libraries)
warnings.filterwarnings('ignore')

# Timestamped INFO-level logging; `logger` is used by the cells below for progress and error messages
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Load environment variables (python-dotenv); APIConnection later reads API_KEY and API_HOST via os.getenv
load_dotenv()

True

In [3]:
# The 'APIConnection' class manages the connection to the e-commerce API using HTTP requests via Python's 'requests' package

# Configuration for API: config.json is read from the parent of the current working directory
path = os.path.dirname(os.getcwd())
# Decode explicitly as UTF-8 (the JSON interchange encoding) instead of relying on the platform default
with open(os.path.join(path, 'config.json'), encoding='utf-8') as config_file:
    config = json.load(config_file)
            
class APIConnection:
    """
    Handles the Real-Time API connection and fetches product information.

    Settings come from the module-level ``config`` mapping (loaded from
    ``config.json``) unless overridden through the constructor arguments.

    Attributes:
        headers: HTTP headers sent with every request.
        url: Endpoint queried for product listings.
        total_pages: Number of result pages fetched by ``parse_data``.
        params: Query parameters for the selected product. This is a private
            copy, so the shared ``config`` mapping is never modified.
    """

    # Seconds to wait on a single HTTP request before giving up.
    REQUEST_TIMEOUT = 30
    # Pause, in seconds, after each successfully parsed page.
    PAGE_DELAY = 3

    def __init__(self, product, total_pages=None, url=None, headers=None):
        """
        Args:
            product: Key into ``config['Product']`` selecting the query parameters.
            total_pages: Number of pages to fetch. A falsy value or a value
                below 1 falls back to ``config.get('total_pages', 1)``.
            url: Endpoint URL; defaults to ``config.get('url')``.
            headers: Request headers; defaults to the RapidAPI key/host headers
                read from the ``API_KEY`` and ``API_HOST`` environment variables.

        Raises:
            ValueError: If ``product`` has no entry in ``config['Product']``.
        """
        products = config['Product']
        if product not in products:
            # Fail fast with a clear message instead of a TypeError on the first request
            raise ValueError(f"Unknown product {product!r}; available: {list(products)}")

        self.headers = headers or {"x-rapidapi-key": os.getenv("API_KEY"), "x-rapidapi-host": os.getenv("API_HOST")}
        self.url = url or config.get('url')
        self.total_pages = total_pages if (total_pages and total_pages >= 1) else config.get('total_pages', 1)
        # Copy the parameters so requests never write back into the shared config
        self.params = dict(products[product] or {})

    def request(self, page):
        """
        Fetch one page of results.

        Args:
            page: Page number sent as the ``page`` query parameter.

        Returns:
            The decoded JSON body, or ``None`` when the request fails or the
            body is not valid JSON (the error is logged).
        """
        # Build the query per call; self.params itself stays untouched
        params = {**self.params, 'page': page}
        try:
            response = requests.get(
                url=self.url,
                headers=self.headers,
                params=params,
                timeout=self.REQUEST_TIMEOUT,  # without a timeout a stalled server blocks forever
            )
            response.raise_for_status()
            return response.json()
        except requests.exceptions.JSONDecodeError as err:
            # Must precede RequestException, which is its base class
            logger.error(f"Parsing JSON error: {err} on page {page}")
            return None
        except requests.exceptions.RequestException as err:
            logger.error(f"An unexpected error occurred: {err} on page {page}")
            return None

    @staticmethod
    def _extract_products(payload):
        """Return the list at ``payload['data']['products']``, or ``None`` if the payload lacks that shape."""
        if not isinstance(payload, dict):
            return None
        container = payload.get('data')
        if not isinstance(container, dict):
            return None
        products = container.get('products')
        return products if isinstance(products, list) else None

    def parse_data(self):
        """
        Fetch pages ``1..total_pages`` and collect their product records.

        Pages that fail to download or that do not contain a product list are
        logged and skipped.

        Returns:
            A list with the product records of every successfully parsed page.
        """
        data = []
        for page in range(1, self.total_pages + 1):
            payload = self.request(page)
            if not payload:
                logger.error(f"Failed to retrieve data on page {page}")
                continue

            products = self._extract_products(payload)
            if products is None:
                logger.error(f"Invalid Response on page {page}")
                continue

            data.extend(products)
            # Space out successful requests before the next one is issued
            time.sleep(self.PAGE_DELAY)
        return data


# Fetch every product type listed in the config and tag each record with its product name
all_data = []
for product in config['Product']:
    logger.info(f"Starting data retrieval for product: {product}")
    api_conn = APIConnection(product)
    data = api_conn.parse_data()

    # Label each record so rows from different product types stay distinguishable after aggregation
    for item in data:
        item['Product'] = product
        all_data.append(item)

# Build a single DataFrame from the records gathered across all product types
logger.info("Completed data retrieval for all product types")
df = pd.DataFrame(all_data)

2025-02-26 17:03:42,496 - INFO - Starting data retrieval for product: Phone
2025-02-26 17:05:15,619 - INFO - Starting data retrieval for product: Tablet
2025-02-26 17:06:49,311 - INFO - Starting data retrieval for product: Headphones & Earphones


In [21]:
# Export raw data to CSV in the 'data' folder next to the current working directory's parent
data_path = os.path.join(os.path.dirname(os.getcwd()), 'data')
# Create the output folder if needed so the export does not fail on a missing directory
os.makedirs(data_path, exist_ok=True)
# to_csv keeps its default index=True, so the DataFrame index is written as the first column
df.to_csv(os.path.join(data_path,'raw_data.csv'))