In [None]:
import csv
import requests
import os
import boto3
from datetime import datetime

# Function to fetch categories and save them to CSV
def fetch_and_save_categories():
    """Fetch the Amazon product category list and write it to a local CSV file.

    Calls the RapidAPI ``product-category-list`` endpoint, authenticating with
    the key held in the ``RAPIDAPI_KEY`` environment variable, and writes one
    row per category (columns ``ID``, ``Name``, ``Date``) to
    ``amazon_AllCategories.csv`` in the current working directory.

    Returns:
        str: Name of the CSV file that was written.

    Raises:
        RuntimeError: If the API answers with a status code other than 200.
            Raising here (instead of falling through to the ``return``) keeps
            the caller from trying to upload a file that was never created.
    """
    url = "https://real-time-amazon-data.p.rapidapi.com/product-category-list"
    headers = {
        "X-RapidAPI-Key": os.getenv('RAPIDAPI_KEY'),
        "X-RapidAPI-Host": "real-time-amazon-data.p.rapidapi.com"
    }
    params = {}

    # A timeout keeps the cell from hanging forever if the API never answers.
    response = requests.get(url, headers=headers, params=params, timeout=30)

    if response.status_code != 200:
        raise RuntimeError(f"Failed to retrieve data: {response.status_code}")

    # "data" may be missing or null in the payload; treat both as "no categories".
    categories = response.json().get("data") or []

    # Define CSV file name and headers
    csv_file = "amazon_AllCategories.csv"
    csv_headers = ["ID", "Name", "Date"]

    # Stamp every row with the same date, even if the run crosses midnight.
    run_date = datetime.now().strftime("%Y%m%d")

    # Write data to CSV file
    with open(csv_file, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=csv_headers)
        writer.writeheader()
        writer.writerows(
            {
                "ID": category.get("id", ""),
                "Name": category.get("name", ""),
                "Date": run_date,
            }
            for category in categories
        )
    print("Data has been written to", csv_file)

    return csv_file

# Main function to upload CSV file to S3
def upload_csv_to_s3(csv_file):
    """Upload a local CSV file to the project S3 bucket and delete the local copy.

    The bucket is probed with ``head_bucket``; if that fails with error code
    ``404`` the bucket is created, and any other client error is re-raised.
    The object key is today's date (``YYYYMMDD``) followed by ``/`` and
    ``csv_file``.

    Args:
        csv_file: Path of the local CSV file; the same string is used as the
            object name under the date prefix.
    """
    client = boto3.client(
        's3',
        aws_access_key_id=os.getenv('AWS_ACCESS_KEY_ID'),
        aws_secret_access_key=os.getenv('AWS_SECRET_ACCESS_KEY'),
        region_name='us-east-1',
    )
    bucket = 'amazon-live-api-data-group2'

    # Make sure the destination bucket exists before uploading.
    try:
        client.head_bucket(Bucket=bucket)
    except client.exceptions.ClientError as err:
        if err.response['Error']['Code'] != '404':
            raise err
        # The create call takes a LocationConstraint only outside us-east-1.
        region = client.meta.region_name
        create_args = {'Bucket': bucket}
        if region != 'us-east-1':
            create_args['CreateBucketConfiguration'] = {'LocationConstraint': region}
        client.create_bucket(**create_args)

    # Objects are grouped under a per-day prefix.
    date_prefix = datetime.now().strftime("%Y%m%d") + "/"
    key = date_prefix + csv_file
    client.upload_file(csv_file, bucket, key)

    # The local file is no longer needed once it is in S3.
    os.remove(csv_file)

    print(f"Data has been uploaded to S3: s3://{bucket}/{key}")

# Entry point for this cell: export the categories, then upload the result.
if __name__ == "__main__":
    # Fetch the category list and write it to a local CSV; keep the file name
    csv_file = fetch_and_save_categories()
    # Upload that CSV to S3 (the upload step also deletes the local file)
    upload_csv_to_s3(csv_file)

Next, for each category saved above, query the product search API and append the results to the day's product CSV in S3.

In [None]:
import csv
import requests
import os
import boto3
from datetime import datetime
import io

# Function to make API call and append data to existing product CSV
def fetch_and_save_data(category_id, categories_csv_data, s3_client, bucket_name):
    """Search one category for products and append them to the day's CSV in S3.

    Queries the RapidAPI ``search`` endpoint (query ``"Phone"``, country
    ``"US"``) restricted to ``category_id``. The resulting rows are appended to
    the object ``<YYYYMMDD>/productdetails/<YYYYMMDD>_products.csv`` in
    ``bucket_name``: the existing object, if any, is downloaded, the new rows
    are added after its rows, and the whole file is written back.

    Args:
        category_id: Category identifier sent to the API and stored in the
            ``CategoryId`` column of every new row.
        categories_csv_data: Not used by this function; kept so existing
            callers keep working.
        s3_client: S3 client exposing ``get_object``, ``put_object`` and
            ``exceptions.NoSuchKey``.
        bucket_name: Name of the bucket holding the product CSV.

    Returns:
        None. On a non-200 API response a message is printed and nothing is
        written to S3.
    """
    url = "https://real-time-amazon-data.p.rapidapi.com/search"
    headers = {
        "X-RapidAPI-Key": os.getenv('RAPIDAPI_KEY'),
        "X-RapidAPI-Host": "real-time-amazon-data.p.rapidapi.com"
    }
    params = {
        "query": "Phone",
        "country": "US",
        "category_id": category_id
    }

    # A timeout keeps one unresponsive request from stalling the whole loop.
    response = requests.get(url, headers=headers, params=params, timeout=30)

    if response.status_code != 200:
        print("Failed to retrieve data for category:", category_id, "Status Code:", response.status_code)
        return

    # "data" / "products" may be missing or null; treat both as "no products".
    data = response.json()
    products = (data.get("data") or {}).get("products") or []

    # CSV column name -> key of the product record returned by the API.
    column_sources = [
        ("ASIN", "asin"),
        ("Product Title", "product_title"),
        ("Price", "product_price"),
        ("Original Price", "product_original_price"),
        ("Currency", "currency"),
        ("Star Rating", "product_star_rating"),
        ("Number of Ratings", "product_num_ratings"),
        ("Product URL", "product_url"),
        ("Number of Offers", "product_num_offers"),
        ("Minimum Offer Price", "product_minimum_offer_price"),
        ("Is Best Seller", "is_best_seller"),
        ("Is Amazon Choice", "is_amazon_choice"),
        ("Is Prime", "is_prime"),
        ("Climate Pledge Friendly", "climate_pledge_friendly"),
        ("Sales Volume", "sales_volume"),
        ("Delivery", "delivery"),
    ]
    csv_headers = [column for column, _ in column_sources] + ["CategoryId"]

    # Build the rows for this category.
    new_data = []
    for product in products:
        row = {column: product.get(source, "") for column, source in column_sources}
        row["CategoryId"] = category_id
        new_data.append(row)

    # Define CSV object key for today's product file
    current_date = datetime.now().strftime("%Y%m%d")
    csv_file_name = f"{current_date}_products.csv"
    product_category_file_key = f"{current_date}/productdetails/{csv_file_name}"

    # Load the rows already stored for today, if the object exists.
    try:
        existing_csv_data = s3_client.get_object(Bucket=bucket_name, Key=product_category_file_key)
    except s3_client.exceptions.NoSuchKey:
        existing_data_list = []
    else:
        existing_csv_content = existing_csv_data['Body'].read().decode('utf-8')
        # newline="" lets the csv module do its own line-ending handling.
        existing_data_list = list(csv.DictReader(io.StringIO(existing_csv_content, newline="")))

    # Combine existing data with new data
    combined_data = existing_data_list + new_data

    # Write combined data to a new CSV string
    csv_buffer = io.StringIO()
    writer = csv.DictWriter(csv_buffer, fieldnames=csv_headers)
    writer.writeheader()
    writer.writerows(combined_data)

    # Upload updated CSV content to S3
    s3_client.put_object(Bucket=bucket_name, Key=product_category_file_key, Body=csv_buffer.getvalue())
    print(f"Data has been appended and uploaded to S3: s3://{bucket_name}/{product_category_file_key}")



# Load today's category list from S3
bucket_name = 'amazon-live-api-data-group2'
current_date = datetime.now().strftime("%Y%m%d")
categories_file_key = f"{current_date}/amazon_AllCategories.csv"
s3_client = boto3.client(
    's3',
    aws_access_key_id=os.getenv('AWS_ACCESS_KEY_ID'),
    aws_secret_access_key=os.getenv('AWS_SECRET_ACCESS_KEY'),
    region_name='us-east-1',
)
categories_csv_data = s3_client.get_object(Bucket=bucket_name, Key=categories_file_key)
categories_csv_lines = categories_csv_data['Body'].read().decode('utf-8').splitlines()
categories = csv.DictReader(categories_csv_lines)

# Call the product API once per category ID and store the results
for row in categories:
    category_id = row["ID"]
    fetch_and_save_data(category_id, categories_csv_data, s3_client, bucket_name)

In [None]:
import csv
import requests
import os
import boto3
from datetime import datetime
import io

def fetch_and_save_deals():
    """Fetch current Amazon deals, write them to a local CSV and upload it to S3.

    Calls the RapidAPI ``deals`` endpoint (country ``US``, all deal states and
    types), writes one row per deal to ``deals_new.csv`` in the current working
    directory, then uploads that file to the bucket
    ``amazon-live-api-data-group2`` under ``<YYYYMMDD>/deals/deals_new.csv``.
    The local file is left in place after the upload.

    Returns:
        None. On a non-200 API response a message is printed and no file is
        written or uploaded.
    """
    url = "https://real-time-amazon-data.p.rapidapi.com/deals"
    headers = {
        "X-RapidAPI-Key": os.getenv('RAPIDAPI_KEY'),
        "X-RapidAPI-Host": "real-time-amazon-data.p.rapidapi.com"
    }
    params = {
        "country": "US",
        "deal_state": "ALL",
        "deal_type": "ALL"
    }

    def price_amount(price):
        # Price fields are expected to be {"amount": ...} objects, but may be
        # absent or null; anything that is not a dict yields an empty cell.
        return price.get("amount", "") if isinstance(price, dict) else ""

    # A timeout keeps the cell from hanging forever if the API never answers.
    response = requests.get(url, headers=headers, params=params, timeout=30)

    if response.status_code != 200:
        print("Failed to retrieve deals. Status Code:", response.status_code)
        return

    # "data" / "deals" may be missing or null; treat both as "no deals".
    data = response.json()
    deals = (data.get("data") or {}).get("deals") or []

    # Define CSV file name and headers
    current_date = datetime.now().strftime("%Y%m%d")
    csv_file_name = "deals_new.csv"
    csv_headers = ["Deal ID", "Deal Type", "Deal Title", "Deal Photo", "Deal State", "Deal URL", "Deal Starts At", "Deal Ends At", "Deal Price Min", "Deal Price Max", "Deal Badge", "Type", "Product ASIN", "Is Prime", "Date"]

    # Write data to CSV file
    with open(csv_file_name, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=csv_headers)
        writer.writeheader()
        for deal in deals:
            writer.writerow({
                "Deal ID": deal.get("deal_id", ""),
                "Deal Type": deal.get("deal_type", ""),
                "Deal Title": deal.get("deal_title", ""),
                "Deal Photo": deal.get("deal_photo", ""),
                "Deal State": deal.get("deal_state", ""),
                "Deal URL": deal.get("deal_url", ""),
                "Deal Starts At": deal.get("deal_starts_at", ""),
                "Deal Ends At": deal.get("deal_ends_at", ""),
                "Deal Price Min": price_amount(deal.get("deal_price_min")),
                "Deal Price Max": price_amount(deal.get("deal_price_max")),
                "Deal Badge": deal.get("deal_badge", ""),
                "Type": deal.get("type", ""),
                "Product ASIN": deal.get("product_asin", ""),
                "Is Prime": deal.get("is_prime", ""),
                "Date": current_date
            })

    # Upload CSV file to S3 bucket
    s3_client = boto3.client('s3',
                             aws_access_key_id=os.getenv('AWS_ACCESS_KEY_ID'),
                             aws_secret_access_key=os.getenv('AWS_SECRET_ACCESS_KEY'),
                             region_name='us-east-1')

    # The object is stored under a per-day "deals" prefix.
    bucket_name = 'amazon-live-api-data-group2'
    folder_key = f"{current_date}/deals"
    object_key = f"{folder_key}/{csv_file_name}"
    with open(csv_file_name, "rb") as file:
        s3_client.upload_fileobj(file, bucket_name, object_key)

    print(f"Data has been written to and uploaded to S3: s3://{bucket_name}/{object_key}")

# Entry point for this cell: fetch the deals, write the CSV and upload it to S3.
if __name__ == "__main__":
    fetch_and_save_deals()


Response: {"status":"OK","request_id":"808f5901-5ec7-445d-b693-3c23e130fa79","data":{"deals":[{"deal_id":"eaacaf73","deal_type":"BEST_DEAL","deal_title":"Apple iPad Air (5th Generation): with M1 chip, 10.9-inch Liquid Retina Display, 64GB, Wi-Fi 6, 12MP front/12MP Back Camera, Touch ID, All-Day Battery Life – Blue","deal_photo":"https://m.media-amazon.com/images/I/71VbHaAqbML.jpg","deal_state":"AVAILABLE","deal_url":"https://www.amazon.com/2022-Apple-iPad-10-9-inch-Wi-Fi/dp/B09V3JH3XS","canonical_deal_url":"https://www.amazon.com/deal/eaacaf73","deal_starts_at":"2024-05-10T07:00:00.000Z","deal_ends_at":"2024-07-01T06:59:59.000Z","deal_price":{"amount":"399.0","currency":"USD"},"list_price":{"amount":"599.0","currency":"USD"},"savings_percentage":33,"savings_amount":{"amount":"200.0","currency":"USD"},"deal_badge":"33% off","type":"SINGLE_ITEM","product_asin":"B09V3JH3XS"},{"deal_id":"7ff715e1","deal_type":"BEST_DEAL","deal_title":"Apple AirPods Max Wireless Over-Ear Headphones, Active 