# Summarizing an AmazonCustomer Review

## Introduction
This program summarizes customer reviews for a given Amazon product whose URL is given.

In [5]:
#import packages
import requests
import os
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display
from openai import OpenAI

In [6]:
# Load environment variables. 
# Here we're loading the OpenAPI Key, instead of hardcoding it in the code.
# This requires keeping the key in .env file. 

load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')
MODEL = "gpt-4o-mini"    # The OpenAI mode to use

# Check the key
if not api_key:
    print("No API key was found, please check that .env file exists.")
elif not api_key.startswith("sk-proj-"):
    print("An API key was found, but it doesn't start with sk-proj-; please check you're using the right key")
elif api_key.strip() != api_key:
    print("An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them")
else:
    print("API key found, and looks good so far!")

API key found, and looks good so far!


In [7]:
# Create object of OpenAI
openai = OpenAI()

In [23]:
# Define a class to represent a Webpage
# Some websites need you to use proper headers when fetching them:
headers = {
 "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
}

class Website:

    def __init__(self, url):
        self.url = url
        response = requests.get(url, headers=headers)
        soup = BeautifulSoup(response.text, 'html.parser')
        # Find all review elements (adjust the class or tag based on the structure of the page)
        reviews =  soup.select("div.review")
        # Extract review details (e.g., rating, title, text)
        print(reviews)
        for review in reviews:
            rating = review.find('i', class_='a-icon-star').text.strip() if review.find('i', class_='a-icon-star') else 'No Rating'
            title = review.find('a', class_='review-title').text.strip() if review.find('a', class_='review-title') else 'No Title'
            text = review.find('span', class_='a-size-base review-text review-text-content').text.strip() if review.find('span', class_='a-size-base review-text review-text-content') else 'No Review Text'
    
            print(f"Rating: {rating}")
            print(f"Title: {title}")
            print(f"Review: {text}\n")

In [25]:
website = Website('https://www.amazon.com/Apple-2022-10-9-inch-iPad-Wi-Fi/dp/B0BJLXMVMV')


[]


In [31]:
def fetch_amazon_reviews(product_url):
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }

    response = requests.get(product_url, headers=headers)
    if response.status_code == 200:
        soup = BeautifulSoup(response.text, 'html.parser')

        # Find review section (this part may vary based on Amazon's structure)
        reviews = []
        review_section = soup.find_all('span', {'class': 'cr-widget-FocalReviews'})  # Adjust this based on your findings
        print(review_section)
        for review in review_section:
            review_text = review.get_text(strip=True)
            reviews.append(review_text)

        return reviews
    else:
        return None

# Example usage
product_url = 'https://www.amazon.com/Apple-2022-10-9-inch-iPad-Wi-Fi/dp/B0BJLXMVMV'  # Replace with the product URL you want to scrape
reviews = fetch_amazon_reviews(product_url)

if reviews:
    for idx, review in enumerate(reviews[:5]):  # Show first 5 reviews as example
        print(f"Review {idx + 1}: {review}")
else:
    print("Failed to retrieve reviews.")

[]
Failed to retrieve reviews.
