# Import necessary libraries

In [1]:
import pandas as pd

# Load the dataset

In [5]:
# Read the scraper's CSV export into a DataFrame.
scraped = pd.read_csv(
    filepath_or_buffer="../scraper/scraped_products.csv",
)

In [6]:
# Preview the first five scraped rows.
scraped.head(n=5)

Unnamed: 0,ProductName,ScrapedPrice,Rating
0,A Light in the Attic,51.77,Three
1,Tipping the Velvet,53.74,One
2,Soumission,50.1,One
3,Sharp Objects,47.82,Four
4,Sapiens: A Brief History of Humankind,54.23,Five


## Convert Ratings to Numbers

In [8]:
# Map the spelled-out rating words to integer scores 1 through 5.
rating_map = {
    word: score
    for score, word in enumerate(("One", "Two", "Three", "Four", "Five"), start=1)
}

# Add a numeric companion column next to the textual Rating column.
scraped["RatingScore"] = scraped["Rating"].map(rating_map)

In [9]:
# Spot-check the word-to-number conversion on the first five rows.
scraped[['Rating', 'RatingScore']].head(n=5)

Unnamed: 0,Rating,RatingScore
0,Three,3
1,One,1
2,One,1
3,Four,4
4,Five,5


## Basic Validation

In [10]:
# Only the last expression of a cell is rendered automatically, so the
# per-column missing-value counts must be passed to display() explicitly;
# otherwise they are computed and thrown away.
display(scraped.isnull().sum())

# Summary statistics for the numeric columns (rendered as the cell output).
scraped.describe()

Unnamed: 0,ScrapedPrice,RatingScore
count,20.0,20.0
mean,38.0485,2.85
std,15.135231,1.565248
min,13.99,1.0
25%,22.6375,1.0
50%,41.38,3.0
75%,51.865,4.0
max,57.25,5.0


## Merge with Sales Data

In [11]:
# Read the feature-engineered sales data produced by an earlier step.
sales = pd.read_csv(
    filepath_or_buffer="../data/cleaned/sales_data_features.csv",
)

In [12]:
# Attach the scraped market price and rating score to each sales row,
# matching on ProductName.
#  - how="left" keeps every sales row; products with no scraped counterpart
#    get NaN in ScrapedPrice / RatingScore.
#  - validate="many_to_one" makes pandas raise MergeError if a ProductName
#    appears more than once in the scraped data, which would otherwise
#    silently duplicate sales rows in the result.
df = sales.merge(
    scraped[['ProductName', 'ScrapedPrice', 'RatingScore']],
    how="left",
    on="ProductName",
    validate="many_to_one",
)

In [13]:
# Preview the merged columns side by side for the first five rows.
df[['ProductName', 'UnitPrice', 'ScrapedPrice', 'RatingScore']].head(n=5)

Unnamed: 0,ProductName,UnitPrice,ScrapedPrice,RatingScore
0,Jeans,14066.28,,
1,Chair,26609.405,,
2,T-Shirt,33870.19,,
3,Table,47210.32,,
4,Table,13594.61,,


## Create Market Comparison Features

### Price difference vs. market

In [14]:
# Our unit price minus the scraped market price (positive = we charge more).
df['PriceVsMarket'] = df['UnitPrice'].sub(df['ScrapedPrice'])

### Above/below market flag

In [15]:
# Flag rows priced above the scraped market price.
# PriceVsMarket is NaN wherever no scraped price was matched, and a plain
# `NaN > 0` evaluates to False, which would wrongly label those rows as
# "not above market". Use the nullable boolean dtype and keep them missing.
df['AboveMarketPrice'] = (
    df['PriceVsMarket']
    .gt(0)
    .astype("boolean")
    .mask(df['PriceVsMarket'].isna())
)

# Save Final Enriched Dataset

In [16]:
# Write the enriched sales data to disk without the DataFrame index column.
df.to_csv(
    path_or_buf="../data/cleaned/sales_data_final.csv",
    index=False,
)