In [1]:
import json

# Preview the first few raw review records from the reviews JSONL file.
# The path uses a forward slash, which works on every OS. The previous
# "data\Electronics.jsonl" relied on "\E" being an invalid escape sequence
# (Python warns about those) and only pointed inside data/ on Windows.
file = "data/Electronics.jsonl"   # e.g., "All_Beauty.jsonl", downloaded from the `review` link above
# Decode explicitly as UTF-8: the review text contains non-ASCII characters
# (e.g. curly apostrophes) and the platform default encoding is not always UTF-8.
with open(file, 'r', encoding='utf-8') as fp:
    for i, line in enumerate(fp):
        if i >= 3:  # Only show first 3 samples
            break
        print(f"Sample {i+1}:")
        # Each line of the file is one standalone JSON object.
        print(json.loads(line.strip()))
        print("-" * 50)

Sample 1:
{'rating': 3.0, 'title': 'Smells like gasoline! Going back!', 'text': 'First & most offensive: they reek of gasoline so if you are sensitive/allergic to petroleum products like I am you will want to pass on these.  Second: the phone adapter is useless as-is. Mine was not drilled far enough to be able to tighten it into place for my iPhone 12 max. It just slipped & slid all over. Stupid me putting the adapter together first without picking up the binoculars to smell them bc I wasted 15 minutes trying to figure out how to put the adapter together bc it does not come with instructions!  I had to come back here to the website which was a total pain. Third: the tripod is also useless. I would not trust the iOS to hold my $1600 phone nor even a Mattel Barbie for that matter. It’s just inefficient for the job imo.  Third: in order to try to give an honest review I did don gloves & eyewear to check the binoculars out.  They seemed average except for mine seemed to be missing about 10

In [2]:
import json

# Preview the first few raw product-metadata records from the metadata JSONL file.
# The path uses a forward slash, which works on every OS. The previous
# "data\meta_Electronics.jsonl" relied on "\m" being an invalid escape sequence
# (Python warns about those) and only pointed inside data/ on Windows.
file = "data/meta_Electronics.jsonl" # e.g., "meta_All_Beauty.jsonl", downloaded from the `meta` link above
# Decode explicitly as UTF-8: the product descriptions contain non-ASCII characters
# (e.g. curly quotes) and the platform default encoding is not always UTF-8.
with open(file, 'r', encoding='utf-8') as fp:
    for i, line in enumerate(fp):
        if i >= 3:  # Only show first 3 samples
            break
        print(f"Sample {i+1}:")
        # Each line of the file is one standalone JSON object.
        print(json.loads(line.strip()))
        print("-" * 50)

Sample 1:
{'main_category': 'All Electronics', 'title': 'FS-1051 FATSHARK TELEPORTER V3 HEADSET', 'average_rating': 3.5, 'rating_number': 6, 'features': [], 'description': ['Teleporter V3 The “Teleporter V3” kit sets a new level of value in the FPV world with Fat Shark renowned performance and quality. The fun of FPV is experienced firsthand through the large screen FPV headset with integrated NexwaveRF receiver technology while simultaneously recording onboard HD footage with the included “PilotHD” camera. The “Teleporter V3” kit comes complete with everything you need to step into the cockpit of your FPV vehicle. We’ve included our powerful 250mW 5.8Ghz transmitter, 25 degree FOV headset (largest QVGA display available), the brand new “PilotHD” camera with live AV out and all the cables, antennas and connectors needed.'], 'price': None, 'images': [{'thumb': 'https://m.media-amazon.com/images/I/41qrX56lsYL._AC_US40_.jpg', 'large': 'https://m.media-amazon.com/images/I/41qrX56lsYL._AC_.

In [4]:
import json
import pandas as pd

# Load metadata first: build a lookup from each product's parent_asin to its
# full metadata record. Records without a parent_asin are skipped.
# Paths use forward slashes (portable, no invalid "\m" / "\E" escapes) and the
# files are decoded explicitly as UTF-8 because the text contains non-ASCII characters.
metadata = {}
with open("data/meta_Electronics.jsonl", 'r', encoding='utf-8') as fp:
    for line in fp:
        line = line.strip()
        if not line:
            # Tolerate blank lines instead of failing in json.loads.
            continue
        meta = json.loads(line)
        asin = meta.get('parent_asin')
        if asin:
            metadata[asin] = meta

print(f"Loaded {len(metadata)} products from metadata")

# Load reviews and merge with metadata
merged_data = []
with open("data/Electronics.jsonl", 'r', encoding='utf-8') as fp:
    for i, line in enumerate(fp):
        # Only the first 5 reviews are scanned; any of them without matching
        # metadata is skipped, so fewer than 5 merged samples may result.
        if i >= 5:
            break

        review = json.loads(line.strip())
        asin = review.get('parent_asin')

        # Single lookup; None means no metadata is known for this review's product.
        product = metadata.get(asin)
        if product is None:
            continue

        # Create merged record. The raw records also carry 'store',
        # 'verified_purchase' and 'helpful_vote', which are currently left out.
        merged_record = {
            'product_id': asin,
            'product_name': product.get('title', ''),
            # `or []` guards against a description that is present but None,
            # which would otherwise make str.join raise TypeError.
            'product_description': ' '.join(product.get('description') or []),
            'user_id': review.get('user_id', ''),
            'review_text': review.get('text', ''),
            'review_summary': review.get('title', ''),
            'rating': review.get('rating', 0),
            'avg_rating': product.get('average_rating', 0),
            'rating_count': product.get('rating_number', 0),
            'category': product.get('main_category', ''),
            'price': product.get('price')
        }
        merged_data.append(merged_record)

{'main_category': 'All Electronics', 'title': 'FS-1051 FATSHARK TELEPORTER V3 HEADSET', 'average_rating': 3.5, 'rating_number': 6, 'features': [], 'description': ['Teleporter V3 The “Teleporter V3” kit sets a new level of value in the FPV world with Fat Shark renowned performance and quality. The fun of FPV is experienced firsthand through the large screen FPV headset with integrated NexwaveRF receiver technology while simultaneously recording onboard HD footage with the included “PilotHD” camera. The “Teleporter V3” kit comes complete with everything you need to step into the cockpit of your FPV vehicle. We’ve included our powerful 250mW 5.8Ghz transmitter, 25 degree FOV headset (largest QVGA display available), the brand new “PilotHD” camera with live AV out and all the cables, antennas and connectors needed.'], 'price': None, 'images': [{'thumb': 'https://m.media-amazon.com/images/I/41qrX56lsYL._AC_US40_.jpg', 'large': 'https://m.media-amazon.com/images/I/41qrX56lsYL._AC_.jpg', 'variant': 'MAIN', 'hi_res': None}], 'videos': [], 'store': 'Fat Shark', 'categories': ['Electronics', 'Television & Video', 'Video Glasses'], 'details': {'Date First Available': 'August 2, 2014', 'Manufacturer': 'Fatshark'}, 'parent_asin': 'B00MCW7G9M', 'bought_together': None}
{'rating': 3.0, 'title': 'Smells like gasoline! Going back!', 'text': 'First & most offensive: they reek of gasoline so if you are sensitive/allergic to petroleum products like I am you will want to pass on these.  Second: the phone adapter is useless as-is. Mine was not drilled far enough to be able to tighten it into place for my iPhone 12 max. It just slipped & slid all over. Stupid me putting the adapter together first without picking up the binoculars to smell them bc I wasted 15 minutes trying to figure out how to put the adapter together bc it does not come with instructions!  I had to come back here to the website which was a total pain. Third: the tripod is also useless. I would not trust the iOS to hold my $1600 phone nor even a Mattel Barbie for that matter. It’s just inefficient for the job imo.  Third: in order to try to give an honest review I did don gloves & eyewear to check the binoculars out.  They seemed average except for mine seemed to be missing about 10% of the film costing in the lower edge of one of the lenses which would have ruined every video & photograph unplanned to take so for me these are a very huge hard pass.  I expect the accessories that come with the main product to be as good or better than the product I’m buying. Otherwise I would just buy the product as a stand alone.  Sadly, I found a decent pair of binoculars last year with a much better quality phone adapter & tripod, but they had a defect too.  Guess I’m going to have to pay more.  Ugh.', 'images': [{'small_image_url': 'https://m.media-amazon.com/images/I/71YN+Qk3kCL._SL256_.jpg', 'medium_image_url': 'https://m.media-amazon.com/images/I/71YN+Qk3kCL._SL800_.jpg', 'large_image_url': 'https://m.media-amazon.com/images/I/71YN+Qk3kCL._SL1600_.jpg', 'attachment_type': 'IMAGE'}], 'asin': 'B083NRGZMM', 'parent_asin': 'B083NRGZMM', 'user_id': 'AFKZENTNBQ7A7V7UXW5JJI6UGRYQ', 'timestamp': 1658185117948, 'helpful_vote': 0, 'verified_purchase': True}


# Print every merged sample, one "  field: value" line per entry,
# followed by an 80-character separator.
for sample_number, sample in enumerate(merged_data, start=1):
    print(f"Merged Sample {sample_number}:")
    for field, content in sample.items():
        rendered = str(content)
        # Only the product description is shortened: anything longer than
        # 200 characters is cut to its first 200 and suffixed with "...".
        needs_truncation = field == 'product_description' and len(rendered) > 200
        if needs_truncation:
            print(f"  {field}: {rendered[:200]}...")
        else:
            print(f"  {field}: {content}")
    print("-" * 80)


Loaded 1610012 products from metadata
Merged Sample 1:
  product_id: B083NRGZMM
  product_name: Binoculars, 12x42 Binoculars for Adults, Binoculars for Hunting, Compact Binoculars with Tripod, Smartphone Adapter for Hunting, Bird Watching, Hiking, Traveling and Sports
  product_description: 
  user_id: AFKZENTNBQ7A7V7UXW5JJI6UGRYQ
  review_text: First & most offensive: they reek of gasoline so if you are sensitive/allergic to petroleum products like I am you will want to pass on these.  Second: the phone adapter is useless as-is. Mine was not drilled far enough to be able to tighten it into place for my iPhone 12 max. It just slipped & slid all over. Stupid me putting the adapter together first without picking up the binoculars to smell them bc I wasted 15 minutes trying to figure out how to put the adapter together bc it does not come with instructions!  I had to come back here to the website which was a total pain. Third: the tripod is also useless. I would not trust the iOS to hold 

In [None]:
# Create complete merged dataset: join every review with the metadata of its
# product (looked up by parent_asin in the `metadata` dict built earlier),
# then persist the result as both a JSON array and a JSONL file.
print("Creating complete merged dataset")

complete_merged_data = []
# Forward-slash path (portable, no invalid "\E" escape) and explicit UTF-8
# decoding, since the review text contains non-ASCII characters.
with open("data/Electronics.jsonl", 'r', encoding='utf-8') as fp:
    for line in fp:
        line = line.strip()
        if not line:
            # Tolerate blank lines instead of failing in json.loads.
            continue
        review = json.loads(line)
        asin = review.get('parent_asin')

        # Single lookup; reviews whose product has no metadata are dropped.
        product = metadata.get(asin)
        if product is None:
            continue

        # Create merged record
        merged_record = {
            'product_id': asin,
            'product_name': product.get('title', ''),
            # `or []` guards against a description that is present but None,
            # which would otherwise make str.join raise TypeError.
            'product_description': ' '.join(product.get('description') or []),
            'user_id': review.get('user_id', ''),
            'review_text': review.get('text', ''),
            'review_summary': review.get('title', ''),
            'rating': review.get('rating', 0),
            'avg_rating': product.get('average_rating', 0),
            'rating_count': product.get('rating_number', 0),
            'category': product.get('main_category', ''),
            'price': product.get('price')
        }
        complete_merged_data.append(merged_record)

print(f"Total merged records: {len(complete_merged_data)}")

# Convert to DataFrame for easier analysis
df = pd.DataFrame(complete_merged_data)
print(f"DataFrame shape: {df.shape}")
print(f"Columns: {list(df.columns)}")

# Save to JSON. The path is held in a variable so the confirmation message
# always names the file that was actually written (it previously reported
# 'data/electronics_data.json', which is not the file created).
merged_json_path = "data/merged_electronics_data.json"
with open(merged_json_path, 'w', encoding='utf-8') as f:
    json.dump(complete_merged_data, f, indent=2, ensure_ascii=False)
print(f"Saved merged data to '{merged_json_path}'")

# Also save as JSONL (one JSON object per line) for easier processing
merged_jsonl_path = "data/merged_electronics_data.jsonl"
with open(merged_jsonl_path, 'w', encoding='utf-8') as f:
    for record in complete_merged_data:
        f.write(json.dumps(record, ensure_ascii=False) + '\n')
print(f"Also saved as JSONL format to '{merged_jsonl_path}'")


In [None]:
# Show basic statistics for the merged DataFrame `df` built in the previous cell.
print("\nBasic Statistics:")
print(f"Unique products: {df['product_id'].nunique()}")
print(f"Unique users: {df['user_id'].nunique()}")
print(f"Average rating: {df['rating'].mean():.2f}")
print(f"Average product rating: {df['avg_rating'].mean():.2f}")
# The merged records built in this notebook do not include a
# 'verified_purchase' field, so indexing it unconditionally raises KeyError.
# Report the count only when the column is actually present.
if 'verified_purchase' in df.columns:
    print(f"Verified purchases: {df['verified_purchase'].sum()}")
else:
    print("Verified purchases: n/a ('verified_purchase' column not present in df)")