In [1]:
# Libraries
import json
import pandas as pd

In [2]:
# JSON Lines import function that tolerates malformed or bad rows
def load_jsonl_safe(path, max_errors=20):
    """Load a JSON Lines file into a DataFrame, skipping lines that fail to parse.

    The file is opened as UTF-8 with ``errors="replace"``, so undecodable
    bytes are substituted instead of raising. Blank lines are ignored. Every
    line that is not valid JSON is counted and skipped. The first
    ``max_errors`` bad lines are reported individually; after that a single
    notice is printed and per-line messages are suppressed, but loading
    continues to the end of the file so no valid record is dropped.

    Parameters
    ----------
    path : str or path-like
        Location of the file to read (one JSON value per line).
    max_errors : int, default 20
        Number of bad lines to report individually before further per-line
        messages are suppressed. It does not limit how much of the file is read.

    Returns
    -------
    pandas.DataFrame
        Built from the successfully parsed lines, in file order.
    """
    records = []
    errors = 0

    with open(path, "r", encoding="utf-8", errors="replace") as f:
        # start=1 so the reported position is the 1-based line number a text
        # editor would show for the offending line.
        for line_no, line in enumerate(f, start=1):
            line = line.strip()
            if not line:
                continue
            try:
                records.append(json.loads(line))
            except json.JSONDecodeError:
                errors += 1
                if errors <= max_errors:
                    print(f"Skipping bad JSON at line {line_no}")
                elif errors == max_errors + 1:
                    # Announce the suppression exactly once, then keep reading:
                    # stopping here would silently discard every valid record
                    # that follows the noisy region of the file.
                    print("Too many errors, suppressing further messages")

    print(f"Loaded {len(records)} records with {errors} bad lines skipped")
    return pd.DataFrame(records)


In [3]:
# Read the 5% California samples (reviews and business metadata) into DataFrames.
review_path = "../../sampled_data/review_California_5pct.json"
meta_path = "../../sampled_data/meta_California_5pct.json"

reviews_df = load_jsonl_safe(review_path)
meta_df = load_jsonl_safe(meta_path)

# Cell output: (rows, columns) for each frame.
reviews_df.shape, meta_df.shape

Skipping bad JSON at line 380129
Loaded 380129 records with 1 bad lines skipped
Skipping bad JSON at line 7318
Loaded 7318 records with 1 bad lines skipped


((380129, 8), (7318, 15))

In [4]:
# Preview the first five review rows to check the columns that were loaded.
reviews_df.head(n=5)

Unnamed: 0,user_id,name,time,rating,text,pics,resp,gmap_id
0,101226371370637614545,Faranak Rafizadeh,1618261672851,5.0,Nice people\nhelpful,,,0x80c2c98c0e3c16fd:0x29ec8a728764fdf9
1,112640357449611959087,David Han,1583643882296,4.0,They make Korean traditional food very properly.,,,0x80c2c778e3b73d33:0xbdc58662a4a97d49
2,100185732664454906803,Ana Salazar,1547799582640,5.0,This food is delicious 😁,,,0x80c2c778e3b73d33:0xbdc58662a4a97d49
3,111125108075765074122,kimchi lee,1492967841515,5.0,,,,0x80c2c778e3b73d33:0xbdc58662a4a97d49
4,108991152262655788985,Song Ro,1611028304034,4.0,,,,0x80c2cf163db6bc89:0x219484e2edbcfa41


In [5]:
# Preview the first five business-metadata rows to check the columns that were loaded.
meta_df.head(n=5)

Unnamed: 0,name,address,gmap_id,description,latitude,longitude,category,avg_rating,num_of_reviews,price,hours,MISC,state,relative_results,url
0,City Textile,"City Textile, 3001 E Pico Blvd, Los Angeles, C...",0x80c2c98c0e3c16fd:0x29ec8a728764fdf9,,34.018891,-118.21529,[Textile exporter],4.5,6,,,,Open now,"[0x80c2c624136ea88b:0xb0315367ed448771, 0x80c2...",https://www.google.com/maps/place//data=!4m2!3...
1,San Soo Dang,"San Soo Dang, 761 S Vermont Ave, Los Angeles, ...",0x80c2c778e3b73d33:0xbdc58662a4a97d49,,34.058092,-118.29213,[Korean restaurant],4.4,18,,"[[Thursday, 6:30AM–6PM], [Friday, 6:30AM–6PM],...","{'Service options': ['Takeout', 'Dine-in', 'De...",Open ⋅ Closes 6PM,"[0x80c2c78249aba68f:0x35bf16ce61be751d, 0x80c2...",https://www.google.com/maps/place//data=!4m2!3...
2,Matrix International Textiles,"Matrix International Textiles, 1363 S Bonnie B...",0x80c2cf163db6bc89:0x219484e2edbcfa41,,34.015505,-118.181839,[Fabric store],3.5,6,,"[[Thursday, 8:30AM–5:30PM], [Friday, 8:30AM–5:...",{'Accessibility': ['Wheelchair accessible entr...,Open ⋅ Closes 5:30PM,"[0x80c2cf042a5d9561:0xd0024ad6f81f1335, 0x80c2...",https://www.google.com/maps/place//data=!4m2!3...
3,Black Tie Ski Rental Delivery of Mammoth,"Black Tie Ski Rental Delivery of Mammoth, 501 ...",0x80960c29f2e3bf29:0x4b291f0d275a5699,,37.638754,-118.966055,"[Ski rental service, Snowboard rental service]",5.0,34,,"[[Thursday, 8AM–5PM], [Friday, 8AM–5PM], [Satu...",{'Accessibility': ['Wheelchair accessible entr...,Open ⋅ Closes 5PM,"[0x80960dcd6ba76731:0x9a6875ced2f9228e, 0x8096...",https://www.google.com/maps/place//data=!4m2!3...
4,Angel-A Massage,"Angel-A Massage, 5329 Dewey Dr, Fair Oaks, CA ...",0x809adef58ae560bd:0x83412889e022ff70,,38.66263,-121.308639,"[Massage spa, Chiropractor, Massage, Massage t...",4.3,6,,"[[Thursday, 9:30AM–9:30PM], [Friday, 9:30AM–9:...",,Open ⋅ Closes 9:30PM,"[0x809adee9ac6f1bd7:0xe2e56d16919b9b8d, 0x809a...",https://www.google.com/maps/place//data=!4m2!3...
