In [5]:
# Generate business_category
import json
import pandas as pd

# Load the business export; the whole file is parsed as one JSON document
# (presumably a list of business dicts — confirm against the export format).
with open('yelp/yelp.business.json', 'r', encoding='utf-8') as source:
    data = json.load(source)

# Build one row per (business, category) pair. Businesses whose 'categories'
# field is absent or null contribute no rows.
expanded_data = pd.DataFrame([
    {'business_id': business.get('_id'), 'category': label}
    for business in data
    if business.get('categories') is not None
    for label in business.get('categories')
])

# Write the pairs to CSV without the positional index column
csv_file_path = 'yelp/business_category.csv'
expanded_data.to_csv(csv_file_path, index=False, encoding='utf-8')

In [3]:
# Generate checkin
import json
import pandas as pd

# Load the business export; the whole file is parsed as one JSON document
with open('yelp/yelp.business.json', 'r', encoding='utf-8') as source:
    data = json.load(source)

# Flatten 'checkin_date' into one row per check-in. Each entry is indexed by
# its '$date' key; businesses with a missing or null list are skipped.
checkin_rows = []
for business in data:
    stamps = business.get('checkin_date')
    if stamps is None:
        continue
    owner = business.get('_id')
    checkin_rows.extend(
        {'business_id': owner, 'date': stamp['$date']} for stamp in stamps
    )

# The default positional index is named 'checkin_id' so that it is written
# to the CSV as the leading column.
expanded_data = pd.DataFrame(checkin_rows).rename_axis('checkin_id')

# Write the check-ins to CSV, index included
csv_file_path = 'yelp/checkin.csv'
expanded_data.to_csv(csv_file_path, encoding='utf-8')

In [5]:
# Generate business_attribute
import json
import pandas as pd

# Load the business export; the whole file is parsed as one JSON document
with open('yelp/yelp.business.json', 'r', encoding='utf-8') as source:
    data = json.load(source)

# Turn each business's 'attributes' mapping into (name, value) rows.
# A dict-valued attribute is flattened one level into "<outer>_<inner>" names;
# anything nested deeper is kept as the row's value unchanged.
attribute_rows = []
for business in data:
    attrs = business.get('attributes', {})
    if attrs is None:
        # Explicit null: nothing to emit for this business
        continue
    owner = business.get('_id')
    for name, value in attrs.items():
        if isinstance(value, dict):
            flattened = [
                (f"{name}_{inner}", inner_value)
                for inner, inner_value in value.items()
            ]
        else:
            flattened = [(name, value)]
        attribute_rows.extend(
            {
                'business_id': owner,
                'attribute_name': flat_name,
                'attribute_value': flat_value,
            }
            for flat_name, flat_value in flattened
        )

# Write the attribute rows to CSV without the positional index column
expanded_data = pd.DataFrame(attribute_rows)
csv_file_path = 'yelp/business_attribute.csv'
expanded_data.to_csv(csv_file_path, index=False, encoding='utf-8')

In [4]:
# Generate business_hour
import json
import pandas as pd

# Load the business export; the whole file is parsed as one JSON document
with open('yelp/yelp.business.json', 'r', encoding='utf-8') as source:
    data = json.load(source)

# One wide row per business: 'business_id' followed by an 'hours_<Day>' column
# for each weekday, in Monday..Sunday order. Days absent from the mapping
# become None.
weekdays = (
    'Monday', 'Tuesday', 'Wednesday', 'Thursday',
    'Friday', 'Saturday', 'Sunday',
)

hour_rows = []
for business in data:
    opening = business.get('hours', {})
    # NOTE(review): a business with no 'hours' key falls back to {} and still
    # yields a row with every day set to None, whereas an explicit null is
    # skipped entirely — confirm this asymmetry is intended.
    if opening is None:
        continue
    row = {'business_id': business.get('_id')}
    row.update({f'hours_{day}': opening.get(day, None) for day in weekdays})
    hour_rows.append(row)

# Write the hours table to CSV without the positional index column
expanded_data = pd.DataFrame(hour_rows)
csv_file_path = 'yelp/business_hour.csv'
expanded_data.to_csv(csv_file_path, index=False, encoding='utf-8')

In [1]:
# Generate user_elite
import json
import pandas as pd

# Load the user export; the whole file is parsed as one JSON document
# (presumably a list of user dicts — confirm against the export format).
with open('yelp/yelp.user.json', 'r', encoding='utf-8') as source:
    data = json.load(source)

# Build one row per (user, elite year) pair. Users whose 'elite' field is
# absent or null contribute no rows.
elite_rows = [
    {'user_id': user.get('_id'), 'elite_year': season}
    for user in data
    if user.get('elite') is not None
    for season in user.get('elite')
]

# Write the pairs to CSV without the positional index column
expanded_data = pd.DataFrame(elite_rows)
csv_file_path = 'yelp/user_elite.csv'
expanded_data.to_csv(csv_file_path, index=False, encoding='utf-8')

In [None]:
# Generate user_friend
import json  # imported here so the cell also runs on a fresh kernel
import pandas as pd


def _iter_user_records(path):
    """Yield the user records stored in the JSON file at `path`.

    The file is first parsed as a single JSON document, which is how the
    user_elite cell reads the same file. A top-level list is yielded element
    by element; any other top-level value is yielded as-is.

    If the file is not one valid JSON document, it is re-read as JSON Lines
    (one document per line). Blank lines are ignored, and lines that fail to
    parse are counted and reported instead of being dropped silently.
    """
    with open(path, 'r', encoding='utf-8') as infile:
        try:
            document = json.load(infile)
        except json.JSONDecodeError:
            # Not a single document: rewind and parse line by line.
            infile.seek(0)
            skipped = 0
            for line in infile:
                line = line.strip()
                if not line:
                    continue
                try:
                    yield json.loads(line)
                except json.JSONDecodeError:
                    skipped += 1
            if skipped:
                print(f"user_friend: skipped {skipped} line(s) that are not valid JSON")
            return

    if isinstance(document, list):
        yield from document
    else:
        yield document


# Expand friends: one [user_id, friend_id] pair per entry in a user's
# 'friends' list. Users whose 'friends' field is absent or null add no rows.
new_data = []
for user in _iter_user_records('yelp/yelp.user.json'):
    if not isinstance(user, dict):
        # Only mapping records can carry a 'friends' field
        continue
    friends = user.get('friends')
    if friends is None:
        continue
    new_data.extend([user['_id'], friend_id] for friend_id in friends)

# Save data
# NOTE(review): the other cells write under 'yelp/'; this one writes to the
# current working directory. The path is left unchanged — confirm it is intended.
new_df = pd.DataFrame(new_data, columns=['user_id', 'friend_id'])
new_df.to_csv("user_friend.csv", index=False, encoding='utf-8')