In [None]:
# Connect to the PlanetScale database

from sqlalchemy import create_engine, MetaData

def load_db_credentials(filepath='db_credentials'):
    """Read ``key=value`` pairs from a credentials file into a dict.

    Blank lines and lines starting with ``#`` are ignored. Every other line is
    split on its first ``=`` only, so a value may itself contain ``=``.
    Whitespace around the key and around the value is stripped.

    Args:
        filepath: Path of the credentials file to read.

    Returns:
        A dict mapping each key to its (string) value.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a non-blank, non-comment line contains no ``=``.
    """
    creds = {}
    with open(filepath, 'r') as f:
        for lineno, raw_line in enumerate(f, start=1):
            line = raw_line.strip()
            # A trailing newline at end of file or a spacer line must not crash the loader.
            if not line or line.startswith('#'):
                continue
            key, sep, val = line.partition('=')
            if not sep:
                # Report only the position: the line itself may hold a secret.
                raise ValueError(
                    f"{filepath}: line {lineno} is not in key=value form"
                )
            creds[key.strip()] = val.strip()
    return creds

from urllib.parse import quote

# Read the connection settings from the local credentials file.
creds = load_db_credentials()

username = creds['username']
password = creds['password']
host = creds['host']
database = creds['database']
port = int(creds['port'])  # the file stores text; fail early if it is not a number

# SQLAlchemy connection string (MySQL connector).
# The user name and password are percent-encoded (safe='' also encodes '/')
# because characters such as '@', ':' or '/' would otherwise be read as URL
# delimiters and break the host/port part of the string.
connection_string = (
    f"mysql+mysqlconnector://{quote(username, safe='')}:{quote(password, safe='')}"
    f"@{host}:{port}/{database}"
)

# Create the engine
engine = create_engine(connection_string)

In [None]:
# Reflect database schema: load the definitions of the tables that currently
# exist in the database behind `engine` into a fresh MetaData object.
meta = MetaData()
meta.reflect(bind=engine)

# Drop all tables that were just reflected.
# WARNING: destructive -- every reflected table and its data is removed, so
# the cells below must be re-run to rebuild the schema.
meta.drop_all(bind=engine)
print("All tables dropped!")

**table: category**

In [None]:
import pandas as pd

from sqlalchemy import text

# Load the category rows from CSV.
df = pd.read_csv('table_category.csv')

# (Re)create the `category` table from the DataFrame; an existing table with
# that name is replaced.
df.to_sql('category', con=engine, if_exists='replace', index=False)

# Add the primary key once the table exists.
# engine.begin() opens a transaction and commits it when the block exits, so
# the DDL no longer depends on the server committing it implicitly.
with engine.begin() as con:
    con.execute(text('ALTER TABLE `category` ADD PRIMARY KEY (`category_id`);'))

**table: genre**

In [None]:
# Load the genre rows from CSV.
df = pd.read_csv('table_genre.csv')

# (Re)create the `genre` table from the DataFrame; an existing table with
# that name is replaced.
df.to_sql('genre', con=engine, if_exists='replace', index=False)

# Add the primary key once the table exists.
# engine.begin() opens a transaction and commits it when the block exits, so
# the DDL no longer depends on the server committing it implicitly.
with engine.begin() as con:
    con.execute(text('ALTER TABLE `genre` ADD PRIMARY KEY (`genre_id`);'))

**table: publisher**

In [None]:
# Load the publisher rows from CSV.
df = pd.read_csv('table_publisher.csv')

# (Re)create the `publisher` table from the DataFrame; an existing table with
# that name is replaced.
df.to_sql('publisher', con=engine, if_exists='replace', index=False)

# Add the primary key once the table exists.
# engine.begin() opens a transaction and commits it when the block exits, so
# the DDL no longer depends on the server committing it implicitly.
with engine.begin() as con:
    con.execute(text('ALTER TABLE `publisher` ADD PRIMARY KEY (`publisher_id`);'))

**table: developer**

In [None]:
# Load the developer rows from CSV.
df = pd.read_csv('table_developer.csv')

# (Re)create the `developer` table from the DataFrame; an existing table with
# that name is replaced.
df.to_sql('developer', con=engine, if_exists='replace', index=False)

# Add the primary key once the table exists.
# engine.begin() opens a transaction and commits it when the block exits, so
# the DDL no longer depends on the server committing it implicitly.
with engine.begin() as con:
    con.execute(text('ALTER TABLE `developer` ADD PRIMARY KEY (`developer_id`);'))

**table: game**

In [None]:
# Load the game rows from CSV.
df = pd.read_csv('table_game.csv')

# Parse the release timestamp and keep only the calendar date.
# errors='coerce' turns values that do not match the format into NaT instead
# of raising, so malformed dates end up as missing values.
df['release_dates'] = pd.to_datetime(
    df['release_dates'],
    errors='coerce',
    format='%Y-%m-%d %H:%M:%S',
    utc=True,
).dt.date

# (Re)create the `game` table from the DataFrame; an existing table with
# that name is replaced.
df.to_sql('game', con=engine, if_exists='replace', index=False)

# Add the primary key; the foreign keys created by the later cells reference
# game(steam_id).
# engine.begin() opens a transaction and commits it when the block exits, so
# the DDL no longer depends on the server committing it implicitly.
with engine.begin() as con:
    con.execute(text('ALTER TABLE `game` ADD PRIMARY KEY (`steam_id`);'))

**link table: score**

In [None]:
# Load the score rows from CSV.
df = pd.read_csv('table_score.csv')

# Parse the two timestamp columns (offset-aware format) and keep only the
# calendar date. errors='coerce' turns non-matching values into NaT instead
# of raising.
df['lowest_price_date'] = pd.to_datetime(
    df['lowest_price_date'],
    errors='coerce',
    format='%Y-%m-%d %H:%M:%S%z',
    utc=True,
).dt.date
df['last_deal_date'] = pd.to_datetime(
    df['last_deal_date'],
    errors='coerce',
    format='%Y-%m-%d %H:%M:%S%z',
    utc=True,
).dt.date

# (Re)create the `score` table from the DataFrame; an existing table with
# that name is replaced.
df.to_sql('score', con=engine, if_exists='replace', index=False)

# Add foreign key constraint to game(steam_id).
# engine.begin() opens a transaction and commits it when the block exits, so
# the DDL no longer depends on the server committing it implicitly.
with engine.begin() as con:
    con.execute(text("""
        ALTER TABLE score
        ADD CONSTRAINT fk_score_steam_id
        FOREIGN KEY (steam_id) REFERENCES game(steam_id)
        ON DELETE CASCADE
        ON UPDATE CASCADE;
    """))

**link table: price_log**

In [None]:
# Load the price log rows from CSV.
df = pd.read_csv('table_price_log.csv')

# Parse the timestamp (offset-aware format) and keep only the calendar date.
# errors='coerce' turns non-matching values into NaT instead of raising.
df['date'] = pd.to_datetime(
    df['date'],
    errors='coerce',
    format='%Y-%m-%d %H:%M:%S%z',
    utc=True,
).dt.date

chunk_size = 50000
# Split DataFrame and insert into DB one slice per to_sql call.
# The first slice replaces any existing table; the remaining slices append.
for idx, start in enumerate(range(0, len(df), chunk_size)):
    chunk = df.iloc[start:start+chunk_size]
    if idx == 0:
        chunk.to_sql('price_log', con=engine, if_exists='replace', index=False)
        print(f"Inserted initial chunk {idx} with replace.")
    else:
        chunk.to_sql('price_log', con=engine, if_exists='append', index=False)
        print(f"Inserted chunk {idx} with append.")

# Add foreign key constraint to game(steam_id).
# engine.begin() opens a transaction and commits it when the block exits, so
# the DDL no longer depends on the server committing it implicitly.
with engine.begin() as con:
    con.execute(text("""
        ALTER TABLE price_log
        ADD CONSTRAINT fk_price_log_steam_id
        FOREIGN KEY (steam_id) REFERENCES game(steam_id)
        ON DELETE CASCADE
        ON UPDATE CASCADE;
    """))

In [None]:
# One CSV and one table per calendar year, 2015 through 2025 inclusive.
years = range(2015, 2026)

for year in years:
    csv_file = f"table_price_log_{year}.csv"
    table_name = f"price_log_{year}"

    print(f"Processing {csv_file} into table {table_name}...")

    # Load CSV
    df = pd.read_csv(csv_file)

    # Parse the timestamp (offset-aware format) and keep only the calendar
    # date. errors='coerce' turns non-matching values into NaT.
    df['date'] = pd.to_datetime(
        df['date'],
        errors='coerce',
        format='%Y-%m-%d %H:%M:%S%z',
        utc=True,
    ).dt.date

    # Insert into table (replace to create new table)
    df.to_sql(table_name, con=engine, if_exists='replace', index=False)

    # Add foreign key constraint to game(steam_id).
    # table_name is built from the integer range above, so interpolating it
    # into the statement is safe here.
    # engine.begin() opens a transaction and commits it when the block exits,
    # so the DDL no longer depends on the server committing it implicitly.
    with engine.begin() as con:
        con.execute(text(f"""
            ALTER TABLE {table_name}
            ADD CONSTRAINT fk_{table_name}_steam_id
            FOREIGN KEY (steam_id) REFERENCES game(steam_id)
            ON DELETE CASCADE
            ON UPDATE CASCADE;
        """))

    print(f"Finished processing {year}")

**link table: game_category**

In [None]:
# Load the game <-> category link rows from CSV.
df = pd.read_csv('link_game_category.csv')

# (Re)create the `game_category` table from the DataFrame; an existing table
# with that name is replaced.
df.to_sql('game_category', con=engine, if_exists='replace', index=False)

# Add one foreign key per side of the link.
# engine.begin() opens a transaction and commits it when the block exits, so
# the DDL no longer depends on the server committing it implicitly.
with engine.begin() as con:
    con.execute(text("""
        ALTER TABLE game_category
        ADD CONSTRAINT fk_game_category_steam_id
        FOREIGN KEY (steam_id) REFERENCES game(steam_id)
        ON DELETE CASCADE
        ON UPDATE CASCADE;
    """))
    con.execute(text("""
        ALTER TABLE game_category
        ADD CONSTRAINT fk_game_category_category_id
        FOREIGN KEY (category_id) REFERENCES category(category_id)
        ON DELETE CASCADE
        ON UPDATE CASCADE;
    """))

**link table: game_genre**

In [None]:
# Load the game <-> genre link rows from CSV.
df = pd.read_csv('link_game_genre.csv')

# (Re)create the `game_genre` table from the DataFrame; an existing table
# with that name is replaced.
df.to_sql('game_genre', con=engine, if_exists='replace', index=False)

# Add one foreign key per side of the link.
# engine.begin() opens a transaction and commits it when the block exits, so
# the DDL no longer depends on the server committing it implicitly.
with engine.begin() as con:
    con.execute(text("""
        ALTER TABLE game_genre
        ADD CONSTRAINT fk_game_genre_steam_id
        FOREIGN KEY (steam_id) REFERENCES game(steam_id)
        ON DELETE CASCADE
        ON UPDATE CASCADE;
    """))
    con.execute(text("""
        ALTER TABLE game_genre
        ADD CONSTRAINT fk_game_genre_genre_id
        FOREIGN KEY (genre_id) REFERENCES genre(genre_id)
        ON DELETE CASCADE
        ON UPDATE CASCADE;
    """))

**link table: game_publisher**

In [None]:
# Load the game <-> publisher link rows from CSV.
df = pd.read_csv('link_game_publisher.csv')

# (Re)create the `game_publisher` table from the DataFrame; an existing table
# with that name is replaced.
df.to_sql('game_publisher', con=engine, if_exists='replace', index=False)

# Add one foreign key per side of the link.
# engine.begin() opens a transaction and commits it when the block exits, so
# the DDL no longer depends on the server committing it implicitly.
with engine.begin() as con:
    con.execute(text("""
        ALTER TABLE game_publisher
        ADD CONSTRAINT fk_game_publisher_steam_id
        FOREIGN KEY (steam_id) REFERENCES game(steam_id)
        ON DELETE CASCADE
        ON UPDATE CASCADE;
    """))
    con.execute(text("""
        ALTER TABLE game_publisher
        ADD CONSTRAINT fk_game_publisher_publisher_id
        FOREIGN KEY (publisher_id) REFERENCES publisher(publisher_id)
        ON DELETE CASCADE
        ON UPDATE CASCADE;
    """))

**link table: game_developer**

In [None]:
# Load the game <-> developer link rows from CSV.
df = pd.read_csv('link_game_developer.csv')

# (Re)create the `game_developer` table from the DataFrame; an existing table
# with that name is replaced.
df.to_sql('game_developer', con=engine, if_exists='replace', index=False)

# Add one foreign key per side of the link.
# engine.begin() opens a transaction and commits it when the block exits, so
# the DDL no longer depends on the server committing it implicitly.
with engine.begin() as con:
    con.execute(text("""
        ALTER TABLE game_developer
        ADD CONSTRAINT fk_game_developer_steam_id
        FOREIGN KEY (steam_id) REFERENCES game(steam_id)
        ON DELETE CASCADE
        ON UPDATE CASCADE;
    """))
    con.execute(text("""
        ALTER TABLE game_developer
        ADD CONSTRAINT fk_game_developer_developer_id
        FOREIGN KEY (developer_id) REFERENCES developer(developer_id)
        ON DELETE CASCADE
        ON UPDATE CASCADE;
    """))

**table: platform**

In [None]:
# Load the platform rows from CSV.
df = pd.read_csv('table_platform.csv')

# (Re)create the `platform` table from the DataFrame; an existing table with
# that name is replaced.
df.to_sql('platform', con=engine, if_exists='replace', index=False)

# Add the primary key once the table exists.
# engine.begin() opens a transaction and commits it when the block exits, so
# the DDL no longer depends on the server committing it implicitly.
with engine.begin() as con:
    con.execute(text('ALTER TABLE `platform` ADD PRIMARY KEY (`platform_id`);'))

**link table: game_platform**

In [None]:
# Load the game <-> platform link rows from CSV.
df = pd.read_csv('link_game_platform.csv')

# (Re)create the `game_platform` table from the DataFrame; an existing table
# with that name is replaced.
df.to_sql('game_platform', con=engine, if_exists='replace', index=False)

# Add one foreign key per side of the link.
# engine.begin() opens a transaction and commits it when the block exits, so
# the DDL no longer depends on the server committing it implicitly.
with engine.begin() as con:
    con.execute(text("""
        ALTER TABLE game_platform
        ADD CONSTRAINT fk_game_platform_steam_id
        FOREIGN KEY (steam_id) REFERENCES game(steam_id)
        ON DELETE CASCADE
        ON UPDATE CASCADE;
    """))
    con.execute(text("""
        ALTER TABLE game_platform
        ADD CONSTRAINT fk_game_platform_platform_id
        FOREIGN KEY (platform_id) REFERENCES platform(platform_id)
        ON DELETE CASCADE
        ON UPDATE CASCADE;
    """))