In [1]:
import sqlite3
import pandas as pd

# Connect to the SQLite DB. Opening through a URI with mode=rw makes a wrong
# path fail loudly (sqlite3.OperationalError); a plain sqlite3.connect(path)
# would silently create an empty database file at that location instead.
conn = sqlite3.connect("file:../data/sqlite/nba_mvp.db?mode=rw", uri=True)

# ---------------------------
# 1. List all tables
# ---------------------------
print("📋 Tables in database:")
try:
    tables = pd.read_sql(
        "SELECT name FROM sqlite_master WHERE type='table';", conn
    )
except Exception:
    # Do not leave the connection open if the very first query fails.
    conn.close()
    raise
print(tables)

# ---------------------------
# 2. View table row/column counts
# ---------------------------
def get_table_info(table_name, connection=None):
    """Print the row count, column count and column names of one table.

    Args:
        table_name: Name of the table to inspect. It is quoted as an SQL
            identifier, so names containing spaces, double quotes or
            reserved words are handled.
        connection: Open DB connection to query. Defaults to the
            notebook-level ``conn`` when omitted.

    Returns:
        None. The summary is written to stdout.
    """
    if connection is None:
        connection = conn

    # Table names cannot be bound as SQL parameters, so quote the identifier
    # explicitly (doubling any embedded double quotes).
    quoted = '"' + str(table_name).replace('"', '""') + '"'

    # LIMIT 0 transfers no rows but still yields the column names.
    columns = list(
        pd.read_sql(f"SELECT * FROM {quoted} LIMIT 0;", connection).columns
    )
    row_count = pd.read_sql(
        f"SELECT COUNT(*) as rows FROM {quoted};", connection
    ).iloc[0]["rows"]

    print(f"\n🧾 Table: {table_name}")
    print(f"   ➤ Rows: {row_count}")
    print(f"   ➤ Columns: {len(columns)}")
    print(f"   ➤ Column names: {columns}")

# Inspect every table listed above, then report how many tables exist.
# try/finally guarantees the connection is released even if one of the
# queries raises part-way through the cell.
try:
    for t in tables["name"]:
        get_table_info(t)

    table_count = pd.read_sql(
        "SELECT COUNT(*) as count FROM sqlite_master WHERE type='table';", conn
    )
    print("🧮 Number of tables in the database:", table_count.iloc[0]['count'])
finally:
    # Close connection
    conn.close()
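# ---------------------------
# 3. Load & merge (optional, currently disabled)
#    NOTE: the code below queries through `conn`, which is already closed
#    at this point; reopen the connection before uncommenting it.
# ---------------------------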
# player_stats = pd.read_sql("SELECT * FROM player_season_stats", conn)
# features = pd.read_sql("SELECT * FROM engineered_features", conn)

# # Normalize player name
# player_stats["Name"] = player_stats["Name"].str.strip()
# features["Name"] = features["Name"].str.strip()

# # Optional: show any overlapping column names before merge
# common_cols = set(player_stats.columns).intersection(set(features.columns))
# common_cols.discard("Name")
# common_cols.discard("season")
# if common_cols:
#     print("\n⚠️ Warning: Potential duplicate columns in merge:", common_cols)

# # Safe merge
# # merged = pd.merge(player_stats, features, on=["Name", "season"], how="inner")

# print("\n✅ Merged shape:", merged.shape)
# print("📌 Sample merged rows:")
# print(merged[["Name", "season"] + [col for col in merged.columns if col not in ['Name', 'season']][:3]].head())  # limit for brevity

# # Save merged
# merged.to_sql("final_player_data", conn, if_




📋 Tables in database:
                  name
0  player_season_stats
1        player_id_map
2  engineered_features
3    final_player_data

🧾 Table: player_season_stats
   ➤ Rows: 1150
   ➤ Columns: 23
   ➤ Column names: ['RK', 'Name', 'POS', 'GP', 'MIN', 'PTS', 'FGM', 'FGA', 'FG%', '3PM', '3PA', '3P%', 'FTM', 'FTA', 'FT%', 'REB', 'AST', 'STL', 'BLK', 'TO', 'DD2', 'TD3', 'season']

🧾 Table: player_id_map
   ➤ Rows: 287
   ➤ Columns: 2
   ➤ Column names: ['player_id', 'player_name']

🧾 Table: engineered_features
   ➤ Rows: 1150
   ➤ Columns: 28
   ➤ Column names: ['RK', 'Name', 'POS', 'GP', 'MIN', 'PTS', 'FGM', 'FGA', 'FG%', '3PM', '3PA', '3P%', 'FTM', 'FTA', 'FT%', 'REB', 'AST', 'STL', 'BLK', 'TO', 'DD2', 'TD3', 'season', 'team_name', 'player_name', 'player_id', 'team_win_pct', 'team_rank']

🧾 Table: final_player_data
   ➤ Rows: 1150
   ➤ Columns: 49
   ➤ Column names: ['RK_x', 'Name', 'POS_x', 'GP_x', 'MIN_x', 'PTS_x', 'FGM_x', 'FGA_x', 'FG%_x', '3PM_x', '3PA_x', '3P%_x', 'FTM_x', 'FTA_

In [None]:
import sqlite3

# Connect to the database. The URI form with mode=rw refuses to create a new,
# empty database when the path is wrong, which plain sqlite3.connect(path)
# would silently do.
conn = sqlite3.connect("file:../data/sqlite/nba_mvp.db?mode=rw", uri=True)
try:
    # Query column metadata for final_player_data: PRAGMA table_info returns
    # one row per column (an empty result means the table does not exist).
    cursor = conn.cursor()
    cursor.execute("PRAGMA table_info(final_player_data);")
    columns_info = cursor.fetchall()
finally:
    # Close connection, even if the query fails
    conn.close()

# Extract just the column names into a list (index 1 of each row is the name)
column_names = [col[1] for col in columns_info]

# Print the result
print(column_names)


['RK_x', 'Name', 'POS_x', 'GP_x', 'MIN_x', 'PTS_x', 'FGM_x', 'FGA_x', 'FG%_x', '3PM_x', '3PA_x', '3P%_x', 'FTM_x', 'FTA_x', 'FT%_x', 'REB_x', 'AST_x', 'STL_x', 'BLK_x', 'TO_x', 'DD2_x', 'TD3_x', 'season', 'RK_y', 'POS_y', 'GP_y', 'MIN_y', 'PTS_y', 'FGM_y', 'FGA_y', 'FG%_y', '3PM_y', '3PA_y', '3P%_y', 'FTM_y', 'FTA_y', 'FT%_y', 'REB_y', 'AST_y', 'STL_y', 'BLK_y', 'TO_y', 'DD2_y', 'TD3_y', 'team_name', 'player_name', 'player_id', 'team_win_pct', 'team_rank']


In [4]:
import sqlite3
import pandas as pd

# Connect to the database. The URI form with mode=rw refuses to create a new,
# empty database when the path is wrong, which plain sqlite3.connect(path)
# would silently do.
conn = sqlite3.connect("file:../data/sqlite/nba_mvp.db?mode=rw", uri=True)
try:
    # Read a few rows from the table
    df = pd.read_sql("SELECT * FROM final_player_data LIMIT 5;", conn)
finally:
    # Close the connection as soon as the rows are in memory, even on failure
    conn.close()

# NOTE: because of the LIMIT 5 above, this CSV holds only a 5-row sample of
# final_player_data, not the full table.
df.to_csv("../data/final_player_data.csv", index=False)

# Display the result
print(df)


  RK_x                 Name POS_x GP_x MIN_x PTS_x FGM_x FGA_x FG%_x 3PM_x  \
0    -     Allen IversonPHI    SG   60  43.7  31.4  11.1  27.8  39.8   1.3   
1    -  Shaquille O'NealLAL     C   67  36.1  27.2  10.6  18.3  57.9   0.0   
2    -       Paul PierceBOS    SF   82  40.3  26.1   8.6  19.5  44.2   2.6   
3    -     Tracy McGradyORL    SG   76  38.3  25.6   9.4  20.9  45.1   1.4   
4    -         Tim DuncanSA     C   82  40.6  25.5   9.3  18.3  50.8   0.0   

   ... STL_y BLK_y TO_y DD2_y TD3_y team_name       player_name player_id  \
0  ...   2.8   0.2  4.0     4     1       PHI     Allen Iverson         7   
1  ...   0.6   2.0  2.6    40     0       LAL  Shaquille O'Neal       253   
2  ...   1.9   1.0  2.9    17     0       BOS       Paul Pierce       234   
3  ...   1.6   1.0  2.5    24     1       ORL     Tracy McGrady       270   
4  ...   0.7   2.5  3.2    67     0        SA        Tim Duncan       265   

  team_win_pct team_rank  
0     0.524390       4.0  
1     0.707317

In [None]:
import pandas as pd

# Load CSV
df = pd.read_csv("../data/final_player_data.csv")

# Convert to HTML and save
html_path = "../data/cleaned_final_player_data.html"
html_table = df.to_html(index=False)
with open(html_path, "w", encoding="utf-8") as f:
    f.write(html_table)

# Report the path that was actually written, so the message cannot drift
# from the real output file name.
print(f"✅ HTML table saved to {html_path}")


✅ HTML table saved to final_player_data.html
