# Commissioner Sheet Export to GCS

Run this notebook in Google Colab to export Commissioner Sheet data to Google Cloud Storage (GCS).
Colab runs on Google's infrastructure and can often read sheets that time out when accessed from elsewhere.

In [None]:
# Authenticate with Google (will prompt for auth)
# Run this cell first: the cells below obtain their credentials after this
# interactive sign-in has completed.
from google.colab import auth

auth.authenticate_user()

In [None]:
import datetime

import gspread
import pandas as pd
from google.auth import default
from google.cloud import storage

# Get authenticated credentials
# default() returns a pair; only the first element (the credentials) is used
# here, and the second is discarded.
creds, _ = default()
# gspread client that the following cells use to open the spreadsheet.
gc = gspread.authorize(creds)

In [None]:
# Open the Commissioner Sheet by its spreadsheet key and report what it contains
SHEET_ID = "1jYAGKzPmaQnmvomLzARw9mL6-JbguwkFQWlOfN7VGNY"
sheet = gc.open_by_key(SHEET_ID)
print(f"Opened: {sheet.title}")

# List the tab titles so the names available for export are visible up front.
worksheet_titles = [ws.title for ws in sheet.worksheets()]
print(f"Worksheets: {worksheet_titles}")

In [None]:
# Export only the specified owner tabs to DataFrames
#
# Requires OWNER_TABS to be defined in an earlier cell as a list of worksheet
# titles. Each tab that is read successfully is stored in `data`, keyed by its
# title; missing, empty, or failing tabs are reported and skipped.
data = {}
all_worksheets = {ws.title: ws for ws in sheet.worksheets()}
print(f"Total worksheets in sheet: {len(all_worksheets)}")
print(f"Available worksheets: {list(all_worksheets.keys())}\n")

# Validate the configuration before reading anything, so a missing OWNER_TABS
# produces an actionable message rather than a bare NameError. A lone string is
# wrapped in a list so it is not iterated character by character.
try:
    owner_tabs = [OWNER_TABS] if isinstance(OWNER_TABS, str) else list(OWNER_TABS)
except NameError:
    raise NameError(
        "OWNER_TABS is not defined. Define it in an earlier cell as a list of "
        "worksheet titles to export, chosen from the available worksheets "
        "printed above."
    ) from None

for tab_name in owner_tabs:
    if tab_name not in all_worksheets:
        print(f"⚠️  Tab '{tab_name}' not found - skipping")
        continue

    print(f"Reading {tab_name}...", end=" ")
    ws = all_worksheets[tab_name]

    # Best-effort per tab: a failure on one tab is reported and the loop moves
    # on to the next one instead of aborting the whole export.
    try:
        # Get all values from this owner's tab
        values = ws.get_all_values()

        # More than one row is needed: the first row supplies the headers.
        if len(values) > 1:
            # Convert to DataFrame (first row as headers)
            df = pd.DataFrame(values[1:], columns=values[0])
            # Keep only columns that have at least one non-empty cell
            df = df.loc[:, (df != "").any(axis=0)]
            data[tab_name] = df
            print(f"✓ {len(df)} rows × {len(df.columns)} columns")
        else:
            print("⚠️  No data found")
    except Exception as e:
        print(f"❌ Error: {e}")

print(f"\n✅ Successfully exported {len(data)}/{len(owner_tabs)} owner tabs")

In [None]:
# Export each worksheet to a DataFrame
#
# Starts from an empty `data` dict (replacing whatever an earlier cell stored
# there) and adds one DataFrame per worksheet that returns any rows.
data = {}
for worksheet in sheet.worksheets():
    title = worksheet.title
    print(f"Reading {title}...")
    try:
        rows = worksheet.get_all_values()
        if not rows:
            # Nothing came back for this tab; leave it out of `data`.
            continue

        # The first row supplies the column names, the rest are records.
        header, *records = rows
        frame = pd.DataFrame(records, columns=header)
        data[title] = frame

        row_count, column_count = frame.shape
        print(f"  ✓ {row_count} rows × {column_count} columns")
    except Exception as err:
        # Report the failure and carry on with the remaining worksheets.
        print(f"  ✗ Error: {err}")

In [None]:
# Upload to GCS with organized structure
#
# Every DataFrame in `data` is written as CSV to
#   gs://<bucket_name>/raw/commissioner/rosters/<tab>/dt=<YYYY-MM-DD>/data.csv
# with descriptive metadata attached to the object.

bucket_name = "ff-analytics"
client = storage.Client()
bucket = client.bucket(bucket_name)

# Read the clock once, timezone-aware in UTC, and derive both the date
# partition and the metadata timestamp from that single reading. Two separate
# now() calls could straddle midnight and disagree, and a naive timestamp does
# not say which timezone it was taken in.
export_time = datetime.datetime.now(datetime.timezone.utc)
dt = export_time.strftime("%Y-%m-%d")
timestamp = export_time.isoformat()

print("Uploading to GCS...")
for owner_name, df in data.items():
    # Store by owner and date
    blob_name = f"raw/commissioner/rosters/{owner_name}/dt={dt}/data.csv"
    blob = bucket.blob(blob_name)

    # Add metadata (values are stringified before being attached)
    blob.metadata = {
        "source": "commissioner_sheet",
        "owner": owner_name,
        "export_timestamp": timestamp,
        "rows": str(len(df)),
        "columns": str(len(df.columns)),
    }

    # Upload from DataFrame, without the pandas index column
    csv_data = df.to_csv(index=False)
    blob.upload_from_string(csv_data, content_type="text/csv")
    print(f"  ✓ {owner_name} → gs://{bucket_name}/{blob_name}")

print(f"\n✅ Upload complete! {len(data)} owner tabs exported to GCS")

In [None]:
# Print a short per-tab report of everything collected in `data`
banner = "=" * 50
print(banner)
print("Export Summary")
print(banner)

# Column names worth calling out when present (adjust based on your sheet structure)
key_cols = ["Player", "Team", "Position", "Contract"]

for owner_name, df in data.items():
    n_rows, n_cols = df.shape
    print(f"\n{owner_name}:")
    print(f"  Shape: {n_rows} rows × {n_cols} columns")

    # Preview at most five column names; a trailing "..." marks truncation
    cols = [*df.columns[:5]]
    if n_cols > 5:
        cols += ["..."]
    print(f"  Columns: {cols}")

    # Report which of the key columns this tab actually has, if any
    found_keys = [name for name in key_cols if name in df.columns]
    if found_keys:
        print(f"  Key columns found: {found_keys}")

In [None]:
# Upload to GCS
#
# Writes every DataFrame in `data` as CSV under
#   gs://<bucket_name>/raw/commissioner/<sheet>/dt=<YYYY-MM-DD>/data.csv
from google.cloud import storage

bucket_name = "ff-analytics"
client = storage.Client()
bucket = client.bucket(bucket_name)

# One date string shared by every object written in this run
dt = datetime.datetime.now().strftime("%Y-%m-%d")
for sheet_name, frame in data.items():
    blob_name = f"raw/commissioner/{sheet_name}/dt={dt}/data.csv"

    # Render the CSV (without the pandas index column), then push it
    csv_text = frame.to_csv(index=False)
    bucket.blob(blob_name).upload_from_string(csv_text, content_type="text/csv")
    print(f"Uploaded to gs://{bucket_name}/{blob_name}")