# F1Ops Data Ingestion & Exploration

This notebook demonstrates loading and exploring F1 calendar data for the European races of each season.

- **Version**: 0.1 (Feb 2020)
- **Data**: 2010-2019 seasons

In [None]:
# Prepend '../src' to the module search path so the project package can be
# imported. The path is relative, so it resolves against the kernel's working
# directory -- NOTE(review): presumably the notebook's own folder; confirm.
import sys
sys.path.insert(0, '../src')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Project-local helpers; expected to be found through the '../src' entry above.
from f1ops.data_loader import get_available_seasons, get_european_races, load_circuits
from f1ops.config import EUROPEAN_COUNTRIES

# Render matplotlib figures inline and apply seaborn's 'whitegrid' style to
# the plots that follow.
%matplotlib inline
sns.set_style('whitegrid')

## 1. Load Available Seasons

In [None]:
# Seasons reported as available by the data loader; later cells iterate over
# this collection.
seasons = get_available_seasons()

# Report the seasons and how many there are, one line each.
print(
    f"Available seasons: {seasons}",
    f"Total seasons: {len(seasons)}",
    sep="\n",
)

## 2. Load Circuits Database

In [None]:
# Load the circuits table and report how many rows it holds.
# NOTE(review): the label says "European circuits"; confirm load_circuits()
# returns only European entries.
circuits_df = load_circuits()
print(f"Total European circuits: {circuits_df.shape[0]}")

# Preview the first ten rows as the cell's rich output.
circuits_df.head(n=10)

## 3. Analyze a Sample Season (2019)

In [None]:
# Fetch the European races of the 2019 season and report how many there are.
races_2019 = get_european_races(2019)
print(f"European races in 2019: {len(races_2019)}")

# Flatten each race (and its circuit) into one plain record per race.
races_data = [
    {
        'round': race.round,
        'race_name': race.race_name,
        'circuit': race.circuit.name,
        'city': race.circuit.city,
        'country': race.circuit.country,
        'date': race.race_date,
        'latitude': race.circuit.latitude,
        'longitude': race.circuit.longitude,
    }
    for race in races_2019
]

# Passing the column list explicitly keeps the schema stable: if no races are
# returned, the frame still has these columns (just zero rows), so later
# lookups such as df_2019['country'] do not fail with KeyError.
race_columns = [
    'round', 'race_name', 'circuit', 'city',
    'country', 'date', 'latitude', 'longitude',
]
df_2019 = pd.DataFrame(races_data, columns=race_columns)
df_2019

## 4. Geographic Distribution

In [None]:
# Scatter every circuit by its coordinates (longitude on x, latitude on y).
fig, ax = plt.subplots(figsize=(12, 8))
ax.scatter(circuits_df['longitude'], circuits_df['latitude'], s=100, alpha=0.6)

# Label each point with its city, nudged 5 points up and to the right so the
# text does not sit on top of the marker.
for city, lon, lat in zip(circuits_df['city'],
                          circuits_df['longitude'],
                          circuits_df['latitude']):
    ax.annotate(city, (lon, lat),
                fontsize=8, xytext=(5, 5), textcoords='offset points')

ax.set_xlabel('Longitude')
ax.set_ylabel('Latitude')
ax.set_title('European F1 Circuits Distribution')
ax.grid(True, alpha=0.3)
plt.show()

## 5. Races by Country

In [None]:
# Number of rows in the 2019 table per country, most frequent first.
country_counts = df_2019['country'].value_counts()

# Draw the counts as a bar chart on an explicitly created axes.
fig, ax = plt.subplots(figsize=(10, 6))
country_counts.plot(kind='bar', ax=ax)
ax.set_title('2019 European Races by Country')
ax.set_xlabel('Country')
ax.set_ylabel('Number of Races')

# Tilt the country names and right-align them under their bars.
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
fig.tight_layout()
plt.show()

## 6. Seasonal Patterns Across Years

In [None]:
# Map each available season to its number of European races.
races_per_year = {
    season: len(get_european_races(season))
    for season in seasons
}

# Line plot of the per-season counts, one marker per season.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(list(races_per_year), list(races_per_year.values()),
        marker='o', linewidth=2, markersize=8)
ax.set_xlabel('Season')
ax.set_ylabel('Number of European Races')
ax.set_title('European F1 Races Per Season (2010-2019)')
ax.grid(True, alpha=0.3)

# Put one tick on every season and tilt the labels.
plt.xticks(seasons, rotation=45)
fig.tight_layout()
plt.show()

## Summary

This notebook demonstrated:
- Loading F1 calendar data for 2010-2019
- Exploring European circuits distribution
- Analyzing 2019 races by country and the number of European races per season
- Basic sanity checks on the loaded data (season, circuit, and race counts)

**Next**: Route estimation and distance calculations