# Exploratory Data Analysis
## Swedish Environmental Analytics & Forecasting Platform

This notebook analyzes the environmental datasets for Swedish cities.

### Goals:
- Understand data structure and schema
- Analyze missing data patterns
- Visualize temporal patterns
- Analyze correlations

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sys
import os

# Make the parent of the current working directory importable so that the
# `backend` package imports below resolve. The path is relative to the
# directory the notebook kernel was started in.
project_root = os.path.abspath('..')
# Guard against duplicate entries when this cell is re-run.
if project_root not in sys.path:
    sys.path.append(project_root)

from backend.data.loader import DataLoader
from backend.data.processor import DataProcessor

### Load Data

In [None]:
# Pull a 100,000-row sample of the raw data through the loader and pass it
# straight into the cleaning step, so the analysis cells stay quick.
loader = DataLoader()
df_raw = DataProcessor.clean_raw_data(
    loader.load_raw_data(sample=True, n_rows=100_000)
)
print(f"Raw Data Shape: {df_raw.shape}")
# Last expression of the cell: the notebook renders the first rows.
df_raw.head()

### Data Structure & Missing Values

In [None]:
# Print the structural summary produced by df_raw.info().
# NOTE(review): presumably df_raw is a pandas DataFrame, in which case this
# lists column names, non-null counts and dtypes — confirm against
# DataProcessor.clean_raw_data.
df_raw.info()

In [None]:
# Count null entries per column and chart only the columns that have any.
missing = df_raw.isnull().sum()
missing_present = missing[missing > 0]
if missing_present.empty:
    # Nothing to draw: bar-plotting an empty Series can raise (or at best
    # yields a blank chart), so report the result as text instead.
    print("No missing values found.")
else:
    missing_present.plot(kind='bar', figsize=(10, 6))
    plt.title("Missing Values")
    plt.ylabel("Missing count")
    plt.show()

### Temperature Trends

In [None]:
# Plot temperature against time, one line per city, for a handful of cities.
plt.figure(figsize=(15, 6))
# unique() keeps order of first appearance, so this picks the first five
# cities encountered in the sample — it is not a ranking of any kind.
cities = df_raw['city'].unique()[:5]
for city in cities:
    # Sort chronologically so the line does not double back on itself when
    # the sampled rows are not already ordered by time.
    subset = df_raw[df_raw['city'] == city].sort_values('timestamp')
    plt.plot(subset['timestamp'], subset['temperature'], label=city, alpha=0.7)
plt.xlabel("Timestamp")
plt.ylabel("Temperature")
plt.legend()
plt.title("Temperature Trends for First 5 Cities")
plt.show()

### Correlations

In [None]:
# Pairwise correlation of the selected measurement columns, rendered as an
# annotated heatmap.
numerical_cols = [
    'temperature',
    'feels_like',
    'pressure',
    'humidity',
    'wind_speed',
]
corr = df_raw[numerical_cols].corr()

# Draw on an explicit figure/axes pair rather than the implicit current axes.
corr_fig, corr_ax = plt.subplots(figsize=(10, 8))
sns.heatmap(corr, annot=True, cmap='coolwarm', ax=corr_ax)
corr_ax.set_title("Correlation Matrix")
plt.show()