# Initial Data Exploration - IPL Dataset

This notebook provides a quick overview of the IPL dataset structure and basic statistics.

In [None]:
import pandas as pd
import numpy as np

## Load Dataset

In [None]:
# Read the raw CSV into a DataFrame; the path is relative to the
# directory the notebook kernel is running in.
raw_csv_path = '../data/ipl_raw.csv'
df = pd.read_csv(raw_csv_path)

## Dataset Overview

In [None]:
# df.shape is (rows, columns); unpack it once and report each part.
n_records, n_features = df.shape
print(f"Dataset Shape: {df.shape}")
print(f"Total Records: {n_records:,}")  # thousands separator for readability
print(f"Total Features: {n_features}")

## Column Information

In [None]:
# Print a concise summary of the frame: column names, non-null counts,
# dtypes and approximate memory usage. info() writes to stdout and returns None.
df.info()

## Descriptive Statistics

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max). With default
# arguments only numeric columns are summarised when the frame has any.
df.describe()

## First Few Rows

In [None]:
# Show the first five rows (head() defaults to n=5) as a quick sanity check
# of the loaded data.
df.head()

## Missing Values

In [None]:
# Count nulls per column, then report only the columns that have any.
missing = df.isnull().sum()
missing_cols = missing[missing > 0]

# Print either the per-column counts or the "all clear" message, never both:
# printing an empty Series would emit a confusing "Series([], dtype: int64)".
if missing_cols.empty:
    print("No missing values found!")
else:
    print(missing_cols)

## Unique Values - Categorical Columns

In [None]:
# Report the cardinality of every object-dtype column; for columns with
# fewer than 20 distinct values, list the values as well.
categorical_cols = df.select_dtypes(include=['object']).columns
for column in categorical_cols:
    series = df[column]
    n_unique = series.nunique()  # nunique() ignores NaN by default
    report = [f"{column}: {n_unique} unique values"]
    if n_unique < 20:
        # unique() keeps NaN, so the listing may show one more entry than the count
        report.append(f"  Values: {series.unique()}")
    # Trailing blank line separates one column's report from the next.
    print("\n".join(report), end="\n\n")

## Season Distribution

In [None]:
# Rows per season, ordered by season label rather than by frequency.
season_counts = df['season'].value_counts()
season_counts.sort_index()

## Match Type Distribution

In [None]:
# Rows per match type, most frequent first (value_counts' default order).
match_types = df['match_type']
match_types.value_counts()

## Top 10 Venues by Match Count

In [None]:
# value_counts() sorts by frequency descending, so the first ten entries
# are the ten most frequent venues.
venue_counts = df['venue'].value_counts()
venue_counts.head(10)