# Exploratory Data Analysis (EDA)
Day 2 — Global Partner Dataset

This notebook explores the enriched partner dataset, including:
- Country distribution
- Revenue distribution
- Fit score relationships
- Top partner candidates
- Summary statistics


In [None]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots


In [None]:
# Load the enriched partner table; the path is relative to the notebook's
# working directory.
df = pd.read_csv(filepath_or_buffer='data/processed/partners_enriched.csv')
# Show the first five rows as this cell's output.
df.head(n=5)


In [None]:
# Count partners per country and draw the counts as a bar chart.
#
# rename_axis(...) + reset_index(name=...) pins the resulting column names to
# 'country' / 'count' on every pandas version. A bare
# value_counts().reset_index() only produces those names on pandas >= 2.0;
# older releases yield 'index' / 'country', which breaks the x/y mapping below.
country_counts = (
    df['country']
    .value_counts()
    .rename_axis('country')
    .reset_index(name='count')
)
fig1 = px.bar(
    country_counts,
    x='country', y='count',
    title='Partners by Country',
    labels={'country': 'Country', 'count': 'Number of Partners'},
    color='country'
)
fig1.show()


In [None]:
# Box plot of revenue per country, one coloured box per country.
fig2 = px.box(
    df, x='country', y='revenue_usd',
    title='Revenue Distribution by Country',
    labels={'revenue_usd': 'Estimated Revenue (USD)'},
    color='country'
)
# Put revenue on a log scale. The figure method is `update_yaxes` (plural);
# `update_yaxis` does not exist and raises AttributeError.
fig2.update_yaxes(type='log')
fig2.show()


In [None]:
# Scatter of fit score against revenue: one marker per row of `df`, coloured by
# country and sized by the `employees` column. Hovering additionally shows the
# `name` and `clutch_rating` columns.
fig3 = px.scatter(
    data_frame=df,
    x='kaycore_fit_score',
    y='revenue_usd',
    color='country',
    size='employees',
    hover_data=['name', 'clutch_rating'],
    title='Partnership Fit vs Revenue Potential',
    labels={
        'kaycore_fit_score': 'Kaycore Fit Score',
        'revenue_usd': 'Revenue (USD)',
    },
)
fig3.show()


In [None]:
# Take the 20 rows with the highest fit score and plot them as bars, one bar
# per partner name, coloured by country.
top20 = df.nlargest(20, 'kaycore_fit_score')
fig4 = px.bar(
    top20, x='name', y='kaycore_fit_score',
    color='country',
    title='Top 20 Partnership Candidates',
    labels={'kaycore_fit_score': 'Fit Score'}
)
# Tilt the partner-name tick labels so they do not overlap. The figure method
# is `update_xaxes` (plural); `update_xaxis` does not exist and raises
# AttributeError.
fig4.update_xaxes(tickangle=-45)
fig4.show()


In [None]:
# Heading for the summary table rendered as this cell's output.
print("\nSummary by Priority:")
# For each partnership priority: number of non-null revenue values, mean
# revenue, and total revenue.
(
    df
    .groupby('partnership_priority')['revenue_usd']
    .agg(['count', 'mean', 'sum'])
)
