<h1>Visualize your Data</h1>

<h3>Data Visualization Libraries</h3>
<ul>
    <li><a href="https://matplotlib.org/" target="_blank">Matplotlib</a> - General-purpose plotting</li>
    <li><a href="https://seaborn.pydata.org/" target="_blank">Seaborn</a> - Statistical graphics and beautiful themes</li>
    <li><a href="https://plotly.com/python/" target="_blank">Plotly</a> - Interactive and web-based plots</li>
    <li><a href="https://geopandas.org/en/stable/index.html" target="_blank">GeoPandas</a> - Geospatial data visualization</li>
</ul>

In [None]:
# load pandas and the plotting libraries (seaborn, matplotlib)

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

<h3>Data</h3>

In [None]:
# Read the poll data and convert the "date" column to datetime in one chain,
# so the x-axis of later plots is treated as time rather than text.
scotus = (
    pd.read_csv("scotus_approval.csv")
    .assign(date=lambda frame: pd.to_datetime(frame["date"]))
)

In [None]:
# Keep only the rows whose pollster is YouGov
is_yougov = scotus["pollster"] == "YouGov"

scotus = scotus[is_yougov]
scotus

### Line Chart

In [None]:
# Line chart of per_yes over date, drawn in the row order of the table
scotus.plot.line(x="date", y="per_yes")

plt.show()

In [None]:
# Order the rows chronologically (ascending is the default) so the line runs oldest to newest
scotus = scotus.sort_values("date")

In [None]:
# Same chart as before, now that the rows are sorted by date
scotus.plot(kind="line", x="date", y="per_yes")

# Render the figure
plt.show()

In [None]:
# Style the line: pick a color and make it thicker
line_style = {
    "color": "coral",
    "linewidth": 3,  # default is 1.5
}

scotus.plot(x="date", y="per_yes", **line_style)

plt.show()

In [None]:
# Same styled line as before; DataFrame.plot returns the Axes it drew on
ax = scotus.plot(x="date", y="per_yes", color="coral", linewidth=3)

# Add information to the plot: a left-aligned title, a gray note at the
# bottom of the figure (y=0 is the bottom edge), and a y-axis label
ax.set_title("SCOTUS Approval in 2023", loc="left")
ax.figure.suptitle("polls from YouGov", y=0, color="gray")
ax.set_ylabel("Approval")

plt.show()

In [None]:
# Switch to seaborn's "whitegrid" theme
sns.set_theme(style="whitegrid")
# Alternative to try: sns.set_theme(style="white")

ax = scotus.plot(
    x="date",
    y="per_yes",
    color="coral",
    linewidth=3,  # default is 1.5
)

ax.set_title("SCOTUS Approval in 2023", loc="left")
ax.figure.suptitle("polls from YouGov", y=0, color="gray")
ax.set_ylabel("Approval")

# Optional: sns.despine(left=True, bottom=True, right=True, top=True)
# removes the border lines (spines) around the plot.

plt.show()

In [None]:
# Plot approval over time, then take control of the date ticks on the x-axis.
#
# x_compat=True tells pandas to keep plain matplotlib date units on the x-axis.
# Without it, pandas can switch to its own tick handling when the dates are
# regularly spaced, and the matplotlib.dates locator/formatter set below would
# then label the ticks incorrectly. For irregularly spaced dates nothing changes.
ax = scotus.plot(
    x="date",
    y="per_yes",
    color="coral",
    linewidth=3,
    x_compat=True,
)

plt.title("SCOTUS Approval in 2023", loc="left")
plt.suptitle("polls from YouGov", y=0, color="gray")  # y=0 places the note at the bottom of the figure
plt.ylabel("Approval")

# Remove all four border lines (spines)
sns.despine(left=True, bottom=True, right=True, top=True)

# One major tick every 6 weeks, labelled as abbreviated month + day (e.g. "Oct 15")
ax.xaxis.set_major_locator(mdates.WeekdayLocator(interval=6))
ax.xaxis.set_major_formatter(mdates.DateFormatter("%b %d"))

plt.show()

#### 🗓️ Date-related directives

| Code | Meaning | Example |
|------|----------|----------|
| `%Y` | 4-digit year | 2025 |
| `%y` | 2-digit year | 25 |
| `%m` | Month number (01–12) | 10 |
| `%B` | Full month name | October |
| `%b` | Abbreviated month name | Oct |
| `%d` | Day of the month (01–31) | 15 |
| `%a` | Abbreviated weekday name | Wed |
| `%A` | Full weekday name | Wednesday |
| `%w` | Weekday number (0=Sunday, 6=Saturday) | 3 |
| `%j` | Day of year (001–366) | 288 |


In [None]:
# Same chart, now with one tick per month.
#
# x_compat=True keeps plain matplotlib date units on the x-axis so that the
# matplotlib.dates locator/formatter below label the ticks correctly even when
# the dates are regularly spaced (pandas would otherwise use its own tick
# handling in that case). For irregularly spaced dates nothing changes.
ax = scotus.plot(
    x="date",
    y="per_yes",
    color="coral",
    linewidth=3,
    x_compat=True,
)

plt.title("SCOTUS Approval in 2023", loc="left")
plt.suptitle("polls from YouGov", y=0, color="gray")  # y=0 places the note at the bottom of the figure
plt.ylabel("Approval")

# Remove all four border lines (spines)
sns.despine(left=True, bottom=True, right=True, top=True)

# One major tick every month, labelled as abbreviated month + 4-digit year (e.g. "Oct 2025")
ax.xaxis.set_major_locator(mdates.MonthLocator(interval=1))
ax.xaxis.set_major_formatter(mdates.DateFormatter("%b %Y"))

plt.show()

In [None]:
# Same chart, now with one tick every three months.
#
# x_compat=True keeps plain matplotlib date units on the x-axis so that the
# matplotlib.dates locator/formatter below label the ticks correctly even when
# the dates are regularly spaced (pandas would otherwise use its own tick
# handling in that case). For irregularly spaced dates nothing changes.
ax = scotus.plot(
    x="date",
    y="per_yes",
    color="coral",
    linewidth=3,
    x_compat=True,
)

plt.title("SCOTUS Approval in 2023", loc="left")
plt.suptitle("polls from YouGov", y=0, color="gray")  # y=0 places the note at the bottom of the figure
plt.ylabel("Approval")

# Remove all four border lines (spines)
sns.despine(left=True, bottom=True, right=True, top=True)

# One major tick every 3 months, labelled as abbreviated month + 4-digit year
ax.xaxis.set_major_locator(mdates.MonthLocator(interval=3))
ax.xaxis.set_major_formatter(mdates.DateFormatter("%b %Y"))

plt.show()

In [None]:
from statsmodels.nonparametric.smoothers_lowess import lowess

# LOWESS needs a numeric x, so express each date as an ordinal day number
scotus["date_num"] = scotus["date"].map(pd.Timestamp.toordinal)

# By default lowess() drops missing values and returns its result sorted by x.
# Fit on rows that are already NaN-free and sorted by date, so the smoothed
# values line up one-to-one with the dates they are plotted against. This no
# longer relies on an earlier cell having sorted the table, or on per_yes
# having no gaps.
trend_input = scotus.dropna(subset=["per_yes"]).sort_values("date")

# Apply LOWESS smoothing; frac is the share of the data used for each local fit
smoothed = lowess(trend_input["per_yes"], trend_input["date_num"], frac=0.3)

# Column 0 of the result is the sorted x, column 1 the smoothed y
plt.plot(trend_input["date"], smoothed[:, 1], color="coral", linewidth=3)

plt.title("SCOTUS Approval in 2023", loc="left")
plt.suptitle("polls from YouGov", y=0, color="gray")  # y=0 places the note at the bottom of the figure
plt.ylabel("Approval")

# Remove all four border lines (spines)
sns.despine(left=True, bottom=True, right=True, top=True)

# One major tick every 6 weeks, labelled as abbreviated month + day
ax = plt.gca()
ax.xaxis.set_major_locator(mdates.WeekdayLocator(interval=6))
ax.xaxis.set_major_formatter(mdates.DateFormatter("%b %d"))

plt.show()

### Histograms
Histograms allow you to see the distribution of a continuous (numeric, e.g. float or int) variable

In [None]:
# Load the demographics table used by the histogram and bar-chart examples
demo = pd.read_csv(filepath_or_buffer="demographics.csv")

In [None]:
# Histogram of age split into 30 bins
sns.histplot(
    data=demo,
    x="age",
    bins=30,
)

#### Binning
The binning arguments let you group continuous data into discrete intervals, or bins: `bins` sets the number of bins, while `binwidth` sets the width of each bin

In [None]:
# Fewer bins (10) give a coarser view of the same distribution
sns.histplot(
    data=demo,
    x="age",
    bins=10,
)

In [None]:
# Set the width of each bin (15 units of age) instead of the number of bins
sns.histplot(
    data=demo,
    x="age",
    binwidth=15,
)

#### Additional optimizations

In [None]:
# Visual styling for the bars: fill color, outline color and transparency
bar_style = {
    "edgecolor": "coral",
    "color": "skyblue",
    "alpha": 0.5,
}

sns.histplot(data=demo, x="age", bins=25, **bar_style)

In [None]:
# One bar per unit of age; histplot returns the Axes it drew on
ax = sns.histplot(data=demo, x="age", binwidth=1, color="skyblue", alpha=0.8)

# Title and axis labels
ax.set_title("Count of Population at Each Age", fontsize=14, loc="left", color="navy")
ax.set_xlabel("Age", fontsize=12)
ax.set_ylabel("Count", fontsize=12)

plt.show()

In [None]:
# Compare three bin widths side by side in a 1x3 grid of histograms
fig, axes = plt.subplots(1, 3, figsize=(15, 5))
# Tip: pass sharey=True to plt.subplots to give all panels the same y-axis scale

# Draw one histogram per panel; only the bin width differs between them
for panel, width in zip(axes, (1, 5, 10)):
    sns.histplot(
        data=demo,
        x="age",
        binwidth=width,
        color="skyblue",
        alpha=0.8,
        ax=panel,
    )
    panel.set_title(f"Binwidth = {width}", fontsize=14, loc="left", color="navy")
    panel.set_xlabel("Age", fontsize=12)

# Only the left-most panel keeps a y-axis label; the others are blanked
axes[0].set_ylabel("Count", fontsize=12)
for panel in axes[1:]:
    panel.set_ylabel("")

# Adjust layout spacing
plt.tight_layout()
plt.show()


### Multiple plots

We can layer multiple elements on the same plot by adding each one with its own function call

In [None]:
# First layer: the histogram
sns.histplot(data=demo, x="age", bins=25, edgecolor="coral", color="skyblue", alpha=0.5)

# Second layer: a vertical reference line on the same axes
reference_line = {
    "x": 40,             # position on the x-axis
    "color": "navy",     # line color
    "linewidth": 2,      # line thickness
    "linestyle": "-",    # solid line; use "--" for a dashed one
}
plt.axvline(**reference_line)

plt.show()

### Faceting

The `sns.displot()` function allows you to break your plot out into separate panels (facets) by a categorical variable

In [None]:
# Faceted histogram: one panel per value of 'inccat', wrapped into 2 columns
facet_options = dict(col="inccat", col_wrap=2)
bar_options = dict(bins=25, color="skyblue", edgecolor="coral", alpha=0.5)

g = sns.displot(data=demo, x="age", **facet_options, **bar_options)

# Draw the same vertical reference line on every panel
for ax in g.axes.flatten():
    ax.axvline(x=40, color="navy", linewidth=3)

plt.show()

### Bar Graph
The `sns.countplot()` function allows you to create a bar chart that shows the number of cases in each group of a categorical variable

In [None]:
# Sort the category values so the bars appear in a predictable order
carcat_levels = sorted(demo['carcat'].unique())

# One bar per car category; bar height = number of rows in that category
sns.countplot(data=demo, x="carcat", order=carcat_levels)

plt.show()

The `sns.barplot()` function allows you to create a bar plot from a categorical variable and a continuous variable

In [None]:
# Sort the category values so the bars appear in a predictable order
carcat_levels = sorted(demo['carcat'].unique())

# Bar height = total income per car category
sns.barplot(
    data=demo,
    x="carcat",
    y="income",
    order=carcat_levels,
    estimator=sum,    # add up income within each bar (the default would be the mean)
    errorbar=None,    # no error bars
    color="skyblue",
)

plt.show()

### Reorder Plot

You can order the bars using an order that you define separately

In [None]:
# Total income per car category, sorted from smallest to largest
(
    demo
    .groupby("carcat")["income"]
    .sum()
    .sort_values()
)

In [None]:
# Car categories ranked by total income (ascending); the index holds the category labels
income_totals = demo.groupby("carcat")["income"].sum()
order = income_totals.sort_values().index

# Same bar plot as before, with the bars arranged in that ranked order
sns.barplot(
    data=demo,
    x="carcat",
    y="income",
    order=order,
    estimator=sum,
    errorbar=None,
    color="skyblue",
)

plt.show()

### Stacked Bar Plot

You can use the argument *stacked=True* to map an additional variable onto individual bars

In [None]:
# Total income for every (carcat, ed) pair ...
income_totals = demo.groupby(["carcat", "ed"])["income"].sum()

# ... reshaped to a wide table: one row per carcat, one column per ed value
stacked_data = income_totals.unstack()
stacked_data

In [None]:
# Rank the rows (car categories) by their total across all columns, ascending
row_totals = stacked_data.sum(axis=1)
order = row_totals.sort_values().index
print(order)

# Reorder the rows to follow that ranking
stacked_data = stacked_data.loc[order]
stacked_data

In [None]:
# One pastel color per column of stacked_data (i.e. per stacked segment)
segment_colors = sns.color_palette("pastel", n_colors=len(stacked_data.columns))

# Each row becomes a bar; each column becomes a segment stacked within it
ax = stacked_data.plot(kind="bar", stacked=True, color=segment_colors)

ax.set_title("Total Income by Car Category and Education Level", loc="left", color="navy")
ax.set_xlabel("Car Category", fontsize=12)
ax.set_ylabel("Total Income", fontsize=12)
ax.legend(title="Education Level")
plt.xticks(rotation=0)  # keep the category labels horizontal

# Optional: call plt.tight_layout() here if labels get clipped
plt.show()

In [None]:
# Long-format summary: for every (carcat, ed) pair, the summed income
# and the number of rows in that group
by_carcat_and_ed = demo.groupby(["carcat", "ed"])
agg_data = by_carcat_and_ed.agg(
    total_income=("income", "sum"),
    count=("income", "size"),
).reset_index()

# Car categories ranked by overall total income (ascending)
carcat_totals = agg_data.groupby("carcat")["total_income"].sum()
order = carcat_totals.sort_values().index

In [None]:
# Inspect the aggregated table: one row per (carcat, ed) pair with its total_income and count
agg_data

In [None]:
import plotly.express as px

# What the hover tooltip shows for each bar segment
hover_columns = {
    "total_income": ":,",  # show total_income with comma separators
    "count": True,         # include the count column
    "carcat": False,       # leave carcat out of the tooltip
}

# Interactive stacked bar chart: one bar per carcat, split into segments by ed
fig = px.bar(
    agg_data,
    x="carcat",
    y="total_income",
    color="ed",
    barmode="stack",
    category_orders={"carcat": order},  # arrange the bars in the ranked order
    hover_data=hover_columns,
    color_discrete_sequence=px.colors.qualitative.Pastel,
    title="Total Income by Car Category and Education Level",
    width=1100,
    height=900,
)

# Axis/legend titles, title font, background color and hover behavior
layout_options = dict(
    xaxis_title="Car Category",
    yaxis_title="Total Income",
    legend_title="Education Level",
    title_font=dict(size=18, color="navy"),
    plot_bgcolor="white",
    hovermode="x unified",
)
fig.update_layout(**layout_options)

fig.show()
