Xixi Lin_Data Visualization_Per-capita_Energy_Use

In [1]:
# Step 1: install plotly and nbformat with the %pip magic.
# The kernel may need a restart afterwards before the new packages can be used.

%pip install plotly nbformat


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.3[0m[39;49m -> [0m[32;49m26.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip3 install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [2]:
# Step 2: imports and notebook-wide plotting defaults
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd
import plotly.io as pio

from plotly.subplots import make_subplots

import sys
# Install requests using the same Python interpreter that runs this kernel.
!{sys.executable} -m pip install requests

# Default renderer used by every fig.show() call below.
pio.renderers.default = "plotly_mimetype"

# Defaults for all plotly.express figures; individual cells may override them.
px.defaults.template = "plotly_white"
px.defaults.width = 800
px.defaults.height = 450



[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.3[0m[39;49m -> [0m[32;49m26.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip3 install --upgrade pip[0m


In [3]:
# Per-capita energy use CSV published by Our World in Data (OWID).
ENERGY_CSV = "https://ourworldindata.org/grapher/per-capita-energy-use.csv"

import requests
from io import StringIO

# Download with a browser-like User-Agent header. The timeout makes a stalled
# connection raise an error instead of blocking the kernel indefinitely
# (requests applies no timeout unless one is given).
r = requests.get(ENERGY_CSV, headers={"User-Agent": "Mozilla/5.0"}, timeout=30)
r.raise_for_status()  # stop here with the HTTP error if the download failed

# Parse the downloaded text into a DataFrame and preview the first rows.
df = pd.read_csv(StringIO(r.text))
df.head()

Unnamed: 0,Entity,Code,Year,Per capita energy consumption
0,Afghanistan,AFG,1980,481.20862
1,Afghanistan,AFG,1981,610.639
2,Afghanistan,AFG,1982,717.7664
3,Afghanistan,AFG,1983,905.12665
4,Afghanistan,AFG,1984,887.371


In [8]:
# Locate the value column automatically: the first column that is not one of
# the identifier columns Entity / Code / Year.
value_col = [c for c in df.columns if c not in ["Entity", "Code", "Year"]][0]

# Basic cleaning: coerce the value column to numeric and drop rows without a value.
df[value_col] = pd.to_numeric(df[value_col], errors="coerce")
df = df.dropna(subset=[value_col])

# Keep countries only, i.e. rows with a 3-character code. Aggregates either
# carry a longer code (e.g. an OWID_ prefix) or have no code at all.
# A missing code is NaN, and astype(str) turns NaN into the string "nan",
# which is also 3 characters long, so missing codes are excluded explicitly.
has_country_code = df["Code"].notna() & (df["Code"].astype(str).str.len() == 3)
countries = df[has_country_code].copy()

<p style="font-family: Arial; font-size: 20px;">
1. Bar Chart: Top 16 Countries by Per-Capita Energy Use (2024)
</p>

<p style="font-family: Arial; font-size: 15px;">
This bar chart presents the top 16 countries with the highest per-capita energy consumption in 2024. Countries are ranked by energy use (kWh per person), enabling direct comparison. The index relative to the world average (World = 100) provides additional context, showing how much each country exceeds the global benchmark. The reference line marking the world average highlights the significant gap between these top countries and typical global energy use levels.
</p>

In [9]:
# Country rows for 2024 only.
df_2024 = countries[countries["Year"] == 2024].copy()

# World average: prefer the dataset's own "World" row for 2024; if it is
# absent, fall back to the mean over the 2024 country rows.
world_row = df[(df["Entity"] == "World") & (df["Year"] == 2024)]
if not world_row.empty:
    world_avg = float(world_row[value_col].iloc[0])
else:
    world_avg = float(df_2024[value_col].mean())

# Index each country against the world average (World = 100).
df_2024["Index_vs_World_100"] = df_2024[value_col] / world_avg * 100

# The 16 countries with the highest per-capita value.
top16 = df_2024.sort_values(value_col, ascending=False).head(16).copy()

fig1 = px.bar(
    top16,
    x=value_col,
    y="Entity",
    orientation="h",
    text=value_col,  # print the value at the end of each bar
    # Explicit custom_data fixes the positions used by the hovertemplate below:
    # customdata[0] = Code, customdata[1] = Index_vs_World_100.
    custom_data=["Code", "Index_vs_World_100"],
    labels={
        value_col: "Primary energy consumption per capita (kWh/person)",
        "Entity": "Country"
    },
    title="Top 16 countries by per-capita energy use (2024) + index vs world average"
)

# Layout: make the figure tall enough for all 16 bars and widen the left
# margin so long country names are not clipped.
fig1.update_layout(
    height=750,
    margin=dict(l=220, r=40, t=70, b=50),
    paper_bgcolor="white",
    plot_bgcolor="white"
)

# Show every y-axis label and order the bars from smallest (bottom) to largest (top).
fig1.update_yaxes(
    categoryorder="total ascending",
    tickmode="linear"
)

# Bar labels: thousands-separated integers placed outside the bar.
fig1.update_traces(
    texttemplate="%{x:,.0f}",
    textposition="outside",
    cliponaxis=False,  # keep the outside labels from being clipped at the axis edge
    hovertemplate="<b>%{y}</b><br>kWh/person: %{x:,.0f}<br>Index vs World=100: %{customdata[1]:.1f}<extra></extra>"
)

# Dashed vertical reference line at the world average.
fig1.add_vline(
    x=world_avg,
    line_width=2,
    line_dash="dash",
    annotation_text="World avg",
    annotation_position="top"
)

fig1.show()

<p style="font-family: Arial; font-size: 20px;">
2. Line Chart: Per-Capita Energy Use in Six Countries (1965–2024)
</p>

<p style="font-family: Arial; font-size: 15px;">
This line chart shows per-capita energy consumption (kWh per person) from 1965 to 2024 for Canada, the United States, China, France, the United Kingdom, and India.
Canada and the United States consistently have the highest energy use, with growth until the early 2000s followed by a gradual decline. France and the UK show moderate levels and a similar pattern of stabilization or decrease in recent years. In contrast, China starts from a very low level but increases rapidly after 2000 due to industrialization. India remains the lowest throughout the period, though its energy use steadily rises over time.
Overall, the chart highlights strong differences between developed and emerging economies and shows how global energy patterns have shifted over the past six decades.
</p>

In [10]:
# The cleaned `df` and `value_col` from the data-loading cells earlier in the
# notebook are reused here; downloading the same CSV again is unnecessary and
# would bypass the User-Agent request used for the first load.

# Keep only the selected countries (names must match OWID's "Entity" values exactly).
selected = ["Canada", "United States", "China", "France", "United Kingdom", "India"]
df_sel = df[df["Entity"].isin(selected)].copy()

# One line per country, drawn in chronological order.
fig2 = px.line(
    df_sel.sort_values("Year"),
    x="Year",
    y=value_col,
    color="Entity",
    title="Per-capita energy use (kWh/person), 1965–2024(Canada, US, China, France, UK, India)",
    labels={value_col: "Energy use per person (kWh/person)", "Entity": "Country"}
)

fig2.update_layout(
    paper_bgcolor="white",
    plot_bgcolor="white",
    margin=dict(l=40, r=20, t=70, b=40)
)

fig2.show()

<p style="font-family: Arial; font-size: 20px;">
3. Violin Plot: Global Energy Use Distribution (1965 vs 2024)
</p>

<p style="font-family: Arial; font-size: 15px;">
The violin plots compare per-capita energy use in 1965 and 2024. On the original scale, the 2024 distribution is clearly shifted toward higher values, indicating that energy consumption per person has increased across most countries over time. The higher median suggests that the “typical” country now uses more energy than in 1965. At the same time, the longer upper tail in 2024 shows that some high-consuming countries have expanded their energy use even further, increasing the gap between countries.

In the log-transformed version, extreme values are compressed, which helps reveal the overall structure of the distribution. Even after adjusting for skewness, the 2024 distribution still remains noticeably higher than 1965. This suggests that the growth in energy consumption is not only driven by a few outliers, but reflects a broader global shift. Overall, the comparison highlights both rising energy demand and persistent inequality in energy use across countries.
</p>

In [11]:
# The cleaned `df` and `value_col` from the earlier cells are reused here
# instead of downloading the same CSV a third time.

# Keep countries only (3-character code). A missing code is NaN, and
# astype(str) turns NaN into the 3-character string "nan", so rows without a
# code are excluded explicitly.
has_country_code = df["Code"].notna() & (df["Code"].astype(str).str.len() == 3)
countries = df[has_country_code].copy()

# Keep only the two years being compared.
compare = countries[countries["Year"].isin([1965, 2024])].copy()

# Optional trimming: drop values outside the 1st-99th percentile (computed
# over both years together) so a few extreme countries do not dominate the plot.
# .copy() makes the result an independent frame that later cells can add columns to.
low, high = compare[value_col].quantile([0.01, 0.99])
compare = compare[compare[value_col].between(low, high)].copy()

# Violin plot per year with an embedded box plot and all individual points.
fig = px.violin(
    compare,
    x="Year",
    y=value_col,
    color="Year",
    box=True,          # draw a box plot inside each violin
    points="all",      # show every country as a point
    hover_name="Entity",
    labels={value_col: "Energy use per person (kWh/person)"},
    title="Distribution of per-capita energy use: 1965 vs 2024"
)

fig.update_layout(
    paper_bgcolor="white",
    plot_bgcolor="white",
    showlegend=False
)

fig.show()

In [8]:
# Add the base-10 log of the value column. assign() returns a new frame, so
# the column is not written into a filtered slice of another DataFrame
# (which would trigger pandas' SettingWithCopyWarning).
compare = compare.assign(log_energy=np.log10(compare[value_col]))

# Same violin comparison as the previous figure, on the log10 scale.
fig = px.violin(
    compare,
    x="Year",
    y="log_energy",
    color="Year",
    box=True,
    points="all",
    hover_name="Entity",
    labels={"log_energy": "log10 of energy use per person (kWh/person)"},
    title="Log distribution of per-capita energy use (1965 vs 2024)"
)
fig.show()

In [9]:
# Overlaid, density-normalised histograms of the value column, one per year,
# with a violin drawn in the marginal panel.
hist_options = dict(
    x=value_col,
    color="Year",
    marginal="violin",
    histnorm="density",
    opacity=0.6,
    barmode="overlay",
    title="KDE-style distribution comparison: 1965 vs 2024",
)

fig = px.histogram(compare, **hist_options)
fig.show()