In [2]:
# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px

# Load the dataset from Google Drive
df = pd.read_csv('/Users/areeb/Documents/Education/UCL/Cursor/Data Visualisation UCL/Group Assignment/powerplants (global) - global_power_plants.csv')


# Standardize column names by replacing underscores with spaces
df.columns = [col.replace("_", " ") for col in df.columns]

# Remove fully duplicate rows
df.drop_duplicates(inplace=True)

# Identify fuel-related columns
fuel_columns = ["primary fuel", "secondary fuel", "other fuel2", "other fuel3"]

# Drop rows where all fuel columns are missing
df.dropna(subset=fuel_columns, how="all", inplace=True)

# Drop rows with missing capacity in MW
df.dropna(subset=["capacity in MW"], inplace=True)

# Fill missing generation_gwh_2021 values using estimated_generation_gwh_2021 if available
df["generation gwh 2021"].fillna(df["estimated generation gwh 2021"], inplace=True)

# Process 'start date' column
df["start date"] = df["start date"].astype(str).str[:4]  # Extract year
df["start date"] = pd.to_numeric(df["start date"], errors="coerce")  # Convert to numeric
df.dropna(subset=["start date"], inplace=True)  # Remove invalid years
df["start date"] = df["start date"].astype(int)  # Convert to integer

print("\n✅ Data cleaning completed. Ready for further analysis in the notebook.")

# Print first few rows to verify cleaning
print("\nFirst 5 rows of the cleaned dataset:")
print(df.head())

# Print column data types to confirm correctness
print("\nColumn Data Types:")
print(df.dtypes)

# Print remaining missing values
print("\nRemaining Missing Values:")
print(df.isna().sum())



✅ Data cleaning completed. Ready for further analysis in the notebook.

First 5 rows of the cleaned dataset:
   country code country long name of powerplant  capacity in MW  latitude  \
9           ALB      Albania         Bistrica 1            27.0   39.9116   
10          ALB      Albania             Fierza           500.0   42.2514   
11          ALB      Albania              Koman           600.0   42.1033   
12          ALB      Albania         Lanabregas             5.0   41.3428   
13          ALB      Albania            Shkopet            24.0   41.6796   

    longitude primary fuel secondary fuel other fuel2 other fuel3  start date  \
9     20.1047        Hydro            NaN         NaN         NaN        1965   
10    20.0431        Hydro            NaN         NaN         NaN        1978   
11    19.8224        Hydro            NaN         NaN         NaN        1985   
12    19.8964        Hydro            NaN         NaN         NaN        1951   
13    19.8305        H

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df["generation gwh 2021"].fillna(df["estimated generation gwh 2021"], inplace=True)


In [3]:
import plotly.express as px

# Interactive Global Power Plant Map
fig = px.scatter_mapbox(
    df,
    lat="latitude",
    lon="longitude",
    hover_name="name of powerplant",
    hover_data=["country long", "capacity in MW", "primary fuel"],  # Fixed column names
    color="primary fuel",  # Fixed column name
    size="capacity in MW",  # Fixed column name
    zoom=1.5,
    height=800,
    title="🌍 Global Distribution of Power Plants by Fuel Type"
)

# Improve map layout
fig.update_layout(
    mapbox_style="carto-positron",
    margin={"r": 0, "t": 40, "l": 0, "b": 0}
)

# Show map
fig.show()


  fig = px.scatter_mapbox(


In [4]:
# Aggregate total power capacity per country
capacity_by_country = df.groupby("country long")["capacity in MW"].sum().reset_index()

# Choropleth Map
fig = px.choropleth(
    capacity_by_country,
    locations="country long",
    locationmode="country names",
    color="capacity in MW",
    title="🌍 Global Distribution of Power Capacity",
    labels={"capacity in MW": "Total Capacity (MW)"},
    color_continuous_scale="Viridis",  # Adjust color scale for visibility
    height=600
)

# Show map
fig.show()

The Global Stage

In [5]:
# Aggregate capacity by primary fuel type
capacity_by_fuel = df.groupby("primary fuel")["capacity in MW"].sum().reset_index()  # Fixed column names

# Select top 10 fuel types
capacity_by_fuel = capacity_by_fuel.sort_values("capacity in MW", ascending=False).head(10)

# Stacked Bar Chart
fig = px.bar(
    capacity_by_fuel,
    x="primary fuel",  # Fixed column name
    y="capacity in MW",  # Fixed column name
    text="capacity in MW",
    title="⚡ Global Power Capacity by Fuel Type",
    labels={"primary fuel": "Fuel Type", "capacity in MW": "Total Capacity (MW)"},
    height=600
)

# Improve visuals
fig.update_traces(texttemplate="%{text:.2s} MW", textposition="outside")
fig.update_layout(xaxis_title="Fuel Type", yaxis_title="Total Capacity (MW)", showlegend=False)

# Show chart
fig.show()

In [6]:
import plotly.express as px

# Aggregate fuel mix
global_fuel_mix = df.groupby("primary fuel")["capacity in MW"].sum().reset_index()

# Calculate percentage contribution
total_capacity = global_fuel_mix["capacity in MW"].sum()
global_fuel_mix["percentage"] = (global_fuel_mix["capacity in MW"] / total_capacity) * 100

# Group small categories under "Other" (threshold <3%)
global_fuel_mix["fuel_category"] = global_fuel_mix["primary fuel"].where(global_fuel_mix["percentage"] >= 3, "Other")

# Recalculate capacity for grouped categories
global_fuel_mix_grouped = global_fuel_mix.groupby("fuel_category")["capacity in MW"].sum().reset_index()

# Custom color mapping (Unique colors per fuel type)
color_map = {
    "Solar": "#2ca02c",  # Green
    "Wind": "#1f77b4",  # Blue
    "Hydro": "#17becf",  # Light Blue
    "Biomass": "#8c564b",  # Brown
    "Geothermal": "#ff7f0e",  # Orange
    "Wave and Tidal": "#aec7e8",  # Light Blue
    "Coal": "#d62728",  # Red
    "Oil": "#ff9896",  # Light Red
    "Gas": "#e377c2",  # Pink
    "Nuclear": "#9467bd",  # Purple
    "Petcoke": "#7f7f7f",  # Grey
    "Waste": "#bcbd22",  # Yellow-Green
    "Cogeneration": "#c49c94",  # Beige
    "Storage": "#f7b6d2",  # Light Pink
    "Other": "#cccccc"  # Light Grey for grouped small categories
}

# Pie Chart
fig = px.pie(
    global_fuel_mix_grouped,
    names="fuel_category",
    values="capacity in MW",
    title="🌍 Global Energy Mix by Fuel Type (Small Categories Grouped)",
    height=600,
    color="fuel_category",
    color_discrete_map=color_map
)

# Show chart
fig.show()


In [7]:
import plotly.express as px

# Define renewable and non-renewable fuel types
renewable_fuels = {"Solar", "Hydro", "Wind", "Biomass", "Geothermal", "Wave and Tidal"}
non_renewable_fuels = {"Gas", "Coal", "Oil", "Nuclear", "Petcoke", "Waste", "Cogeneration", "Storage"}

# Categorize fuels into Renewable or Non-Renewable
df["energy_category"] = df["primary fuel"].apply(lambda x: "Renewable" if x in renewable_fuels else "Non-Renewable")

# Aggregate total capacity by category
renewable_vs_non = df.groupby("energy_category")["capacity in MW"].sum().reset_index()

# Pie Chart
fig = px.pie(
    renewable_vs_non,
    names="energy_category",
    values="capacity in MW",
    title="🌍 Global Renewable vs Non-Renewable Energy Share",
    height=600,
    color="energy_category",
    color_discrete_map={"Renewable": "#2ca02c", "Non-Renewable": "#d62728"}  # Green for Renewable, Red for Non-Renewable
)

# Show chart
fig.show()


In [8]:
# Categorize power plants into Renewable / Non-Renewable
renewable_fuels = {"Solar", "Hydro", "Wind", "Biomass", "Geothermal", "Wave and Tidal"}
df["energy_category"] = df["primary fuel"].apply(lambda x: "Renewable" if x in renewable_fuels else "Non-Renewable")

# Aggregate total capacity
capacity_by_category = df.groupby("energy_category")["capacity in MW"].sum().reset_index()

# Bar chart
fig = px.bar(
    capacity_by_category,
    x="energy_category",
    y="capacity in MW",
    text="capacity in MW",
    title="🔋 Global Renewable vs Non-Renewable Energy Capacity",
    labels={"capacity in MW": "Total Capacity (MW)"},
    height=500
)

# Improve visuals
fig.update_traces(texttemplate="%{text:.2s} MW", textposition="outside")
fig.update_layout(xaxis_title="Energy Category", yaxis_title="Total Capacity (MW)", showlegend=False)

# Show chart
fig.show()

In [9]:
import plotly.express as px

# Aggregate power generation by country
generation_by_country = df.groupby("country long")["generation gwh 2021"].sum().reset_index()

# Select top 10 energy-producing countries
generation_by_country = generation_by_country.sort_values("generation gwh 2021", ascending=False).head(10)

# Bar chart
fig = px.bar(
    generation_by_country,
    x="country long",
    y="generation gwh 2021",
    text="generation gwh 2021",
    title="🌍 Top 10 Energy-Producing Countries",
    labels={"country long": "Country", "generation gwh 2021": "Total Generation (GWh)"},
    height=600
)

# Improve visuals
fig.update_traces(texttemplate="%{text:.2s} GWh", textposition="outside")
fig.update_layout(xaxis_title="Country", yaxis_title="Total Generation (GWh)", showlegend=False)

# Show chart
fig.show()


In [10]:
import plotly.express as px

# Define renewable fuels
renewable_fuels = {"Solar", "Hydro", "Wind", "Biomass", "Geothermal", "Wave and Tidal"}

# Add a column classifying energy as Renewable/Non-Renewable
df["energy_category"] = df["primary fuel"].apply(lambda x: "Renewable" if x in renewable_fuels else "Non-Renewable")

# Aggregate total renewable energy capacity by country
renewable_by_country = df[df["energy_category"] == "Renewable"].groupby("country long")["capacity in MW"].sum().reset_index()

# Select top 10 countries
renewable_by_country = renewable_by_country.sort_values("capacity in MW", ascending=False).head(10)

# Bar chart
fig = px.bar(
    renewable_by_country,
    x="country long",
    y="capacity in MW",
    text="capacity in MW",
    title="🌱 Top 10 Countries by Renewable Energy Capacity",
    labels={"country long": "Country", "capacity in MW": "Renewable Capacity (MW)"},
    height=600
)

# Improve visuals
fig.update_traces(texttemplate="%{text:.2s} MW", textposition="outside")
fig.update_layout(xaxis_title="Country", yaxis_title="Renewable Capacity (MW)", showlegend=False)

# Show chart
fig.show()


In [11]:
import plotly.express as px

# Aggregate total installed capacity and total generation per country
efficiency_data = df.groupby("country long").agg({"capacity in MW": "sum", "generation gwh 2021": "sum"}).reset_index()

# Calculate efficiency (GWh per MW)
efficiency_data["Efficiency (GWh per MW)"] = efficiency_data["generation gwh 2021"] / efficiency_data["capacity in MW"]

# Select top 10 most efficient countries
efficiency_data = efficiency_data.sort_values("Efficiency (GWh per MW)", ascending=False).head(10)

# Bar chart
fig = px.bar(
    efficiency_data,
    x="country long",
    y="Efficiency (GWh per MW)",
    text="Efficiency (GWh per MW)",
    title="🔌 Top 10 Most Energy Efficient Countries (GWh per MW)",
    labels={"country long": "Country", "Efficiency (GWh per MW)": "GWh per MW"},
    height=600
)

# Improve visuals
fig.update_traces(texttemplate="%{text:.2f} GWh/MW", textposition="outside")
fig.update_layout(xaxis_title="Country", yaxis_title="GWh per MW", showlegend=False)

# Show chart
fig.show()


In [12]:
import plotly.express as px

# Aggregate global electricity generation by year
generation_by_year = df.groupby("start date")["generation gwh 2021"].sum().reset_index()

# Line Chart
fig = px.line(
    generation_by_year,
    x="start date",
    y="generation gwh 2021",
    title="📈 Global Electricity Generation Growth Over Time",
    labels={"start date": "Year", "generation gwh 2021": "Total Generation (GWh)"},
    height=600
)

# Improve layout
fig.update_layout(xaxis_title="Year", yaxis_title="Total Generation (GWh)")

# Show chart
fig.show()


In [13]:
# Define renewable and non-renewable fuel types
renewable_fuels = {"Solar", "Hydro", "Wind", "Biomass", "Geothermal", "Wave and Tidal"}
non_renewable_fuels = {"Gas", "Coal", "Oil", "Nuclear", "Petcoke", "Waste", "Cogeneration", "Storage"}

# Filter data for US & China
df_us = df[df["country long"] == "United States of America"]
df_china = df[df["country long"] == "China"]

# Aggregate energy mix for both countries
us_fuel_mix = df_us.groupby("primary fuel")["capacity in MW"].sum().reset_index()
china_fuel_mix = df_china.groupby("primary fuel")["capacity in MW"].sum().reset_index()

# Add country labels
us_fuel_mix["Country"] = "United States"
china_fuel_mix["Country"] = "China"

# Combine both datasets
fuel_mix = pd.concat([us_fuel_mix, china_fuel_mix])

# Assign colors: Renewable = Green/Blue shades, Non-Renewable = Red/Orange shades
color_map = {
    "Solar": "#2ca02c",  # Green
    "Wind": "#1f77b4",  # Blue
    "Hydro": "#17becf",  # Light Blue
    "Biomass": "#8c564b",  # Brown
    "Geothermal": "#ff7f0e",  # Orange
    "Wave and Tidal": "#aec7e8",  # Light Blue
    "Coal": "#d62728",  # Red
    "Oil": "#ff9896",  # Light Red
    "Gas": "#e377c2",  # Pink
    "Nuclear": "#9467bd",  # Purple
    "Petcoke": "#7f7f7f",  # Grey
    "Waste": "#bcbd22",  # Yellow-Green
    "Cogeneration": "#c49c94",  # Beige
    "Storage": "#f7b6d2",  # Light Pink
}

# Stacked Bar Chart
fig = px.bar(
    fuel_mix,
    x="Country",
    y="capacity in MW",
    color="primary fuel",
    title="🇺🇸 US vs 🇨🇳 China Energy Mix (Stacked Bar Chart with Renewable & Non-Renewable Colors)",
    labels={"capacity in MW": "Total Capacity (MW)", "primary fuel": "Fuel Type"},
    height=600,
    color_discrete_map=color_map
)

# Improve layout
fig.update_layout(barmode="stack", xaxis_title="Country", yaxis_title="Total Capacity (MW)")

# Show chart
fig.show()

US Energy Landscape


In [14]:
import plotly.express as px

# Filter US data
df_us = df[df["country long"] == "United States of America"]

# Interactive US Power Plants Map
fig = px.scatter_mapbox(
    df_us,
    lat="latitude",
    lon="longitude",
    hover_name="name of powerplant",
    hover_data=["capacity in MW", "primary fuel"],
    color="primary fuel",
    size="capacity in MW",
    zoom=3,
    height=700,
    title="🗺️ US Power Plants Distribution (Scatter Map)"
)

# Improve map layout
fig.update_layout(
    mapbox_style="carto-positron",
    margin={"r": 0, "t": 40, "l": 0, "b": 0},
    mapbox_zoom=3,
    mapbox_center={"lat": 37.0902, "lon": -95.7129}  # Center on the US
)

# Show map
fig.show()



*scatter_mapbox* is deprecated! Use *scatter_map* instead. Learn more at: https://plotly.com/python/mapbox-to-maplibre/



In [15]:
import plotly.express as px

# Define renewable and non-renewable fuel types
renewable_fuels = {"Solar", "Hydro", "Wind", "Biomass", "Geothermal", "Wave and Tidal"}
non_renewable_fuels = {"Gas", "Coal", "Oil", "Nuclear", "Petcoke", "Waste", "Cogeneration", "Storage"}

# Filter US data
df_us = df[df["country long"] == "United States of America"]

# Categorize fuel types
df_us["energy_category"] = df_us["primary fuel"].apply(lambda x: "Renewable" if x in renewable_fuels else "Non-Renewable")

# Aggregate energy mix for the US
us_fuel_mix = df_us.groupby(["primary fuel", "energy_category"])["capacity in MW"].sum().reset_index()

# Custom color mapping
color_map = {
    "Solar": "#2ca02c",  # Green
    "Wind": "#1f77b4",  # Blue
    "Hydro": "#17becf",  # Light Blue
    "Biomass": "#8c564b",  # Brown
    "Geothermal": "#ff7f0e",  # Orange
    "Wave and Tidal": "#aec7e8",  # Light Blue
    "Coal": "#d62728",  # Red
    "Oil": "#ff9896",  # Light Red
    "Gas": "#e377c2",  # Pink
    "Nuclear": "#9467bd",  # Purple
    "Petcoke": "#7f7f7f",  # Grey
    "Waste": "#bcbd22",  # Yellow-Green
    "Cogeneration": "#c49c94",  # Beige
    "Storage": "#f7b6d2",  # Light Pink
}

# Stacked Bar Chart
fig = px.bar(
    us_fuel_mix,
    x="energy_category",
    y="capacity in MW",
    color="primary fuel",
    title="🇺🇸 US Energy Mix by Fuel Type (Stacked Bar Chart)",
    labels={"capacity in MW": "Total Capacity (MW)", "energy_category": "Energy Type"},
    height=600,
    color_discrete_map=color_map
)

# Improve layout
fig.update_layout(barmode="stack", xaxis_title="Energy Type", yaxis_title="Total Capacity (MW)")

# Show chart
fig.show()




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [16]:
import plotly.express as px

# Aggregate total capacity by category
us_renewable_vs_non = df_us.groupby("energy_category")["capacity in MW"].sum().reset_index()

# Pie Chart
fig = px.pie(
    us_renewable_vs_non,
    names="energy_category",
    values="capacity in MW",
    title="🌍 US Renewable vs Non-Renewable Energy Share",
    height=600,
    color="energy_category",
    color_discrete_map={"Renewable": "#2ca02c", "Non-Renewable": "#d62728"}  # Green for Renewable, Red for Non-Renewable
)

# Show chart
fig.show()


In [17]:
import plotly.express as px

# Filter US data
df_us = df[df["country long"] == "United States of America"]

# Energy Capacity Heatmap
fig = px.density_mapbox(
    df_us,
    lat="latitude",
    lon="longitude",
    z="capacity in MW",
    radius=10,
    title="🔥 US Energy Capacity Density (Heatmap)",
    mapbox_style="carto-positron",
    zoom=3,
    height=700
)

# Show map
fig.show()



*density_mapbox* is deprecated! Use *density_map* instead. Learn more at: https://plotly.com/python/mapbox-to-maplibre/



In [18]:
import plotly.express as px

# Filter US data
df_us = df[df["country long"] == "United States of America"]

# Aggregate total electricity generation by year
us_generation_by_year = df_us.groupby("start date")["generation gwh 2021"].sum().reset_index()

# Line Chart
fig = px.line(
    us_generation_by_year,
    x="start date",
    y="generation gwh 2021",
    title="📈 US Energy Production Over Time",
    labels={"start date": "Year", "generation gwh 2021": "Total Generation (GWh)"},
    height=600
)

# Improve layout
fig.update_layout(xaxis_title="Year", yaxis_title="Total Generation (GWh)")

# Show chart
fig.show()


In [19]:
import plotly.express as px

# Filter renewable energy sources
renewable_fuels = {"Solar", "Hydro", "Wind", "Biomass", "Geothermal", "Wave and Tidal"}
df_us["energy_category"] = df_us["primary fuel"].apply(lambda x: "Renewable" if x in renewable_fuels else "Non-Renewable")

# Aggregate total renewable energy capacity by year
us_renewable_growth = df_us[df_us["energy_category"] == "Renewable"].groupby("start date")["capacity in MW"].sum().reset_index()

# Line Chart
fig = px.line(
    us_renewable_growth,
    x="start date",
    y="capacity in MW",
    title="🌱 US Renewable Energy Growth Over Time",
    labels={"start date": "Year", "capacity in MW": "Total Renewable Capacity (MW)"},
    height=600
)

# Show chart
fig.show()




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [20]:
import plotly.express as px

# Aggregate total non-renewable energy capacity by year
us_nonrenewable_growth = df_us[df_us["energy_category"] == "Non-Renewable"].groupby("start date")["capacity in MW"].sum().reset_index()

# Line Chart
fig = px.line(
    us_nonrenewable_growth,
    x="start date",
    y="capacity in MW",
    title="⛽ US Non-Renewable Energy Dependence Over Time",
    labels={"start date": "Year", "capacity in MW": "Total Non-Renewable Capacity (MW)"},
    height=600
)

# Show chart
fig.show()


In [21]:
import plotly.express as px

# Population data (approximate for 2023)
populations = {
    "United States of America": 331_000_000,
    "China": 1_412_000_000
}

# Calculate MW per capita
us_mw_per_capita = df[df["country long"] == "United States of America"]["capacity in MW"].sum() / populations["United States of America"]
china_mw_per_capita = df[df["country long"] == "China"]["capacity in MW"].sum() / populations["China"]

# Create DataFrame
efficiency_df = pd.DataFrame({
    "Country": ["United States", "China"],
    "MW per Capita": [us_mw_per_capita, china_mw_per_capita]
})

# Bar Chart
fig = px.bar(
    efficiency_df,
    x="Country",
    y="MW per Capita",
    text="MW per Capita",
    title="⚡ Energy Efficiency: US vs China (MW per Capita)",
    labels={"MW per Capita": "Capacity (MW) per Capita"},
    height=500
)

# Improve visuals
fig.update_traces(texttemplate="%{text:.4f} MW", textposition="outside")
fig.update_layout(showlegend=False, xaxis_title="Country", yaxis_title="MW per Capita")

# Show chart
fig.show()


In [22]:
import plotly.express as px

# Define renewable and non-renewable fuel types
renewable_fuels = {"Solar", "Hydro", "Wind", "Biomass", "Geothermal", "Wave and Tidal"}
non_renewable_fuels = {"Gas", "Coal", "Oil", "Nuclear", "Petcoke", "Waste", "Cogeneration", "Storage"}

# Filter data for US & China
df_us = df[df["country long"] == "United States of America"]
df_china = df[df["country long"] == "China"]

# Categorize fuel types
df_us["energy_category"] = df_us["primary fuel"].apply(lambda x: "Renewable" if x in renewable_fuels else "Non-Renewable")
df_china["energy_category"] = df_china["primary fuel"].apply(lambda x: "Renewable" if x in renewable_fuels else "Non-Renewable")

# Aggregate total capacity by energy category
us_energy_split = df_us.groupby("energy_category")["capacity in MW"].sum().reset_index()
china_energy_split = df_china.groupby("energy_category")["capacity in MW"].sum().reset_index()

# Add country labels
us_energy_split["Country"] = "United States"
china_energy_split["Country"] = "China"

# Combine both datasets
energy_split = pd.concat([us_energy_split, china_energy_split])

# Grouped Bar Chart
fig = px.bar(
    energy_split,
    x="Country",
    y="capacity in MW",
    color="energy_category",
    title="🔋 US vs China: Renewable vs Non-Renewable Energy Split",
    labels={"capacity in MW": "Total Capacity (MW)", "energy_category": "Energy Type"},
    height=600,
    barmode="group"
)

# Improve layout
fig.update_layout(xaxis_title="Country", yaxis_title="Total Capacity (MW)")

# Show chart
fig.show()




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [23]:
import plotly.express as px

# Aggregate total installed capacity for US & China
us_capacity = df[df["country long"] == "United States of America"]["capacity in MW"].sum()
china_capacity = df[df["country long"] == "China"]["capacity in MW"].sum()

# Create DataFrame
capacity_df = pd.DataFrame({"Country": ["United States", "China"], "Total Capacity (MW)": [us_capacity, china_capacity]})

# Bar Chart
fig = px.bar(
    capacity_df,
    x="Country",
    y="Total Capacity (MW)",
    text="Total Capacity (MW)",
    title="⚡ US vs China: Total Installed Capacity (MW)",
    height=500
)

# Improve visuals
fig.update_traces(texttemplate="%{text:.2s} MW", textposition="outside")
fig.update_layout(showlegend=False)

# Show chart
fig.show()


In [24]:
import plotly.express as px

# Filter US & China data
df_us_china = df[df["country long"].isin(["United States of America", "China"])]

# Interactive Power Plant Map
fig = px.scatter_mapbox(
    df_us_china,
    lat="latitude",
    lon="longitude",
    hover_name="name of powerplant",
    hover_data=["capacity in MW", "primary fuel"],
    color="primary fuel",
    size="capacity in MW",
    zoom=2,
    height=700,
    title="🗺️ US vs China Power Plants Distribution (Scatter Map)"
)

# Improve map layout
fig.update_layout(
    mapbox_style="carto-positron",
    margin={"r": 0, "t": 40, "l": 0, "b": 0}
)

# Show map
fig.show()



*scatter_mapbox* is deprecated! Use *scatter_map* instead. Learn more at: https://plotly.com/python/mapbox-to-maplibre/

