In [None]:
import pandas as pd

# Read the raw export; the first four lines of the file are skipped because
# they hold metadata rather than the header row.
raw_csv_path = "renewable_electricity.csv"
df_raw = pd.read_csv(raw_csv_path, skiprows=4)

print(df_raw.shape)
df_raw.head()


In [None]:
# Year columns are the ones whose header consists only of digits; every other
# column is treated as descriptive.
year_cols = list(filter(lambda name: name.isdigit(), df_raw.columns))

print(year_cols[:5], "...", year_cols[-5:])


In [None]:
# Reshape wide -> long: one row per (country, year) with the indicator value
# in a single column.
df_transformed = pd.melt(
    df_raw,
    id_vars=["Country Name", "Country Code"],
    value_vars=year_cols,
    var_name="year",
    value_name="renewable_electricity_percent",
)

df_transformed.head()


In [None]:
# Drop rows containing any missing value, then store the year as an integer
# (it is a string after the melt because it came from column headers).
df_transformed = df_transformed.dropna().astype({"year": int})

df_transformed.info()


In [None]:
# Write the cleaned long-format table to disk without the pandas index.
processed_csv_path = "renewable_electricity_processed.csv"
df_transformed.to_csv(processed_csv_path, index=False)

print("New transformed CSV created successfully")


In [None]:
import pandas as pd

# Reload the processed CSV written by the earlier export cell.
processed_source = "renewable_electricity_processed.csv"
df_processed = pd.read_csv(processed_source)

df_processed.head()


In [None]:
import sqlite3

# Open the SQLite database file (sqlite3 creates it if it does not exist yet).
sqlite_db_path = "electricity.db"
conn = sqlite3.connect(sqlite_db_path)
print("Connected to SQLite")


In [None]:
# Load the processed frame into SQLite; "replace" drops and recreates the
# table, so re-running this cell does not duplicate rows.
df_processed.to_sql(name="renewable_electricity", con=conn, if_exists="replace", index=False)

print("Processed CSV loaded into SQLite successfully")


In [None]:
# Spot-check the freshly written table by reading back its first five rows.
query = "SELECT * FROM renewable_electricity LIMIT 5"
pd.read_sql(sql=query, con=conn)


In [None]:
# Release the SQLite handle that was opened in an earlier cell.
conn.close()
print("SQLite connection closed")


In [None]:
import requests

url = "https://api.worldbank.org/v2/country/all/indicator/EG.USE.ELEC.KH.PC?format=json&per_page=20000"

# Bound the request so a stalled connection cannot hang the notebook, and fail
# loudly on an HTTP error instead of trying to parse an error page as JSON.
response = requests.get(url, timeout=60)
response.raise_for_status()
json_data = response.json()

# The record-building cell reads json_data[1]; verify that element exists so a
# malformed or error payload is reported here with a readable message.
if not isinstance(json_data, list) or len(json_data) < 2 or json_data[1] is None:
    raise ValueError(
        f"Unexpected World Bank API response: {str(json_data)[:200]}"
    )

type(json_data)


In [None]:
import pandas as pd

# Flatten each API entry (second element of the JSON payload) into a plain
# dict with the four fields used downstream.
records = [
    {
        "country_name": entry["country"]["value"],
        "country_code": entry["country"]["id"],
        "year": entry["date"],
        "electricity_use_kwh_per_capita": entry["value"],
    }
    for entry in json_data[1]
]

df_json = pd.DataFrame(records)

df_json.head()


In [None]:
# Remove rows with any missing field and cast the year to an integer.
df_json = df_json.dropna().astype({"year": int})

df_json.info()


In [None]:
import pymongo
from pymongo import MongoClient

import os

# Connect to the MongoDB cluster addressed by an SRV connection string (this is
# not a local server). The URI embeds the username and password, so it is read
# from the MONGODB_URI environment variable rather than hardcoded here.
# NOTE(review): the password previously committed in this cell should be
# rotated, since it remains visible in version-control history.
mongodb_uri = os.environ.get("MONGODB_URI")
if not mongodb_uri:
    raise RuntimeError(
        "Set the MONGODB_URI environment variable to the MongoDB connection string"
    )
client = MongoClient(mongodb_uri)

# Create / use database
db = client["electricity_db"]

print("Connected to MongoDB")



In [None]:
# Pick the collection that will hold the per-capita electricity-use records.
collection_name = "electricity_use_per_capita"
collection = db[collection_name]

print("Collection selected")


In [None]:
# Convert the cleaned frame to a list of row dicts, and show how many rows
# there are along with the first one.
records = df_json.to_dict(orient="records")
record_count = len(records)

(record_count, records[0])


In [None]:
# Empty the collection first (an empty filter matches every document) so that
# re-running the load does not accumulate duplicates.
match_everything = {}
collection.delete_many(match_everything)
print("Old records removed (if any)")


In [None]:
# Bulk-insert every record and report how many ids MongoDB handed back.
result = collection.insert_many(records)
inserted_count = len(result.inserted_ids)

print(f"Inserted {inserted_count} documents into MongoDB")


In [None]:
# Display a single stored document as a spot check of the collection.
collection.find_one()


In [None]:
import pandas as pd

url = "https://api.worldbank.org/v2/country/all/indicator/EG.ELC.LOSS.ZS?per_page=20000"

# Parse the XML response straight into a DataFrame.
losses_source = pd.read_xml(url)

# Source column -> name used in the rest of the notebook; only these four
# columns are kept.
column_names = {
    "country": "country_name",
    "countryiso3code": "country_code",
    "date": "year",
    "value": "electricity_losses_pct",
}

# Select, rename, drop incomplete rows, then fix the dtypes in one chain.
df_losses_xml = (
    losses_source[list(column_names)]
    .rename(columns=column_names)
    .dropna()
    .astype({"year": int, "electricity_losses_pct": float})
)

print("Rows:", len(df_losses_xml))
print(df_losses_xml.head())


In [None]:
# Write the cleaned electricity-losses table to CSV without the pandas index.
df_losses_xml.to_csv(
    "electricity_losses_pct_xml_processed.csv",
    index=False
)

# The arrow in this message had been mis-decoded into mojibake ("â†’"); it is
# restored to the intended "→" character.
print("Electricity losses XML → CSV written successfully")


In [None]:
import sqlite3

# Store the electricity-losses table in SQLite. "replace" recreates the table
# so the cell can be re-run safely.
conn = sqlite3.connect("electricity.db")
try:
    df_losses_xml.to_sql(
        "electricity_losses_pct",
        conn,
        if_exists="replace",
        index=False
    )
finally:
    # Always release the database handle, even if the write fails; otherwise
    # the open connection lingers in the kernel and can keep the file locked.
    conn.close()

print("XML-equivalent data inserted into SQLite")


In [None]:
# Count the rows in the losses table as a sanity check. The connection is
# closed once the query has run; previously it was left open indefinitely.
conn = sqlite3.connect("electricity.db")
try:
    losses_row_count = pd.read_sql(
        "SELECT COUNT(*) AS total FROM electricity_losses_pct", conn
    )
finally:
    conn.close()

losses_row_count


In [None]:
import sqlite3
import pandas as pd
from pymongo import MongoClient
import pycountry


In [None]:
# -----------------------------
# Renewable electricity (CSV)
# -----------------------------
renewable_query = """
    SELECT
        "Country Code" AS country_code,
        year,
        renewable_electricity_percent
    FROM renewable_electricity
    """

# -----------------------------
# Electricity losses (XML-equivalent)
# -----------------------------
losses_query = """
    SELECT
        country_code,
        year,
        electricity_losses_pct
    FROM electricity_losses_pct
    """

# Connect to SQLite and read both tables. The try/finally guarantees the
# connection is closed even if one of the queries fails (e.g. a table is
# missing because an earlier cell was not run).
sqlite_conn = sqlite3.connect("electricity.db")
try:
    df_renewable = pd.read_sql(renewable_query, sqlite_conn)
    df_losses = pd.read_sql(losses_query, sqlite_conn)
finally:
    sqlite_conn.close()

print("Renewable:", df_renewable.shape)
print("Losses:", df_losses.shape)


In [None]:
import os

# The connection string embeds the MongoDB username and password, so it is
# read from the MONGODB_URI environment variable rather than hardcoded here.
# NOTE(review): the password previously committed in this cell should be
# rotated, since it remains visible in version-control history.
mongodb_uri = os.environ.get("MONGODB_URI")
if not mongodb_uri:
    raise RuntimeError(
        "Set the MONGODB_URI environment variable to the MongoDB connection string"
    )
client = MongoClient(mongodb_uri)

db = client["electricity_db"]
collection = db["electricity_use_per_capita"]

# Fetch every document, projecting only the three fields needed for the merge
# (and excluding MongoDB's _id).
mongo_data = list(collection.find(
    {},
    {
        "_id": 0,
        "country_code": 1,
        "year": 1,
        "electricity_use_kwh_per_capita": 1
    }
))

df_consumption = pd.DataFrame(mongo_data)

print("Consumption:", df_consumption.shape)


In [None]:
import pycountry

def iso2_to_iso3(code):
    """Translate an ISO 3166-1 alpha-2 country code to its alpha-3 code.

    Args:
        code: Candidate alpha-2 code, e.g. "US".

    Returns:
        The alpha-3 code as a string, or None when pycountry has no country
        for ``code``.
    """
    try:
        country = pycountry.countries.get(alpha_2=code)
    except LookupError:
        # Depending on the pycountry version, an unknown or non-string key is
        # reported by raising (KeyError is a LookupError) instead of returning
        # None. Only lookup failures are treated as "no match"; anything else
        # (e.g. a NameError from a missing import) is a real error and must
        # not be silently swallowed.
        return None
    return None if country is None else country.alpha_3

def is_valid_iso3(code):
    """Report whether ``code`` is a country alpha-3 code known to pycountry.

    Args:
        code: Candidate alpha-3 code, e.g. "USA". May be None.

    Returns:
        True when pycountry resolves ``code`` to a country, otherwise False.
    """
    try:
        return pycountry.countries.get(alpha_3=code) is not None
    except LookupError:
        # Depending on the pycountry version, an unknown or non-string key is
        # reported by raising (KeyError is a LookupError). Only lookup
        # failures mean "invalid"; other exceptions indicate a real problem
        # and are allowed to propagate.
        return False


In [None]:
# Normalise country codes to alpha-3: two-letter codes are translated, any
# other value is passed through unchanged.
df_losses = df_losses.assign(
    iso3=df_losses["country_code"].apply(
        lambda code: code if len(code) != 2 else iso2_to_iso3(code)
    )
)

# Keep only rows whose normalised code is a recognised country, then let the
# normalised column take over the country_code name.
losses_is_country = df_losses["iso3"].apply(is_valid_iso3)
df_losses = (
    df_losses[losses_is_country]
    .drop(columns=["country_code"])
    .rename(columns={"iso3": "country_code"})
)

print("Losses after ISO fix:", df_losses.shape)


In [None]:
# Normalise country codes to alpha-3: two-letter codes are translated, any
# other value is passed through unchanged.
df_renewable = df_renewable.assign(
    iso3=df_renewable["country_code"].apply(
        lambda code: code if len(code) != 2 else iso2_to_iso3(code)
    )
)

# Keep only rows whose normalised code is a recognised country, then let the
# normalised column take over the country_code name.
renewable_is_country = df_renewable["iso3"].apply(is_valid_iso3)
df_renewable = (
    df_renewable[renewable_is_country]
    .drop(columns=["country_code"])
    .rename(columns={"iso3": "country_code"})
)

print("Renewable after ISO fix:", df_renewable.shape)


In [None]:
# Normalise country codes to alpha-3: two-letter codes are translated, any
# other value is passed through unchanged.
df_consumption = df_consumption.assign(
    iso3=df_consumption["country_code"].apply(
        lambda code: code if len(code) != 2 else iso2_to_iso3(code)
    )
)

# Keep only rows whose normalised code is a recognised country, then let the
# normalised column take over the country_code name.
consumption_is_country = df_consumption["iso3"].apply(is_valid_iso3)
df_consumption = (
    df_consumption[consumption_is_country]
    .drop(columns=["country_code"])
    .rename(columns={"iso3": "country_code"})
)

print("Consumption after ISO fix:", df_consumption.shape)


In [None]:
# Inner-join the two SQLite-sourced tables: only (country, year) pairs present
# in both survive.
merge_keys = ["country_code", "year"]
df_sqlite_merged = df_renewable.merge(df_losses, on=merge_keys, how="inner")

print("SQLite merged shape:", df_sqlite_merged.shape)
df_sqlite_merged.head()


In [None]:
# Inner-join the consumption data onto the merged SQLite tables using the same
# (country, year) key.
integration_keys = ["country_code", "year"]
df_final_integrated = df_sqlite_merged.merge(
    df_consumption, on=integration_keys, how="inner"
)

print("Final integrated shape:", df_final_integrated.shape)
df_final_integrated.head()


In [None]:
# Export the integrated dataset to CSV without the pandas index.
integrated_output_path = "integrated_electricity_dataset.csv"
df_final_integrated.to_csv(integrated_output_path, index=False)

print("Final integrated dataset saved as CSV")


In [None]:
import sqlite3

# Store the integrated dataset in SQLite. "replace" recreates the table so the
# cell can be re-run safely.
conn = sqlite3.connect("electricity.db")
try:
    df_final_integrated.to_sql(
        "integrated_electricity_data",
        conn,
        if_exists="replace",
        index=False
    )
finally:
    # Always release the database handle, even if the write fails; otherwise
    # the open connection lingers in the kernel and can keep the file locked.
    conn.close()

print("Final integrated dataset saved in SQLite")


In [None]:
import pandas as pd

# Reload the integrated dataset from disk for inspection.
integrated_csv_path = "integrated_electricity_dataset.csv"
df = pd.read_csv(integrated_csv_path)

print(df.shape)
df.head()


In [None]:
# Missing-value count per column (isna is the same check as isnull).
df.isna().sum()


In [None]:
# Show pandas' default descriptive statistics for the integrated dataset.
df.describe()

In [None]:
# First and last year covered by the integrated dataset.
year_values = df["year"]
(year_values.min(), year_values.max())
