In [1]:
import pandas as pd
import numpy as np
from data_preprocess import Data

# Load the population table through the project's preprocessing helper.
# NOTE(review): presumably returns a pandas DataFrame with Time / ISO2_code /
# Population columns (as the preview and later cells suggest) -- confirm in
# data_preprocess.
pop_df = Data.get_pop_df()

# Preview the first rows; head() defaults to n=5.
pop_df.head()

Using cached file: data\WPP2024_Demographic_Indicators_Medium.csv.gz


Unnamed: 0,Time,ISO2_code,Population
0,1970,BI,3487370
1,1971,BI,3573495
2,1972,BI,3661084
3,1973,BI,3573171
4,1974,BI,3650811


In [2]:
from database import sessions
from database.models import DataEntry
from sqlalchemy.exc import IntegrityError

# Insert one DataEntry per population row, skipping rows the database rejects
# with an IntegrityError (e.g. a country/year pair that is already stored).
#
# Session.add() only stages the object; the INSERT is emitted at flush time, so
# an IntegrityError can never be raised by add() itself.  Each row is therefore
# added inside its own SAVEPOINT (session.begin_nested()), which flushes when
# the inner block exits.  A rejected row rolls back only its savepoint; the
# outer transaction -- and every row accepted so far -- stays intact and is
# committed once when the sessions.begin() block ends.
#
# NOTE(review): on SQLite with the default pysqlite driver, SAVEPOINT support
# needs the engine-event workaround described in the SQLAlchemy SQLite dialect
# docs -- confirm which backend `sessions` is bound to.
with sessions.begin() as session:
    # Iterate plain tuples of just the columns we need; cheaper than iterrows(),
    # which builds a Series per row.
    population_rows = pop_df[["ISO2_code", "Time", "Population"]].itertuples(
        index=False, name=None
    )
    for country_code, year, population in population_rows:
        try:
            with session.begin_nested():
                session.add(
                    DataEntry(
                        country_code=country_code,
                        year=int(year),
                        population=int(population),
                    )
                )
        except IntegrityError as e:
            # The savepoint has already been rolled back by begin_nested();
            # report the row and carry on with the next one.
            print(f"Duplicate: {country_code} {year} -> {e}")

    print("All rows processed.")


All rows processed.


In [3]:
from database import sessions  
from database.models import DataEntry  
from sqlalchemy import select

# Example read-only queries against the DataEntry table.

# Population threshold for the "large population" query below.
LARGE_POPULATION = 1_000_000_000

# The session is used as a context manager so it is released when the block
# ends (presumably `sessions` is a SQLAlchemy sessionmaker -- confirm in
# database/__init__.py).
with sessions() as session:
    # All DataEntry records for a single country code.
    us_records = session.scalars(
        select(DataEntry).where(DataEntry.country_code == "US")
    ).all()
    print(f"Found {len(us_records)} records for the United States.")

    # Records whose GDP is NULL.  .is_(None) is the explicit SQLAlchemy
    # operator for IS NULL and avoids the `== None` comparison that linters
    # flag (E711).
    unknown_gdp_records = session.scalars(
        select(DataEntry).where(DataEntry.gdp.is_(None))
    ).all()
    print(f"Found {len(unknown_gdp_records)} records with unknown GDP.")

    # Records with a population greater than one billion.
    large_population_records = session.scalars(
        select(DataEntry).where(DataEntry.population > LARGE_POPULATION)
    ).all()
    for record in large_population_records:
        # Plain "-" bullet: "\-" is not a valid Python escape sequence and
        # triggers an invalid-escape warning when the cell is compiled.
        print(f"  - Record found for year {record.year} with population {record.population}")

Found 55 records for the United States.
Found 13035 records with unknown GDP.
  \- Record found for year 1982 with population 1005205011
  \- Record found for year 1983 with population 1021971348
  \- Record found for year 1984 with population 1036158564
  \- Record found for year 1985 with population 1051540712
  \- Record found for year 1986 with population 1068074929
  \- Record found for year 1987 with population 1086392285
  \- Record found for year 1988 with population 1106042607
  \- Record found for year 1989 with population 1124445724
  \- Record found for year 1990 with population 1143536292
  \- Record found for year 1991 with population 1163629156
  \- Record found for year 1992 with population 1177947900
  \- Record found for year 1993 with population 1191200574
  \- Record found for year 1994 with population 1203416576
  \- Record found for year 1995 with population 1214589617
  \- Record found for year 1996 with population 1225678238
  \- Record found for year 1997 with 

  print(f"  \- Record found for year {record.year} with population {record.population}")
