In [1]:
import os
from dotenv import load_dotenv
import pandas as pd
from sqlalchemy import create_engine

### Import txt file and manage data - Bus stops

In [2]:
# Load the CTA stop list. The file has no header row, so the column names are
# supplied here; the var_* columns are not kept below.
column_names = [
    "route_id",
    "var_1",
    "stop_name",
    "stop_id",
    "latitude",
    "longitude",
    "var_3",
    "var_4",
]
df_stops = pd.read_csv(
    "../data/CTA_STOP_XFERS.txt",
    header=None,
    names=column_names,
    # Route ids are labels rather than numbers (e.g. "X9"), so read them as text.
    dtype={"route_id": str},
)

# Keep only the fields that get ingested and tag every row with its source.
# .assign builds a new frame instead of mutating a slice of the raw one.
df_stops = df_stops[
    ["stop_id", "route_id", "stop_name", "latitude", "longitude"]
].assign(type="CTA")

# Optional: restrict the stops to the Hyde Park bounding box.
# Disabled by default so the full stop list is kept; set the flag to True to filter.
FILTER_TO_HYDE_PARK = False
HP_NORTH_BOUND = 41.809647
HP_SOUTH_BOUND = 41.780482
HP_WEST_BOUND = -87.615877
HP_EAST_BOUND = -87.579056
if FILTER_TO_HYDE_PARK:
    # Series.between is inclusive on both ends by default (same as >= and <=).
    df_stops = df_stops[
        df_stops["latitude"].between(HP_SOUTH_BOUND, HP_NORTH_BOUND)
        & df_stops["longitude"].between(HP_WEST_BOUND, HP_EAST_BOUND)
    ]

df_stops

Unnamed: 0,stop_id,route_id,stop_name,latitude,longitude,type
0,17298,152,Addison & Halsted,41.947298,-87.649178,CTA
1,12513,152,Addison & Hamlin,41.946408,-87.722580,CTA
2,17358,152,Addison & Harlem,41.945240,-87.806693,CTA
3,12530,152,Addison & Hoyne,41.946782,-87.681080,CTA
4,12510,152,Addison & Karlov,41.946313,-87.729287,CTA
...,...,...,...,...,...,...
14664,4164,X9,Ashland Orange Line Station,41.839099,-87.665390,CTA
14665,6179,X9,Ashland/63rd Street (Green Line),41.778936,-87.664081,CTA
14666,5668,X9,Irving Park & Clark,41.954310,-87.662413,CTA
14667,15930,X9,Irving Park & Fremont,41.954603,-87.651532,CTA


### Import csv file and manage data - Routes

In [3]:
# Read the bus-routes export and relabel its columns with short names
# (the names are applied by position, in the order listed here).
column_names = ["geometry", "route_id", "route_name", "weekday", "saturday", "sunday"]
routes_raw = pd.read_csv("../data/CTA_-_Bus_Routes_20250514.csv")
routes_raw.columns = column_names

# Keep the fields that get ingested and tag every row with its source.
df_routes = routes_raw[["route_id", "route_name", "geometry"]].assign(type="CTA")
df_routes

Unnamed: 0,route_id,route_name,geometry,type
0,12,ROOSEVELT,MULTILINESTRING ((-87.64790999981973 41.867129...,CTA
1,121,UNION/STREETERVILLE EXPRESS,MULTILINESTRING ((-87.62449999918586 41.888329...,CTA
2,1,BRONZEVILLE/UNION STATION,MULTILINESTRING ((-87.62325999972036 41.831030...,CTA
3,108,HALSTED/95TH,MULTILINESTRING ((-87.59052999958188 41.655770...,CTA
4,11,LINCOLN,MULTILINESTRING ((-87.68882999971883 41.966760...,CTA
...,...,...,...,...
122,120,OGILVIE/STREETERVILLE EXPRESS,MULTILINESTRING ((-87.62449999918586 41.888329...,CTA
123,126,JACKSON,MULTILINESTRING ((-87.77390000023009 41.876470...,CTA
124,20,MADISON,MULTILINESTRING ((-87.77390694614428 41.880080...,CTA
125,128,SOLDIER FIELD EXPRESS,MULTILINESTRING ((-87.61698999997792 41.865019...,CTA


### Ingest data into SQL

In [4]:
# Load environment variables from the project's .env file and read the
# database connection string from them.
load_dotenv("../.env")
db_url = os.getenv("DATABASE_URL")
if not db_url:
    # os.getenv returns None when the variable is missing; fail here with an
    # actionable message instead of handing None to create_engine.
    raise RuntimeError(
        "DATABASE_URL is not set; define it in ../.env or in the environment."
    )

# Create SQLAlchemy engine
engine = create_engine(db_url)

# Write each frame to its table. if_exists="replace" drops an existing table
# before inserting, so re-running this cell does not append duplicate rows.
df_stops.to_sql("apt_app_cta_stops", engine, if_exists="replace", index=False)
df_routes.to_sql("apt_app_cta_routes", engine, if_exists="replace", index=False)

127

### Check SQL data - Bus Stops

In [5]:
# Count the rows stored in the stops table
stops_count_sql = "SELECT COUNT(*) AS count FROM apt_app_cta_stops"
db_count = pd.read_sql(stops_count_sql, engine).iloc[0, 0]
print(f"Total rows: {db_count}")

# Preview the first five stored rows
stops_preview_sql = "SELECT * FROM apt_app_cta_stops LIMIT 5"
df_preview_stops = pd.read_sql(stops_preview_sql, engine)
print(df_preview_stops)

Total rows: 14669
   stop_id route_id          stop_name   latitude  longitude type
0    17298      152  Addison & Halsted  41.947298 -87.649178  CTA
1    12513      152   Addison & Hamlin  41.946408 -87.722580  CTA
2    17358      152   Addison & Harlem  41.945240 -87.806693  CTA
3    12530      152    Addison & Hoyne  41.946782 -87.681080  CTA
4    12510      152   Addison & Karlov  41.946313 -87.729287  CTA


### Check SQL data - Bus Routes

In [6]:
# Count the rows stored in the routes table
db_count = pd.read_sql("SELECT COUNT(*) AS count FROM apt_app_cta_routes", engine).iloc[0, 0]
print("Total rows:", db_count)

# Preview the first five stored rows. Kept in its own variable so the stops
# preview (df_preview_stops) is not overwritten with route data.
df_preview_routes = pd.read_sql("SELECT * FROM apt_app_cta_routes LIMIT 5", engine)
print(df_preview_routes)

Total rows: 127
  route_id                   route_name  \
0       12                    ROOSEVELT   
1      121  UNION/STREETERVILLE EXPRESS   
2        1    BRONZEVILLE/UNION STATION   
3      108                 HALSTED/95TH   
4       11                      LINCOLN   

                                            geometry type  
0  MULTILINESTRING ((-87.64790999981973 41.867129...  CTA  
1  MULTILINESTRING ((-87.62449999918586 41.888329...  CTA  
2  MULTILINESTRING ((-87.62325999972036 41.831030...  CTA  
3  MULTILINESTRING ((-87.59052999958188 41.655770...  CTA  
4  MULTILINESTRING ((-87.68882999971883 41.966760...  CTA  
