In [69]:
# Creamos un dataframe que tenga todos los ID de paraderos de Santiago usando
# https://raw.githubusercontent.com/JoseDTPM/geojson-Transantiago/refs/heads/main/Paraderos-Santiago-Chile.geojsonl.json

from os import listdir
listdir("data")
import pandas as pd

file = "data/Paraderos-Santiago-Chile.geojsonl.json"
df = pd.read_json(file, lines=True)
properties_df = pd.json_normalize(df["properties"])

# Seleccionamos la columna que corresponde al ID de los paraderos.
stops_df = properties_df[["SIMT", "X", "Y"]]
stops_df_clean = pd.DataFrame()
stops_df_clean["SIMT"] = stops_df["SIMT"].unique()
stops_df_1000 = stops_df.sample(n = 1000).reset_index()
print(f"cant. normal: {len(stops_df)}\ncant. limpio: {len(stops_df_clean)}\ncant. a analizar: {len(stops_df_1000)}")
stops_df_1000

cant. normal: 11798
cant. limpio: 11798
cant. a analizar: 1000


Unnamed: 0,index,SIMT,X,Y
0,1730,PF52,351626.610,6284271.040
1,8994,PA710,347630.883,6298554.147
2,2761,PI318,335719.818,6287342.718
3,7121,PB853,344278.485,6302552.942
4,10092,PD788,354075.541,6296280.826
...,...,...,...,...
995,5498,PH297,344496.880,6293292.040
996,2164,PE339,354558.394,6285458.228
997,6977,PB835,342659.670,6306259.319
998,6846,PB811,343518.581,6301716.287


In [70]:
import requests
import time
from datetime import datetime

timeanddate = datetime.now()

timeanddate = timeanddate.strftime("%d-%m-%Y-%H:%M:%S")

def bus_routes(stop):
    all_routes_data = []
    total_stops = len(stop)

    print(f"\ntotal {total_stops} paradas")
    i = 0
    for code in stops_df["SIMT"]:
        if i > 10:
            break
        i += 1
        url = f"https://api.xor.cl/red/bus-stop/{code}"
        response = requests.get(url)
        data = response.json()
        
        for service in data.get("services", []):
                for bus in service.get("buses", []):
                    all_routes_data.append({
                        "bus_stop_code": code,
                        "route_id": service.get("id"),
                        "bus_id": bus.get("id"),
                        "meters_distance": bus.get("meters_distance"),
                        "min_arrival_time": bus.get("min_arrival_time"),
                        "max_arrival_time": bus.get("max_arrival_time")
            })
    bus_routes = pd.DataFrame(all_routes_data)                
    final_df = pd.merge(
    bus_routes,
    stops_df,
    left_on="bus_stop_code",
    right_on="SIMT",
    how="left"
    )

    final_df = final_df.drop(columns=["SIMT"])
    return final_df

sample_codes = stops_df

final_bus_df = bus_routes(sample_codes)
final_bus_df
#final_bus_df.to_csv(f"datos_{timeanddate}.csv", index=False)



total 11798 paradas


Unnamed: 0,bus_stop_code,route_id,bus_id,meters_distance,min_arrival_time,max_arrival_time,X,Y
0,PI1923,I35,SFGY-80,870,0,5,330792.0,6283751.0
1,PI1923,I35,SHXG-69,6540,19,27,330792.0,6283751.0
2,PI1935,I35,SFGY-80,450,0,3,331179.0,6283855.0
3,PI1935,I35,SHXG-69,6114,17,25,331179.0,6283855.0
4,PI1926,I35,SFGY-80,1835,7,9,331245.0,6283815.0
5,PI1926,I35,SHXG-69,7494,25,60,331245.0,6283815.0
6,PI1925,I35,SFPF-87,1718,6,8,331266.0,6283832.0
7,PI1927,I35,SFGY-80,2182,8,10,331450.0,6283572.0
8,PI1927,I35,SHXG-69,7829,25,60,331450.0,6283572.0
9,PI1936,I35,SFPF-87,1391,0,4,331433.0,6283612.0


In [71]:
def get_metro_data():
    url = "https://api.xor.cl/red/metro-network"
    all_stations_data = []
    response = requests.get(url)
    lines_data = response.json()

    lines_list = lines_data.get("lines", [])

    for line in lines_list:
        line_id = line.get("id")
        
        stations = line.get("stations", [])
        
        for station in stations:
            current_datetime = datetime.now()
            all_stations_data.append({
                "line_id": line_id,
                "station_id": station.get("id"),
                "name": station.get("name"),
                "status_code": station.get("status"),
                "status_description": station.get("description"),
                "date": current_datetime
            })
    
    return pd.DataFrame(all_stations_data)
    
metro_df = get_metro_data()
metro_df
#metro_df.to_csv(f"metro_stations_{timeanddate}.csv", index=False)

Unnamed: 0,line_id,station_id,name,status_code,status_description,date
0,L1,san-pablo,San Pablo,0,Estación Operativa,2025-10-12 20:55:36.793366
1,L1,neptuno,Neptuno,0,Estación Operativa,2025-10-12 20:55:36.793373
2,L1,pajaritos,Pajaritos,0,Estación Operativa,2025-10-12 20:55:36.793374
3,L1,las-rejas,Las Rejas,0,Estación Operativa,2025-10-12 20:55:36.793375
4,L1,ecuador,Ecuador,0,Estación Operativa,2025-10-12 20:55:36.793376
...,...,...,...,...,...,...
138,L6,nuble,Ñuble,0,Estación Operativa,2025-10-12 20:55:36.793458
139,L6,estadio-nacional,Estadio Nacional,0,Estación Operativa,2025-10-12 20:55:36.793459
140,L6,nunoa,Ñuñoa,0,Estación Operativa,2025-10-12 20:55:36.793459
141,L6,ines-de-suarez,Inés de Suárez,0,Estación Operativa,2025-10-12 20:55:36.793460
