**The data used in this notebook was taken from the Medellín Metro open data website:** https://datosabiertos-metrodemedellin.opendata.arcgis.com/search?categories=%252Fcategories%252Fmovilidad

In [2]:
#IMPORT LIBRARIES
import pandas as pd
import geopandas as gpd
import shapely
from shapely.geometry import LineString

In [3]:
# Read the station table from the CSV export into a DataFrame
df = pd.read_csv('data/Estaciones_Sistema_Metro.csv')
# Keep every row whose 'sentido' value is anything other than 'B'
df_stations = df.loc[df['sentido'].ne('B')]

In [4]:
# Summarise the filtered station table: column names, non-null counts and dtypes
df_stations.info()

<class 'pandas.core.frame.DataFrame'>
Index: 113 entries, 0 to 153
Data columns (total 9 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   X         113 non-null    float64
 1   Y         113 non-null    float64
 2   objectid  113 non-null    int64  
 3   linea     113 non-null    object 
 4   estacion  113 non-null    int64  
 5   sentido   113 non-null    object 
 6   sistema   113 non-null    object 
 7   label     113 non-null    object 
 8   tipo      113 non-null    int64  
dtypes: float64(2), int64(3), object(4)
memory usage: 8.8+ KB


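To create a `LineString` object, the station points (x, y) of each line must be arranged in a specific order. That order is given by the dictionary `sorter_lines`, which maps each line to its list of station `objectid` values.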

In [6]:
# Station order for every line: each key is a 'linea' value and each list holds
# the station 'objectid' values in the sequence used to build the LineString.
sorter_lines = {
    "1": [76, 87, 88, 89, 90, 91, 92, 94, 69, 68, 67, 66, 99, 100, 101, 102, 103, 104, 105, 106, 58],
    "2": [112, 114, 116, 118, 120, 135, 84, 83, 82, 81, 80, 79, 77, 121, 122, 124, 126, 128, 130, 132, 134],
    "A": [22, 21, 20, 1, 2, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14, 23, 24, 25, 26, 27],
    "B": [28, 9, 15, 16, 17, 18, 19],
    "H": [37, 39, 43],
    "J": [35, 34, 33, 44],
    "K": [30, 31, 32, 41],
    "L": [29, 40],
    "M": [36, 38, 42],
    "O": [140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153],
    "P": [45, 46, 47, 48],
    "T": [52, 49, 51, 56, 55, 50, 53, 54, 57],
}

def create_geodataframe_file(
    df_stations,
    geojson_filename='data/Exported_data/Metro_lines_GeoInfo.geojson',
    station_order=None,
):
    """Build one LineString per metro line and export them all as a GeoJSON file.

    Parameters
    ----------
    df_stations : pandas.DataFrame
        Station table. The columns read here are 'linea' (line identifier),
        'objectid' (station id used for ordering) and 'X' / 'Y' (point
        coordinates, used as longitude / latitude respectively).
    geojson_filename : str, optional
        Path of the GeoJSON file to write. Missing parent directories are
        created before writing.
    station_order : dict, optional
        Maps each 'linea' value to the list of 'objectid' values in the order
        in which the stations must be joined. Defaults to the module-level
        ``sorter_lines`` dictionary.

    Raises
    ------
    KeyError
        If a line present in ``df_stations`` has no entry in ``station_order``.

    Notes
    -----
    A station whose 'objectid' is not listed for its line has no defined
    position, so it is left out of the geometry and reported with a warning
    instead of being silently appended after the last ordered station.
    """
    import os
    import warnings

    if station_order is None:
        station_order = sorter_lines

    line_records = []  # one {'Service_Line', 'geometry'} record per metro line

    for line in df_stations['linea'].unique():
        if line not in station_order:
            raise KeyError(f"No station order defined for line {line!r}")

        line_df = df_stations[df_stations['linea'] == line]

        # Position of every station id within the line, e.g. {76: 0, 87: 1, ...}
        rank = {objectid: position for position, objectid in enumerate(station_order[line])}

        is_listed = line_df['objectid'].isin(station_order[line])
        if not is_listed.all():
            unlisted = line_df.loc[~is_listed, 'objectid'].tolist()
            warnings.warn(
                f"Line {line}: objectid(s) {unlisted} are not in the station order "
                "and were left out of the geometry",
                stacklevel=2,
            )

        # Sort the stations by their position in the configured order
        ordered = line_df[is_listed].sort_values(
            'objectid', key=lambda ids: ids.map(rank), kind='stable'
        )

        # Create a LineString from the ordered (x, y) coordinate pairs
        line_geom = LineString(
            list(zip(ordered['X'].astype(float), ordered['Y'].astype(float)))
        )
        line_records.append({'Service_Line': f'Line {line}', "geometry": line_geom})

    # One row per metro line; the coordinates are tagged as EPSG:4326
    main_gdf = gpd.GeoDataFrame(line_records, crs="EPSG:4326")

    # Make sure the export directory exists, otherwise writing the file fails
    out_dir = os.path.dirname(geojson_filename)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # Export the GeoDataFrame as a GeoJSON file
    main_gdf.to_file(geojson_filename, driver="GeoJSON")

# Run the export: writes one LineString feature per metro line to the GeoJSON file
create_geodataframe_file(df_stations)