In [28]:
import pandas as pd
import folium
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.io as pio
pio.templates.default = "plotly_white"

In [29]:
m = folium.Map( tiles="cartodb positron")

In [30]:
metro_data = pd.read_csv("Delhi-Metro-Network.csv")

In [31]:
metro_data.head()

Unnamed: 0,Station ID,Station Name,Distance from Start (km),Line,Opening Date,Station Layout,Latitude,Longitude
0,1,Jhil Mil,10.3,Red line,2008-04-06,Elevated,28.67579,77.31239
1,2,Welcome [Conn: Red],46.8,Pink line,2018-10-31,Elevated,28.6718,77.27756
2,3,DLF Phase 3,10.0,Rapid Metro,2013-11-14,Elevated,28.4936,77.0935
3,4,Okhla NSIC,23.8,Magenta line,2017-12-25,Elevated,28.554483,77.264849
4,5,Dwarka Mor,10.2,Blue line,2005-12-30,Elevated,28.61932,77.03326


In [32]:
metro_data.shape

(285, 8)

In [33]:
# checking for missing values
missing_values = metro_data.isnull().sum()
print(missing_values)


Station ID                  0
Station Name                0
Distance from Start (km)    0
Line                        0
Opening Date                0
Station Layout              0
Latitude                    0
Longitude                   0
dtype: int64


In [34]:
# checking data types
data_types = metro_data.dtypes
print(data_types)

Station ID                    int64
Station Name                 object
Distance from Start (km)    float64
Line                         object
Opening Date                 object
Station Layout               object
Latitude                    float64
Longitude                   float64
dtype: object


In [35]:
# converting 'Opening Date' to datetime format
metro_data['Opening Date'] = pd.to_datetime(metro_data['Opening Date'])

<h1 style="color:purple">Geospatial Analysis</h1>

I’ll start by visualizing the locations of the metro stations on a map. It will give us an insight into the geographical distribution of the stations across Delhi. We will use the latitude and longitude data to plot each station.

In [37]:
# defining a color scheme for the metro lines
line_colors = {
    'Red line': 'red',
    'Blue line': 'blue',
    'Yellow line': 'beige',
    'Green line': 'green',
    'Voilet line': 'purple',
    'Pink line': 'pink',
    'Magenta line': 'darkred',
    'Orange line': 'orange',
    'Rapid Metro': 'cadetblue',
    'Aqua line': 'black',
    'Green line branch': 'lightgreen',
    'Blue line branch': 'lightblue',
    'Gray line': 'lightgray'
}


In [118]:
delhi_map_with_line_tooltip = folium.Map(location=[28.7041, 77.1025], zoom_start=11)

# Adding colored marker for each metro station with line name in tooltip
for index, row in metro_data.iterrows():
    line = row['Line']
    station_Name = row['Station Name']
    color = line_colors.get(row['Line'], 'black')
    #print(f"Line - {line} --> {station_Name} --> {color}")
    folium.Marker(
        location=[row['Latitude'], row['Longitude']],
        popup=station_Name,
        tooltip=f"{station_Name}, {line}",
        icon=folium.Icon(color=color)
    ).add_to(delhi_map_with_line_tooltip)

In [119]:
# Displaying the updated map
delhi_map_with_line_tooltip

In [53]:
metro_data.Line.value_counts()

Blue line            49
Pink line            38
Yellow line          37
Voilet line          34
Red line             29
Magenta line         25
Aqua line            21
Green line           21
Rapid Metro          11
Blue line branch      8
Orange line           6
Gray line             3
Green line branch     3
Name: Line, dtype: int64

<h1 style="color:purple">Temporal Analysis</h1>

Now, I will analyze the growth of the Delhi Metro network over time. I’ll look at how many stations were opened each year and visualize this growth. It can provide insights into the pace of metro network expansion and its development phases.

In [79]:
metro_data['Opening year'] = metro_data['Opening Date'].dt.year

# Counting numbers of stations opened each year
stations_per_year = metro_data['Opening year'].value_counts().sort_index()

stations_per_year_df = stations_per_year.reset_index()
stations_per_year_df.columns = ['Year', 'Number of Stations']

fig = px.bar(stations_per_year_df, 
             x='Year', 
             y='Number of Stations',
             title="Number of Metro Stations Opened Each Year in Delhi",
             labels={'Year': 'Year', 'Number of Stations': 'Number of Stations Opened'}
            )
fig.update_layout(xaxis_tickangle=-45, xaxis=dict(tickmode='linear'),
                  yaxis=dict(title='Number of Stations Opened'),
                  xaxis_title="Year")

fig.show()

<h3> Some years show a significant number of new station openings, indicating phases of rapid network expansion.
Conversely, there are years with few or no new stations, which could be due to various factors like planning, funding, or construction challenges.</h3>


<h1 style="color:purple" >Line Analysis</h1>

Now, I’ll analyze the various metro lines in terms of the number of stations they have and the average distance between stations. It will give us insights into the characteristics of each metro line, such as which lines are more extensive or denser.

In [152]:
stations_per_line = metro_data['Line'].value_counts()
stations_per_line

Blue line            49
Pink line            38
Yellow line          37
Voilet line          34
Red line             29
Magenta line         25
Aqua line            21
Green line           21
Rapid Metro          11
Blue line branch      8
Orange line           6
Gray line             3
Green line branch     3
Name: Line, dtype: int64

In [155]:
total_distance_per_line = metro_data.groupby('Line')['Distance from Start (km)'].max()
avg_distance_per_line = total_distance_per_line / (stations_per_line -1 )
avg_distance_per_line

Aqua line            1.355000
Blue line            1.097917
Blue line branch     1.157143
Gray line            1.950000
Green line           1.240000
Green line branch    1.050000
Magenta line         1.379167
Orange line          4.160000
Pink line            1.421622
Rapid Metro          1.000000
Red line             1.167857
Voilet line          1.318182
Yellow line          1.269444
dtype: float64

In [159]:
line_analysis = pd.DataFrame({
    'Line': stations_per_line.index,
    'Number of Stations': stations_per_line.values,
    'Average Distance Between Stations (km)': avg_distance_per_line
})

# sorting the DataFrame by the number of stations
line_analysis = line_analysis.sort_values(by='Number of Stations', ascending=False)
line_analysis.reset_index(drop=True, inplace=True)
line_analysis

Unnamed: 0,Line,Number of Stations,Average Distance Between Stations (km)
0,Blue line,49,1.355
1,Pink line,38,1.097917
2,Yellow line,37,1.157143
3,Voilet line,34,1.95
4,Red line,29,1.24
5,Magenta line,25,1.05
6,Aqua line,21,1.379167
7,Green line,21,4.16
8,Rapid Metro,11,1.421622
9,Blue line branch,8,1.0


In [176]:
# creating subplots
fig = make_subplots(rows=1, cols=2,
                   subplot_titles=('Number of Stations Per Metro Line',
                                   'Average Distance Between Stations Per Metro Line'),
                    horizontal_spacing=0.2)
# plot no of stations per line
fig.add_trace(
    go.Bar(
        y=line_analysis['Line'], x=line_analysis['Number of Stations'],
        orientation='h', name='Number of Stations', marker_color='crimson'), 
    row=1, col=1
)

# plot for Average Distance Between Stations
fig.add_trace(
    go.Bar(
        y=line_analysis['Line'], x=line_analysis['Average Distance Between Stations (km)'],
        orientation='h', name='Average Distance (km)', marker_color='navy'),
    row=1, col=2
)

# Update X-axis properties
fig.update_xaxes(title_text="Number of Stations", row=1, col=1)
fig.update_xaxes(title_text="Average Distance Between Stations (km)",  row=1, col=2)

# Update Y-axix properties
fig.update_yaxes(title_text="Metro Lines", row=1, col=1)
fig.update_yaxes(title_text="",  row=1, col=2)

# update layout
fig.update_layout(title_text="Metro Line Analysis", template="plotly_white")

fig.show()

<h1 style="color:purple" >Station Layout Analysis</h1>

I’ll explore the station layouts (Elevated, Ground Level, Underground). I’ll analyze the distribution of these layouts across the network and see if there are any patterns or trends, such as certain lines favouring a particular layout.

In [180]:
layout_counts = metro_data['Station Layout'].value_counts()
layout_counts

Elevated       214
Underground     68
At-Grade         3
Name: Station Layout, dtype: int64

In [188]:
# creating the bar plot using Plotly
fig = px.bar(x=layout_counts.index, y=layout_counts.values,
             labels={'x': 'Station Layout Type', 'y': 'Number of Stations'},
             title='Distribution of Delhi Metro Station Layouts',
             color=layout_counts.index,
             color_continuous_scale='pastel')

# updating layout for better presentation
fig.update_layout(xaxis_title="Station Layout",
                  yaxis_title="Number of Stations",
                  coloraxis_showscale=False,
                  template="plotly_white")

fig.show()

The bar chart and the counts show the distribution of different station layouts in the Delhi Metro network.

# Observations:

<h3> Elevated Stations</h3> The majority of the stations are Elevated. It is a common design choice in urban areas to save space and reduce land acquisition issues.

<h3>Underground Stations</h3> The Underground stations are fewer compared to elevated ones. These are likely in densely populated or central areas where above-ground construction is less feasible.

<h3>At-Grade Stations</h3> There are only a few At-Grade (ground level) stations, suggesting they are less common in the network, possibly due to land and traffic considerations.