In [36]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib as plt
from IPython.display import display
import plotly
import plotly.graph_objs as go
import plotly.express as px
from plotly.subplots import make_subplots
import awoc

### Выгружаем данные

In [37]:
# Load the OpenFlights airports table. The file is read with explicit
# column names supplied through `names`.
AIRPORTS_URL = 'https://raw.githubusercontent.com/jpatokal/openflights/master/data/airports.dat'
AIRPORT_COLUMNS = [
    'Airport ID', 'Name', 'City', 'Country', 'IATA', 'ICAO',
    'Latitude', 'Longitude', 'Altitude', 'Timezone', 'DST',
    'Tz database time zone', 'Type', 'Source',
]
airport_raw = pd.read_csv(AIRPORTS_URL, names=AIRPORT_COLUMNS)

In [51]:
# Load the OpenFlights routes table. The file is read with explicit
# column names supplied through `names`.
ROUTES_URL = 'https://raw.githubusercontent.com/jpatokal/openflights/master/data/routes.dat'
ROUTE_COLUMNS = [
    'Airline', 'Airline ID',
    'Source airport', 'Source airport ID',
    'Destination airport', 'Destination airport ID',
    'Codeshare', 'Stops', 'Equipment',
]
road_raw = pd.read_csv(ROUTES_URL, names=ROUTE_COLUMNS)

# Задание 
## Найти топ-10 аэропортов мира по сумме взлётов и посадок

In [39]:
# Replace the '\N' placeholder in the IATA column with NaN.
# The result is assigned back to the column on purpose: calling
# replace(..., inplace=True) on `airport_raw.IATA` is a chained in-place
# operation, which pandas warns about and which does not update the frame
# when Copy-on-Write is active.
airport_raw['IATA'] = airport_raw['IATA'].replace('\\N', np.nan)

In [40]:
# Data cleaning: drop, in place, every airport row whose IATA value is missing (NaN)
airport_raw.dropna(subset=['IATA'], inplace = True)

In [41]:
# Find the 10 IATA codes with the largest number of route endpoints.
# Count how often each airport code appears as a route source / destination.
departures = road_raw['Source airport'].value_counts()
arrivals = road_raw['Destination airport'].value_counts()

# Name the columns explicitly instead of relying on the name of the Series
# returned by value_counts(): on pandas >= 2.0 that name is 'count', so the
# former `data['Source airport']` lookup fails there.
data = pd.DataFrame({'Source airport': departures,
                     'Destination airport': arrivals}).reindex(airport_raw['IATA'].unique())

# fill_value=0 keeps a total for airports that appear on only one side of
# the routes table; airports missing from both sides stay NaN.
data['total'] = data['Source airport'].add(data['Destination airport'], fill_value=0)
answer_1 = data.nlargest(10, 'total')

In [42]:
# Translate the IATA codes of the top-10 table into country and airport name:
# look every code up in the airports table, then use the matching
# (Country, Name) pairs as the new two-level index of the answer.
data1 = pd.DataFrame({'IATA': answer_1.index}).merge(airport_raw, on='IATA')
answer_1.index = pd.MultiIndex.from_arrays([data1['Country'], data1['Name']])

In [43]:
# Answer: render the resulting top-10 table
display(answer_1)

Unnamed: 0_level_0,Unnamed: 1_level_0,Source airport,Destination airport,total
Country,Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
United States,Hartsfield Jackson Atlanta International Airport,915.0,911.0,1826.0
United States,Chicago O'Hare International Airport,558.0,550.0,1108.0
China,Beijing Capital International Airport,535.0,534.0,1069.0
United Kingdom,London Heathrow Airport,527.0,524.0,1051.0
France,Charles de Gaulle International Airport,524.0,517.0,1041.0
Germany,Frankfurt am Main Airport,497.0,493.0,990.0
United States,Los Angeles International Airport,492.0,498.0,990.0
United States,Dallas Fort Worth International Airport,469.0,467.0,936.0
United States,John F Kennedy International Airport,456.0,455.0,911.0
Netherlands,Amsterdam Airport Schiphol,453.0,450.0,903.0


# Задание
## Всем аэропортам в датасете сопоставить материк, на котором они находятся

In [44]:
# awoc lookup object; get_continent below queries it for a country's continent name
world = awoc.AWOC()

In [45]:
def get_continent(country):
    """Return the continent name for ``country``, or NaN when the lookup fails.

    The name is resolved through the module-level ``world`` object. A
    ``NameError`` raised during the lookup is converted into ``np.nan``;
    any other exception propagates to the caller.
    """
    try:
        return world.get_country_continent_name(country)
    except NameError:
        return np.nan

In [46]:
# Attach a continent to every airport by looking up its country
airport_raw['continent'] = airport_raw['Country'].map(get_continent)

# Задание
## Изобразить все аэропорты на карте каждого континента

In [47]:
def plot_continent_airports(continent, title, center, zoom, marker):
    """Build a map figure with all airports of one continent.

    Parameters
    ----------
    continent : value matched against ``airport_raw['continent']``.
    title : figure title.
    center : ``go.layout.mapbox.Center`` the map is centred on.
    zoom : mapbox zoom level.
    marker : dict of marker properties for the airport points.

    Returns
    -------
    The configured ``go.Figure``; the caller decides when to show it.
    """
    airports = airport_raw[airport_raw['continent'] == continent]
    # The hover text is taken from the same filtered rows as lat/lon, so each
    # label belongs to the point it is attached to. Passing the IATA column of
    # the whole table here would pair labels with the wrong airports.
    fig = go.Figure(go.Scattermapbox(lat=airports['Latitude'],
                                     lon=airports['Longitude'],
                                     text=airports['IATA'],
                                     marker=marker))
    fig.update_layout(mapbox_style="open-street-map",
                      mapbox=dict(center=center, zoom=zoom),
                      title=title)
    return fig


# Asia
map_center_1 = go.layout.mapbox.Center(lat=23, lon=85)
fig_1 = plot_continent_airports('Asia', 'Аэропорты Азии', map_center_1, 1.5,
                                dict(size=8, color='rgb(0, 177, 172)', opacity=0.7))
fig_1.show()

# Australia and Oceania
map_center_2 = go.layout.mapbox.Center(lat=-17.6, lon=179)
fig_2 = plot_continent_airports('Oceania', 'Аэропорты Австралии и Океании', map_center_2, 1.5,
                                dict(size=8, color='purple', opacity=0.7))
fig_2.show()

# North America
map_center_3 = go.layout.mapbox.Center(lat=50, lon=-104)
fig_3 = plot_continent_airports('North America', 'Аэропорты Северной Америки', map_center_3, 1.5,
                                dict(size=8, color='yellow', opacity=0.7))
fig_3.show()

# Europe
map_center_4 = go.layout.mapbox.Center(lat=62, lon=65)
fig_4 = plot_continent_airports('Europe', 'Аэропорты Европы', map_center_4, 1,
                                dict(size=8, color='red', opacity=0.8))
fig_4.show()

# Africa
map_center_5 = go.layout.mapbox.Center(lat=1, lon=12)
fig_5 = plot_continent_airports('Africa', 'Аэропорты Африки', map_center_5, 1.5,
                                dict(size=8, color='black'))
fig_5.show()

# South America
map_center_6 = go.layout.mapbox.Center(lat=-24, lon=-63)
fig_6 = plot_continent_airports('South America', 'Аэропорты Южной Америки', map_center_6, 1.5,
                                dict(size=8, color='green'))
fig_6.show()

# Antarctica
map_center_7 = go.layout.mapbox.Center(lat=-62, lon=-58)
fig_7 = plot_continent_airports('Antarctica', 'Аэропорты Антарктики', map_center_7, 3,
                                dict(size=8, color='brown'))
fig_7.show()

# Задание
## Найти топ-5 самых крупных аэропортов на каждом материке и отметить их на карте мира, после чего соединить зелёной линией межматериковые перелёты, если связь есть в обе стороны. Если связь только в одну сторону, то соединить красной линией

In [48]:
# Per-airport traffic: how often each IATA code appears as a route source / destination.
departures = road_raw['Source airport'].value_counts()
arrivals = road_raw['Destination airport'].value_counts()

# Columns are named explicitly: on pandas >= 2.0 the Series returned by
# value_counts() is called 'count', so relying on its name breaks the
# `data['Source airport']` lookup.
data = pd.DataFrame({'Source airport': departures,
                     'Destination airport': arrivals}).reindex(airport_raw['IATA'].unique())

# fill_value=0 keeps a total for airports present on only one side of the
# routes table; airports absent from both sides stay NaN.
data['total'] = data['Source airport'].add(data['Destination airport'], fill_value=0)

# Airport attributes for every counted IATA code.
data1 = pd.DataFrame({'IATA': data.index}).merge(airport_raw, on='IATA')

# Attach the totals by IATA key rather than by row position, so the values
# stay aligned even if the merge returns a different number of rows than `data`.
data3 = data1.assign(total=data1['IATA'].map(data['total']))[
    ['IATA', 'continent', 'total', 'Latitude', 'Longitude']
]

In [49]:
# Pick the five airports with the largest `total` on each continent.
# The generator yields one frame per continent name, in the same order as
# the names on the left-hand side of the unpacking.
(Europe, Oceania, North_America, Africa,
 South_America, Asia, Antarctica) = (
    data3[data3['continent'].eq(continent_name)].nlargest(5, 'total')
    for continent_name in ('Europe', 'Oceania', 'North America', 'Africa',
                           'South America', 'Asia', 'Antarctica')
)


In [143]:
# Plot the five largest airports of every continent on one world map and
# connect airports on different continents: green when routes exist in both
# directions, red when a route exists in one direction only.

# (legend name, top-5 frame, marker style) for each continent, in drawing order.
continent_layers = [
    ('Europe', Europe, dict(size=8, color='rgb(0, 177, 172)', opacity=0.7)),
    ('Oceania', Oceania, dict(size=8, color='red')),
    ('North America', North_America, dict(size=8, color='brown')),
    ('Africa', Africa, dict(size=8, color='blue')),
    ('South America', South_America, dict(size=8, color='purple')),
    ('Asia', Asia, dict(size=8, color='yellow')),
    ('Antarctica', Antarctica, dict(size=8, color='green')),
]

fig = go.Figure()
for continent_name, top_airports, marker_style in continent_layers:
    fig.add_trace(go.Scattermapbox(text=top_airports['IATA'], name=continent_name,
                                   lat=top_airports['Latitude'],
                                   lon=top_airports['Longitude'],
                                   marker=marker_style))

# All selected airports in one frame; `the_biggest` keeps the flat array of IATA codes.
biggest = pd.concat([layer[1] for layer in continent_layers], ignore_index=True)
the_biggest = biggest['IATA'].to_numpy()

# Every (source, destination) pair in the routes table, collected once so the
# checks inside the loop are set lookups instead of repeated full-table scans.
direct_routes = set(zip(road_raw['Source airport'], road_raw['Destination airport']))

hubs = list(biggest[['IATA', 'continent', 'Latitude', 'Longitude']]
            .itertuples(index=False, name=None))

# Draw the red and green lines connecting the airports.
for i, (iata_a, continent_a, lat_a, lon_a) in enumerate(hubs):
    # Start after position i: each unordered pair is visited once and an
    # airport is never paired with itself.
    for iata_b, continent_b, lat_b, lon_b in hubs[i + 1:]:
        if continent_a == continent_b:
            continue  # only intercontinental connections are drawn
        there = (iata_a, iata_b) in direct_routes
        back = (iata_b, iata_a) in direct_routes
        if there and back:
            line_color = 'green'
        elif there or back:
            line_color = 'red'
        else:
            continue  # no direct route in either direction
        fig.add_trace(go.Scattermapbox(mode="lines", hoverinfo='skip', showlegend=False,
                                       marker=dict(color=line_color, opacity=0.05, size=0.05),
                                       lat=[lat_a, lat_b],
                                       lon=[lon_a, lon_b]))

fig.update_layout(mapbox_style="open-street-map",
                  title='5 крупнейших аэропортов на каждом материке',
                  mapbox=dict(center=go.layout.mapbox.Center(lat=1, lon=1), zoom=0.3))
fig.show()

# Задание
## Поискать ошибки в датасете

### Самолёт вылетел и приземлился в одном и том же аэропорту

In [None]:
# Re-read the routes file, then list every route whose destination airport
# code equals its source airport code.
road_raw = pd.read_csv(
    'https://raw.githubusercontent.com/jpatokal/openflights/master/data/routes.dat',
    names=[
        'Airline', 'Airline ID',
        'Source airport', 'Source airport ID',
        'Destination airport', 'Destination airport ID',
        'Codeshare', 'Stops', 'Equipment',
    ],
)
same_airport = road_raw['Destination airport'].eq(road_raw['Source airport'])
road_raw.loc[same_airport]

### Неверные координаты аэропорта (координаты указывают на нахождение на неправильном континенте)

In [None]:
# Re-read the airports file and show the row(s) for IATA code 'BHD'.
# The read_csv call is now closed with its parenthesis; without it the cell
# does not parse.
airport_raw = pd.read_csv('https://raw.githubusercontent.com/jpatokal/openflights/master/data/airports.dat',
                          names=['Airport ID', 'Name', 'City', 'Country', 'IATA', 'ICAO', 'Latitude', 'Longitude',
                                 'Altitude', 'Timezone', 'DST', 'Tz database time zone', 'Type', 'Source'])
airport_raw[airport_raw['IATA'] == 'BHD']

### Отсутствие данных об аэропортах

In [None]:
# Airports whose IATA field holds the literal '\N' placeholder
has_placeholder = airport_raw['IATA'].eq('\\N')
airport_raw.loc[has_placeholder]