In [99]:
import os
import time

import numpy as np
import pandas as pd
import geopandas as gpd

import matplotlib.pyplot as plt

In [100]:
def check_code_info(data):
     
    print('Data shape:',  data.shape, '\n',
          'Country: \n',
          '  Number of non-nan records:', data['country_code'].dropna().shape, '\n',
          '  Number of unique records:',  data['country_code'].dropna().drop_duplicates().shape, '\n',
          'IATA number: \n',
          '  Number of non-nan records:', data['iata'].dropna().shape, '\n',
          '  Number of unique records:',  data['iata'].dropna().drop_duplicates().shape, '\n',
          'ICAO number: \n',
          '  Number of non-nan records:', data['icao'].dropna().shape, '\n',
          '  Number of unique records:',  data['icao'].dropna().drop_duplicates().shape, '\n'
         )

# 1. Dataset 1

Source: [github:ip2location/ip2location-iata-icao](https://github.com/ip2location/ip2location-iata-icao)


In [101]:
# Dataset 1
path = 'https://raw.githubusercontent.com/ip2location/ip2location-iata-icao/master/iata-icao.csv'
airport = pd.read_csv(path) \
            .rename(columns={'latitude':'lat', 'longitude':'lon'}) \
            .replace('', pd.NA)

check_code_info(airport)

Data shape: (8936, 7) 
 Country: 
   Number of non-nan records: (8904,) 
   Number of unique records: (228,) 
 IATA number: 
   Number of non-nan records: (8936,) 
   Number of unique records: (8936,) 
 ICAO number: 
   Number of non-nan records: (7793,) 
   Number of unique records: (7790,) 



In [102]:
airport[airport['country_code'].isna()]

Unnamed: 0,country_code,region_name,iata,icao,airport,lat,lon
4833,,Erongo,ADI,FYAR,Arandis Airport,-22.4622,14.98
4834,,Erongo,MJO,FYME,Mount Etjo Airport,-21.0233,16.4528
4835,,Erongo,SWP,FYSM,Swakopmund Airport,-22.6619,14.5681
4836,,Erongo,WVB,FYWB,Walvis Bay Airport,-22.9799,14.6453
4837,,Hardap,SZM,FYSS,Sesriem Airport,-24.5128,15.7467
4838,,Karas,AIW,FYAA,Ai-Ais Airport,-27.995,17.5966
4839,,Karas,KAS,FYKB,Karasburg Airport,-28.0297,18.7385
4840,,Karas,KMP,FYKT,Keetmanshoop Airport,-26.5398,18.1114
4841,,Karas,LUD,FYLZ,Luderitz Airport,-26.6874,15.2429
4842,,Karas,OMD,FYOG,Oranjemund Airport,-28.5847,16.4467


In [103]:
airport_cn = airport.query('country_code == \'CN\'')
print('CN airport shape:', airport_cn.shape)
airport_cn[airport_cn['icao'].isna()]

CN airport shape: (264, 7)


Unnamed: 0,country_code,region_name,iata,icao,airport,lat,lon
2035,CN,Anhui,JUH,,Chizhou Jiuhuashan Airport,30.7403,117.686
2063,CN,Guangdong,HSC,,Shaoguan Guitou Airport,24.9786,113.421
2082,CN,Guizhou,KJH,,Kaili Huangping Airport,26.972,107.988
2089,CN,Hainan,BAR,,Qionghai Bo'ao Airport,19.1382,110.455
2100,CN,Heilongjiang,DTU,,Wudalianchi Airport,48.445,126.133
2116,CN,Henan,HSJ,,Zhengzhou Shangjie Airport,34.8422,113.274
2171,CN,Nei Mongol,AEQ,,Ar Horqin Airport,43.8704,120.16
2172,CN,Nei Mongol,AXF,,Alxa Left Banner Bayanhot Airport,38.7483,105.589
2176,CN,Nei Mongol,EJN,,Ejin Banner Taolai Airport,42.0155,101.001
2183,CN,Nei Mongol,NZL,,Zhalantun Chengjisihan Airport,47.8658,122.768


# 2. Dataset 2

Source: [github:mwgg/Airports](https://github.com/mwgg/Airports/issues)

- Contain military airport 

In [104]:
# Dataset 2
path = 'https://raw.githubusercontent.com/mwgg/Airports/master/airports.json'
airport = pd.read_json(path, orient='index') \
            .rename(columns={'country':'country_code'})

# NA data
airport['iata'] = airport['iata'].replace('0', np.nan)
airport = airport.replace(['', 'NA', pd.NA], np.nan)

check_code_info(airport)

Data shape: (28894, 10) 
 Country: 
   Number of non-nan records: (28831,) 
   Number of unique records: (235,) 
 IATA number: 
   Number of non-nan records: (7778,) 
   Number of unique records: (7728,) 
 ICAO number: 
   Number of non-nan records: (28894,) 
   Number of unique records: (28894,) 



In [None]:
# Dataset 1
# airport = pd.read_excel('data/airport/world_airports_location.xlsx') \
#             .dropna(subset='gps_code') \
#             .query('scheduled_service == \'yes\'') \
#             .query('type != \'heliport\'') \
#             .query('type != \'closed\'') \
#             .reset_index(drop=True)
