In [1]:
# Import Dependencies
import pandas as pd
import country_converter as coco

In [2]:
# Load the raw city weather records from the CSV file into a DataFrame
csv_path = "../Resources/cities.csv"
orig_cities_df = pd.read_csv(filepath_or_buffer=csv_path)

# Display the first five rows as a quick sanity check of the load
orig_cities_df.head(5)

Unnamed: 0,City_ID,City,Cloudiness,Country,Date,Humidity,Lat,Lng,Max Temp,Wind Speed
0,0,jacareacanga,0,BR,1528902000,62,-6.22,-57.76,89.6,6.93
1,1,kaitangata,100,NZ,1528905304,94,-46.28,169.85,42.61,5.64
2,2,goulburn,20,AU,1528905078,91,-34.75,149.72,44.32,10.11
3,3,lata,76,IN,1528905305,89,30.78,78.62,59.89,0.94
4,4,chokurdakh,0,RU,1528905306,88,70.62,147.9,32.17,2.95


In [3]:
# Isolate the rows whose Country value was read in as missing (NaN)
nan_values = orig_cities_df.loc[lambda frame: frame["Country"].isna()]
nan_values

Unnamed: 0,City_ID,City,Cloudiness,Country,Date,Humidity,Lat,Lng,Max Temp,Wind Speed
65,65,luderitz,12,,1528905342,64,-26.65,15.16,64.03,17.38
240,240,ondangwa,0,,1528902000,13,-17.91,15.98,84.2,2.24


In [4]:
# The rows with a missing Country are cities in Namibia.
# NOTE(review): presumably Namibia's ISO2 code "NA" was parsed as NaN by
# pd.read_csv, whose default missing-value markers include "NA" - confirm
# against the raw CSV.
#
# Replace the missing values with "Nam" for the later country-name conversion.
# The filled Series is assigned back to the column instead of calling
# fillna(inplace=True) on the selected column: that chained in-place form is
# deprecated and does not update the DataFrame under pandas Copy-on-Write.
orig_cities_df["Country"] = orig_cities_df["Country"].fillna("Nam")

# Verify that no missing Country values remain (an empty frame is expected)
nan_values1 = orig_cities_df[orig_cities_df["Country"].isna()]
nan_values1

Unnamed: 0,City_ID,City,Cloudiness,Country,Date,Humidity,Lat,Lng,Max Temp,Wind Speed


In [5]:
# First step of turning the Country abbreviations into country names:
# the converter is fed a list, so pull the Country column out of the
# DataFrame as a plain Python list
co_fn = orig_cities_df.loc[:, "Country"].to_list()

In [6]:
# Translate every country abbreviation in the list to its short country name
Country_fn = coco.convert(
    names=co_fn,
    to="name_short",
)


In [7]:
# Attach the converted names to the original frame as a new Country_fn column,
# then build cities_df from it without the abbreviation column
orig_cities_df["Country_fn"] = Country_fn
cities_df = orig_cities_df.drop("Country", axis="columns")
cities_df.head(5)

Unnamed: 0,City_ID,City,Cloudiness,Date,Humidity,Lat,Lng,Max Temp,Wind Speed,Country_fn
0,0,jacareacanga,0,1528902000,62,-6.22,-57.76,89.6,6.93,Brazil
1,1,kaitangata,100,1528905304,94,-46.28,169.85,42.61,5.64,New Zealand
2,2,goulburn,20,1528905078,91,-34.75,149.72,44.32,10.11,Australia
3,3,lata,76,1528905305,89,30.78,78.62,59.89,0.94,India
4,4,chokurdakh,0,1528905306,88,70.62,147.9,32.17,2.95,Russia


In [8]:
# Tidy the frame for presentation:
#   1. swap the positions of the Cloudiness and Country_fn columns,
#   2. rename Country_fn to Country,
#   3. capitalize the City names.

# Build the new column order by exchanging the two column labels in place
cols = list(cities_df.columns)
a, b = cols.index("Cloudiness"), cols.index("Country_fn")
cols[b], cols[a] = cols[a], cols[b]

# Apply all three steps as one non-mutating chain. The original code selected
# cities_df[cols] and then modified that selection with rename(inplace=True)
# and a column assignment, which is the pattern that triggers pandas'
# SettingWithCopyWarning; rename()/assign() each return a fresh DataFrame.
# NOTE(review): str.capitalize() upper-cases only the first character of the
# whole string and lower-cases the rest, so a multi-word name would become
# e.g. "New york". Switch to str.title() if per-word capitalization is
# wanted - confirm against the data.
cities_df = (
    cities_df[cols]
    .rename(columns={"Country_fn": "Country"})
    .assign(City=lambda frame: frame["City"].str.capitalize())
)

cities_df.head()

Unnamed: 0,City_ID,City,Country,Date,Humidity,Lat,Lng,Max Temp,Wind Speed,Cloudiness
0,0,Jacareacanga,Brazil,1528902000,62,-6.22,-57.76,89.6,6.93,0
1,1,Kaitangata,New Zealand,1528905304,94,-46.28,169.85,42.61,5.64,100
2,2,Goulburn,Australia,1528905078,91,-34.75,149.72,44.32,10.11,20
3,3,Lata,India,1528905305,89,30.78,78.62,59.89,0.94,76
4,4,Chokurdakh,Russia,1528905306,88,70.62,147.9,32.17,2.95,0


In [9]:
# Export the cleaned DataFrame as an HTML table (row index omitted) so the
# file can be reused later
cities_df.to_html(
    buf="../Support_code/cities_df_html.html",
    index=False,
)