In [7]:
import glob
import os
import datetime
import numpy as np
import pandas as pd

from datetime import datetime, timedelta

COLUMN_NAMES=['Temperature', 'Year', 'Month_Avg', 'Country', 'Country_Code']

def strip(text):
    """Return ``text`` with surrounding whitespace removed.

    Values that have no ``strip`` method (numbers, ``None``, ...) are
    handed back unchanged, which lets this be used as a tolerant CSV
    converter.
    """
    try:
        cleaned = text.strip()
    except AttributeError:
        # Not string-like: pass the value through untouched.
        return text
    else:
        return cleaned

def make_float(text):
    """Parse a CSV cell into a float, ignoring surrounding quotes/spaces."""
    # Drop any leading/trailing double quotes and blanks before converting.
    unquoted = text.strip('" ')
    return float(unquoted)

def make_int(text):
    """Parse a CSV cell into an int, ignoring surrounding quotes/spaces."""
    # Drop any leading/trailing double quotes and blanks before converting.
    unquoted = text.strip('" ')
    return int(unquoted)

def read_csv(filename):
    """Load the temperature CSV at ``filename`` into a DataFrame.

    The first row of the file is consumed as a header and its labels are
    replaced by ``COLUMN_NAMES``.  Each column is passed through a
    converter: ``Temperature`` via ``make_float``, ``Year`` via
    ``make_int`` and the three text columns via ``strip``.
    """
    column_converters = {
        'Temperature': make_float,
        'Year': make_int,
        'Month_Avg': strip,
        'Country': strip,
        'Country_Code': strip,
    }
    return pd.read_csv(
        filename,
        index_col=None,
        header=0,
        names=COLUMN_NAMES,
        converters=column_converters,
    )

# Load the 2000-2016 temperature file into the frame shared by all tasks.
df = read_csv('world_temp_2000-2016.csv')
# Force the Temperature column to a float dtype.
df['Temperature'] = df['Temperature'].astype(float)
# Print column names, non-null counts and dtypes as a sanity check.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 39984 entries, 0 to 39983
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Temperature   39984 non-null  float64
 1   Year          39984 non-null  int64  
 2   Month_Avg     39984 non-null  object 
 3   Country       39984 non-null  object 
 4   Country_Code  39984 non-null  object 
dtypes: float64(1), int64(1), object(3)
memory usage: 1.5+ MB


In [102]:
# Task A
def getTempByCols(cols, val1, val2):
    """Return the min/max temperature for one group of the global ``df``.

    ``df`` is grouped by ``cols``; the row whose first grouping column
    equals ``val1`` and whose second equals ``val2`` is returned.  The
    result has columns ``('Temperature', 'min')`` and
    ``('Temperature', 'max')`` next to the grouping columns, and is empty
    when no such group exists.
    """
    extremes = (
        df.groupby(cols)
          .agg({'Temperature': ['min', 'max']})
          .reset_index()
    )
    first_matches = extremes[cols[0]] == val1
    second_matches = extremes[cols[1]] == val2
    return extremes[first_matches & second_matches]

def getLowestTempByMonth(country, month):
    """Return the row(s) of ``df`` with the lowest temperature for a month.

    Only rows of the global ``df`` whose ``Country`` equals ``country``
    and whose ``Month_Avg`` equals ``month`` are considered.  Every row
    tied for the minimum is returned, in ``df`` order.  An empty frame is
    returned when the country/month combination has no rows.
    """
    # Filter first so only the relevant rows are scanned, instead of
    # grouping the whole dataset on every call.
    subset = df[(df.Country == country) & (df.Month_Avg == month)]
    # On an empty subset min() is NaN, so the comparison selects nothing.
    return subset[subset.Temperature == subset.Temperature.min()]

def getHighestTempByMonth(country, month):
    """Return the row(s) of ``df`` with the highest temperature for a month.

    Only rows of the global ``df`` whose ``Country`` equals ``country``
    and whose ``Month_Avg`` equals ``month`` are considered.  Every row
    tied for the maximum is returned, in ``df`` order.  An empty frame is
    returned when the country/month combination has no rows.
    """
    # Filter first so only the relevant rows are scanned, instead of
    # grouping the whole dataset on every call.
    subset = df[(df.Country == country) & (df.Month_Avg == month)]
    # On an empty subset max() is NaN, so the comparison selects nothing.
    return subset[subset.Temperature == subset.Temperature.max()]

def getLowestTempByYear(country, year):
    """Return the row(s) of ``df`` with the lowest temperature in a year.

    Only rows of the global ``df`` whose ``Country`` equals ``country``
    and whose ``Year`` equals ``year`` are considered.  Every row tied for
    the minimum is returned, in ``df`` order.  An empty frame is returned
    when the country/year combination has no rows.
    """
    # Filter first so only the relevant rows are scanned, instead of
    # grouping the whole dataset on every call.
    subset = df[(df.Country == country) & (df.Year == year)]
    # On an empty subset min() is NaN, so the comparison selects nothing.
    return subset[subset.Temperature == subset.Temperature.min()]

def getHighestTempByYear(country, year):
    """Return the row(s) of ``df`` with the highest temperature in a year.

    Only rows of the global ``df`` whose ``Country`` equals ``country``
    and whose ``Year`` equals ``year`` are considered.  Every row tied for
    the maximum is returned, in ``df`` order.  An empty frame is returned
    when the country/year combination has no rows.
    """
    # Filter first so only the relevant rows are scanned, instead of
    # grouping the whole dataset on every call.
    subset = df[(df.Country == country) & (df.Year == year)]
    # On an empty subset max() is NaN, so the comparison selects nothing.
    return subset[subset.Temperature == subset.Temperature.max()]

def getTempWithinRange(country, rangeLowTemp, rangeHighTemp):
    """Return ``country``'s rows whose temperature lies in a closed range.

    Rows of the global ``df`` with
    ``rangeLowTemp <= Temperature <= rangeHighTemp`` are kept and returned
    sorted by ascending temperature.
    """
    is_country = df.Country == country
    not_too_hot = df.Temperature <= rangeHighTemp
    not_too_cold = df.Temperature >= rangeLowTemp
    selected = df[is_country & not_too_hot & not_too_cold]
    return selected.sort_values(by=['Temperature'])
    
def getHighestTemp(country):
    """Return the row of ``df`` holding ``country``'s highest temperature.

    The result is a single row (a Series) of the global ``df``.  When
    several rows share the maximum, the first one is returned.  pandas
    raises if ``country`` has no rows at all.
    """
    # idxmax() yields an index *label*, so the row must be fetched with
    # label-based .loc; positional .iloc only agrees with it while ``df``
    # still has its default 0..n-1 index.
    label = df[df.Country == country].Temperature.idxmax()
    return df.loc[label]

def getLowestTemp(country):
    """Return the row of ``df`` holding ``country``'s lowest temperature.

    The result is a single row (a Series) of the global ``df``.  When
    several rows share the minimum, the first one is returned.  pandas
    raises if ``country`` has no rows at all.
    """
    # idxmin() yields an index *label*, so the row must be fetched with
    # label-based .loc; positional .iloc only agrees with it while ``df``
    # still has its default 0..n-1 index.
    label = df[df.Country == country].Temperature.idxmin()
    return df.loc[label]


In [103]:
# Spot-check the four per-month / per-year extreme lookups. The list is the
# cell's last expression, so the notebook displays each resulting frame.
[getLowestTempByMonth('Albania', 'Jun'),
 getHighestTempByMonth('Belgium', 'Jul'),
 getLowestTempByYear('Cameroon', 2002),
 getHighestTempByYear('Congo (Republic of the)', 2005)]

[     Temperature  Year Month_Avg  Country Country_Code
 221      18.3694  2001       Jun  Albania          ALB,
       Temperature  Year Month_Avg  Country Country_Code
 3342      21.7403  2006       Jul  Belgium          BEL,
       Temperature  Year Month_Avg   Country Country_Code
 5940      23.5527  2002       Jan  Cameroon          CMR,
       Temperature  Year Month_Avg                  Country Country_Code
 8017      26.4338  2005       Feb  Congo (Republic of the)          COG]

In [104]:
# Belgium's readings between 20.0 and 25.0 inclusive, ordered by temperature.
getTempWithinRange('Belgium', 20.0, 25.0)

Unnamed: 0,Temperature,Year,Month_Avg,Country,Country_Code
3426,20.0705,2013,Jul,Belgium,BEL
3307,20.2945,2003,Aug,Belgium,BEL
3342,21.7403,2006,Jul,Belgium,BEL


In [105]:
# All-time hottest row for Belgium and coldest row for Albania.
[getHighestTemp('Belgium'), getLowestTemp('Albania')]

[Temperature     21.7403
 Year               2006
 Month_Avg           Jul
 Country         Belgium
 Country_Code        BEL
 Name: 3342, dtype: object,
 Temperature     -0.4209
 Year               2003
 Month_Avg           Feb
 Country         Albania
 Country_Code        ALB
 Name: 241, dtype: object]

In [115]:
# Task B
def allCountriesGetTop10LowestTempByMonth(month):
    """Return the ten coldest rows of ``df`` recorded for ``month``.

    Rows are ordered by ascending temperature, with ties broken by
    country name.
    """
    month_rows = df[df.Month_Avg == month]
    coldest_first = month_rows.sort_values(by=['Temperature', 'Country'])
    return coldest_first.head(10)

def allCountriesGetTop10HighestTempByMonth(month):
    """Return the ten hottest rows of ``df`` recorded for ``month``.

    Rows are ordered by descending temperature; ties are ordered by
    descending country name.
    """
    month_rows = df[df.Month_Avg == month]
    hottest_first = month_rows.sort_values(
        by=['Temperature', 'Country'],
        ascending=False,
    )
    return hottest_first.head(10)

def allCountriesGetTop10LowestTemp():
    """Return the ten coldest rows in the whole of ``df``.

    Rows are ordered by ascending temperature, with ties broken by
    country name.
    """
    coldest_first = df.sort_values(by=['Temperature', 'Country'])
    return coldest_first.head(10)

def allCountriesGetTop10HighestTemp():
    """Return the ten hottest rows in the whole of ``df``.

    Rows are ordered by descending temperature; ties are ordered by
    descending country name.
    """
    hottest_first = df.sort_values(
        by=['Temperature', 'Country'],
        ascending=False,
    )
    return hottest_first.head(10)

def allCountriesGetAllDataWithinTempRange(lowRangeTemp, highRangeTemp, limit=10):
    """Return rows of ``df`` whose temperature lies in a closed range.

    Rows with ``lowRangeTemp <= Temperature <= highRangeTemp`` are sorted
    by ascending temperature, with ties broken by country name.

    ``limit`` caps how many rows are returned.  It defaults to 10, which
    keeps the previous behaviour; pass ``None`` to get every matching row.
    """
    in_range = df[(df.Temperature <= highRangeTemp) & (df.Temperature >= lowRangeTemp)]
    ordered = in_range.sort_values(by=['Temperature', 'Country'])
    if limit is None:
        return ordered
    return ordered.head(limit)
    

In [117]:
# Run every Task B query once and keep the resulting frames in a list.
# Nothing is displayed because the cell ends in an assignment.
resultB = [allCountriesGetTop10LowestTempByMonth('Aug'),
 allCountriesGetTop10HighestTempByMonth('Nov'),
 allCountriesGetTop10LowestTemp(),
 allCountriesGetTop10HighestTemp(),
 allCountriesGetAllDataWithinTempRange(15.0, 30.0)]

In [120]:
# Task C
def allCountriesTop10TempDelta(month, year1, year2):
    """Group the ``month`` readings of two years by country.

    Selects the rows of the global ``df`` whose ``Month_Avg`` equals
    ``month`` and whose ``Year`` is ``year1`` or ``year2``, and returns
    them as a pandas GroupBy keyed on ``Country``.  Computing and ranking
    the temperature deltas is left to the caller.
    """
    is_month = df.Month_Avg == month
    is_either_year = (df.Year == year1) | (df.Year == year2)
    selected = df[is_month & is_either_year]
    return selected.groupby('Country')

In [133]:
# Per-country November readings for 2000 and 2001.
groups = allCountriesTop10TempDelta('Nov', 2000, 2001)
# Absolute difference between each country's first two rows. Countries
# with fewer than two rows (a year is missing) are skipped, because
# g.iloc[1] would otherwise raise IndexError. Keep the ten largest deltas.
deltas = sorted(
    ((abs(g.iloc[0].Temperature - g.iloc[1].Temperature), name)
     for name, g in groups
     if len(g) >= 2),
    reverse=True,
)[:10]
# Show the ten selected countries, smallest of those deltas first.
for temp, country in sorted(deltas):
    print(groups.get_group(country))
    

      Temperature  Year Month_Avg  Country Country_Code
8578      9.31536  2000       Nov  Croatia          HRV
8590      4.72606  2001       Nov  Croatia          HRV
       Temperature  Year Month_Avg  Country Country_Code
23470      7.89914  2000       Nov  Moldova          MDA
23482      3.28905  2001       Nov  Moldova          MDA
      Temperature  Year Month_Avg                 Country Country_Code
4294      8.80161  2000       Nov  Bosnia and Herzegovina          BIH
4306      3.82368  2001       Nov  Bosnia and Herzegovina          BIH
       Temperature  Year Month_Avg Country Country_Code
29794      -18.935  2000       Nov  Russia          RUS
29806      -13.913  2001       Nov  Russia          RUS
       Temperature  Year Month_Avg   Country Country_Code
31630      6.35640  2000       Nov  Slovakia          SVK
31642      1.30004  2001       Nov  Slovakia          SVK
       Temperature  Year Month_Avg  Country Country_Code
29590      7.83432  2000       Nov  Romania      