<a href="https://colab.research.google.com/github/yaobviously/perkins_temp/blob/main/perkins.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [14]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [15]:
# Locations of the two input CSV files (paths under /content/drive/MyDrive).
pop_csv = (
    '/content/drive/MyDrive/Temperature_Perkins/'
    'Population Data.csv'
)
temp_csv = (
    '/content/drive/MyDrive/Temperature_Perkins/'
    'Temperature Data.csv'
)

In [285]:
def load_transform_data(temp_path=None, pop_path=None):
  """
  Prepare the temperature and population datasets for visualization and
  analysis.

  Steps: read both CSVs, tidy the temperature columns, normalise station
  names to city names, left-merge the population table on ``city``, add a
  placeholder row for every date a station has no reading for, and compute
  per-city, per-calendar-month expanding averages of the mean, max and min
  temperature.

  Parameters
  ----------
  temp_path : str, path or file-like, optional
      Source of the temperature CSV. Defaults to the module-level
      ``temp_csv``.
  pop_path : str, path or file-like, optional
      Source of the population CSV. Defaults to the module-level
      ``pop_csv``.

  Returns
  -------
  pandas.DataFrame
      Indexed by ``location_date`` (sorted ascending), numeric columns
      rounded to 2 decimals. Placeholder rows carry only ``city``,
      ``station_code`` and ``state``; their remaining source columns are
      NaN.

  Notes
  -----
  A ``state`` column must exist after the merge, because it is copied into
  the placeholder rows.
  """
  temp_source = temp_csv if temp_path is None else temp_path
  pop_source = pop_csv if pop_path is None else pop_path

  temp_df = pd.read_csv(temp_source, parse_dates=['location_date'])
  pop_df = pd.read_csv(pop_source)

  # Lower-case the population headers so 'city' matches the merge key below.
  pop_df.columns = pop_df.columns.str.lower()

  # Drop columns that are not used downstream and shorten temperature names.
  temp_df = temp_df.drop(['country_name', 'country_code', 'continent'], axis=1)
  temp_df = temp_df.rename(columns={'temp_mean_c': 'mean_temp',
                                    'temp_min_c': 'min_temp',
                                    'temp_max_c': 'max_temp'})

  # Keep only the part of the station name before the first '/', and call
  # it 'city' so it can be merged with the population table.
  temp_df['name'] = temp_df['name'].str.split('/').str[0]
  temp_df = temp_df.rename(columns={'name': 'city'})

  # Station names that differ from the city names in the population table.
  city_dict = {'St Louis': 'St. Louis',
               'NYC': 'New York',
               "Chicago O'Hare": 'Chicago',
               'Covington': 'Cincinnati',
               'Wash DC': 'Washington',
               'Windsor Locks': 'Hartford',
               }
  temp_df['city'] = temp_df['city'].map(city_dict).fillna(temp_df['city'])

  merged_df = pd.merge(temp_df, pop_df, how='left', on='city')
  merged_df = merged_df.set_index('location_date').sort_index()

  # Every date seen for any station; a station lacking one of these dates
  # gets a placeholder row so all stations share the same calendar.
  all_days = merged_df.index.unique()

  filler_frames = []
  # dropna(): a NaN code never equals itself, so it would select no rows.
  for station_code in merged_df['station_code'].dropna().unique():
    station_rows = merged_df[merged_df['station_code'] == station_code]
    missing_days = all_days.difference(station_rows.index)
    if missing_days.empty:
      continue
    filler_frames.append(
        pd.DataFrame({'city': station_rows['city'].iloc[0],
                      'station_code': station_code,
                      'state': station_rows['state'].iloc[0],
                      },
                     index=missing_days))

  # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
  df = pd.concat([merged_df, *filler_frames])

  # Sort chronologically BEFORE the expanding averages: the placeholder
  # rows were appended at the end, and expanding() follows row order.
  # mergesort is stable, so same-date rows keep their relative order.
  df = df.sort_index(kind='mergesort')

  # Expanding (running) average per city and calendar month.
  df['month'] = pd.DatetimeIndex(df.index).month
  by_city_month = df.groupby(['city', 'month'])
  averages = {'avg_daily_for_month': 'mean_temp',
              'avg_daily_max_month': 'max_temp',
              'avg_daily_min_month': 'min_temp',
              }
  for target_col, source_col in averages.items():
    df[target_col] = (by_city_month[source_col]
                      .transform(lambda x: x.expanding().mean()))

  return df.round(2)


In [286]:
# Build the prepared frame once at module level; plot_city reads this `df`.
df = load_transform_data()

In [95]:
def plot_city(city='Sacramento', duration='W'):
  """
  Plot the resampled mean, max and min temperature for one city.

  Reads the module-level ``df``, keeps the rows whose ``city`` equals
  `city`, resamples the three temperature columns at the `duration`
  frequency (a pandas offset alias, 'W' by default) using the mean, and
  returns the object produced by ``DataFrame.plot``.
  """
  is_city = df['city'] == city
  temps = df.loc[is_city, ['mean_temp', 'max_temp', 'min_temp']]
  averaged = temps.resample(duration).mean()

  return averaged.plot(figsize=(24,5), grid=True, legend=True)