In [None]:
import pandas as pd
from datetime import datetime
import matplotlib.pyplot as plt
%matplotlib inline

## Defining methods to join bike_data and weather data

In [None]:
def cut_timestamp(timestamp_col):
    """Shorten every timestamp of a column to its first ten characters.

    Each value is converted to its string form, cut after ten characters and
    parsed back into an integer. For a twelve-digit ``YYYYMMDDHHMM`` value
    this leaves ``YYYYMMDDHH``.

    Parameters
    ----------
    timestamp_col : pandas.Series
        Column of timestamps whose string form starts with at least one digit.

    Returns
    -------
    pandas.Series
        The shortened timestamps as integers, index unchanged.
    """
    def _leading_ten_as_int(raw_value):
        as_text = str(raw_value)
        return int(as_text[:10])

    return timestamp_col.map(_leading_ten_as_int)

def assemble_timestamp_cut(day_frame, time_frame):
    """Build an integer ``YYYYMMDDHH`` key from a day column and a time column.

    Parameters
    ----------
    day_frame : pandas.Series
        Day strings; every ``-`` is removed (e.g. ``"2019-02-07"``).
    time_frame : pandas.Series
        Time strings. If a value contains ``:``, the part before the first
        colon is used as the hour and zero-padded to two digits, so
        ``"9:05:00"`` yields ``"09"`` instead of ``"90"``. Values without a
        colon keep the previous behaviour (their first two characters).

    Returns
    -------
    pandas.Series
        ``int64`` series of the concatenated day and hour digits.
    """
    def _hour_digits(time_text):
        hour_text, separator, _ = time_text.partition(":")
        if separator:
            # Pad single-digit hours; otherwise the key would be built from
            # the hour digit followed by the first minute digit.
            return hour_text.zfill(2)
        return time_text[:2]

    day_clean = day_frame.map(lambda day_text: day_text.replace("-", ""))
    hour = time_frame.map(_hour_digits)
    return (day_clean + hour).astype("int64")

def join_by_hour(bike_frame, weather_frame, column_name):
    """Attach the weather value of the matching hour to every bike row.

    The bike rows are keyed by ``assemble_timestamp_cut`` of their ``day`` and
    ``time`` columns, the weather rows by ``cut_timestamp`` of their
    ``Zeitstempel`` column. The weather column ``Wert`` is then left-joined
    onto the bike rows and stored under ``column_name``.

    Neither input frame is modified: the temporary ``timestamp_cut`` key only
    exists on internal copies, so the function can be called repeatedly on the
    same frames.

    Parameters
    ----------
    bike_frame : pandas.DataFrame
        Frame with the columns ``day`` and ``time``.
    weather_frame : pandas.DataFrame
        Frame with the columns ``Zeitstempel`` and ``Wert``.
    column_name : str
        Name of the column that receives the weather value.

    Returns
    -------
    pandas.DataFrame
        The bike rows with the additional column ``column_name``; bike rows
        without a matching weather row get a missing value.
    """
    # assign() returns new frames, so the callers' frames stay untouched.
    bike_keyed = bike_frame.assign(
        timestamp_cut=assemble_timestamp_cut(bike_frame["day"], bike_frame["time"])
    )
    weather_lookup = (
        weather_frame[["Wert"]]
        .assign(timestamp_cut=cut_timestamp(weather_frame["Zeitstempel"]))
        .set_index("timestamp_cut")
    )

    joined = bike_keyed.join(weather_lookup, on="timestamp_cut")
    joined[column_name] = joined["Wert"]

    # Remove the helper columns that only existed for the join.
    return joined.drop(columns=["Wert", "timestamp_cut"])

In [None]:
# Bike data and temperature series of bremen.
brm = pd.read_csv("resources/bremen.csv")
brm_temperature = pd.read_csv("resources/bremen_temperature.csv")

# Precipitation series of bremen and of the two other stations, schwarme and bassum.
brm_precipitation, schwarme_precipitation, bassum_precipitation = map(
    pd.read_csv,
    (
        "resources/bremen_precipitation.csv",
        "resources/schwarme_precipitation.csv",
        "resources/bassum_precipitation.csv",
    ),
)

brm_precipitation.info()

## Identifying missing values

In [None]:
# Build one row per hour from 2019-01-20 00:00 until 2020-01-20 00:00 (inclusive),
# encoded as integer YYYYMMDDHHMM so it can be matched against "Zeitstempel".
# The column is created directly instead of dropping a helper column with
# drop("date", 1): the positional axis argument raises a TypeError on pandas >= 2.0.
hourly_range = pd.date_range(
    start=datetime(2019, 1, 20, 0),
    end=datetime(2020, 1, 20, 0),
    freq="1h",
)
all_dates_frame = pd.DataFrame(
    {"Zeitstempel": [int(hour.strftime("%Y%m%d%H%M")) for hour in hourly_range]}
)

# Outer-join the bremen precipitation onto the full hourly grid: hours without a
# measurement show up as rows whose "Wert" is missing. Afterwards the rows are
# ordered by timestamp and renumbered.
brm_precipitation_full = (
    brm_precipitation
    .join(all_dates_frame.set_index("Zeitstempel"), on="Zeitstempel", how="outer")
    .set_index("Zeitstempel")
    .sort_values("Zeitstempel")
    .reset_index()
)

# Show the hours for which bremen has no precipitation value.
brm_precipitation_full[brm_precipitation_full["Wert"].isnull()]

In [None]:
# Rows of bremen between 2019-02-07 00:00 and 2019-02-08 03:00 (both bounds included).
brm_sub_frame = brm_precipitation_full.loc[
    brm_precipitation_full["Zeitstempel"].between(201902070000, 201902080300)
]
brm_sub_frame

## Getting data from the stations of schwarme and bassum

In [None]:
def _with_all_hours(station_frame):
    """Outer-join a station frame onto the hourly grid of ``all_dates_frame``.

    Hours that are missing in ``station_frame`` become rows with missing
    values. The result is ordered by ``Zeitstempel`` and renumbered.
    """
    hour_grid = all_dates_frame.set_index("Zeitstempel")
    completed = station_frame.join(hour_grid, on="Zeitstempel", how="outer")
    completed = completed.set_index("Zeitstempel").sort_values("Zeitstempel")
    return completed.reset_index()


# Complete the precipitation series of both stations so that gaps become visible.
schwarme_precipitation_full = _with_all_hours(schwarme_precipitation)
bassum_precipitation_full = _with_all_hours(bassum_precipitation)

## Finding the station which has approximately the same precipitation as the station of bremen

In [None]:
# Rows of schwarme between 2019-02-07 00:00 and 2019-02-08 03:00 (both bounds included).
schwarme_sub_frame = schwarme_precipitation_full.loc[
    schwarme_precipitation_full["Zeitstempel"].between(201902070000, 201902080300)
]

In [None]:
# Rows of bassum between 2019-02-07 00:00 and 2019-02-08 03:00 (both bounds included).
bassum_sub_frame = bassum_precipitation_full.loc[
    bassum_precipitation_full["Zeitstempel"].between(201902070000, 201902080300)
]

In [None]:

# Compare the three stations on a real time axis. The raw "Zeitstempel" integers
# (YYYYMMDDHHMM) advance by 100 per hour but jump by 7700 from 23:00 to 00:00 of the
# next day, so plotting them directly would stretch the curves at midnight.
fig, ax = plt.subplots()
for station_frame, line_format, station_label in (
    (bassum_sub_frame, '.-', "bassum"),
    (schwarme_sub_frame, '-', "schwarme"),
    (brm_sub_frame, '+-', "bremen"),
):
    measured_at = pd.to_datetime(
        station_frame["Zeitstempel"].astype("int64").astype(str),
        format="%Y%m%d%H%M",
    )
    ax.plot(measured_at, station_frame["Wert"], line_format, label=station_label)

ax.set_xlabel("time")
ax.set_ylabel("precipitation (Wert)")
ax.set_title("Precipitation per station")
fig.autofmt_xdate()  # tilt the date labels so they do not overlap

ax.legend()

plt.show()


## Since the precipitation of bassum matches the precipitation of bremen more closely, we replace the missing values of bremen with the values of bassum

In [None]:
# Fill the gaps of bremen with the bassum value of the *same timestamp*.
# The values are looked up by "Zeitstempel" instead of relying on both frames
# having identical row numbering, which would silently copy values of the wrong
# hour as soon as the two frames differ in their rows. Only "Wert" is filled; the
# other columns of the bremen rows are left as they are.
bassum_wert_by_timestamp = (
    bassum_precipitation_full
    .drop_duplicates(subset="Zeitstempel")  # map() needs a unique lookup index
    .set_index("Zeitstempel")["Wert"]
)
brm_wert_missing = brm_precipitation_full["Wert"].isnull()
brm_precipitation_full.loc[brm_wert_missing, "Wert"] = (
    brm_precipitation_full.loc[brm_wert_missing, "Zeitstempel"].map(bassum_wert_by_timestamp)
)

# Rows that are still missing after the replacement (bassum had no value either).
brm_precipitation_full[brm_precipitation_full["Wert"].isnull()]

## Join the bike data of bremen with the gap-filled precipitation values and the temperature values

In [None]:
# Attach the gap-filled precipitation to the bike data of bremen. The result gets
# its own name so that `brm_precipitation` keeps holding the raw precipitation
# data and the cells above stay re-runnable.
brm_with_precipitation = join_by_hour(brm, brm_precipitation_full, "precipitation")

# Attach the temperature of the matching hour as well.
brm_weather = join_by_hour(brm_with_precipitation, brm_temperature, "temperature")

brm_weather.info()

brm_weather.to_csv("generated/bremen_weather.csv")

In [None]:
# Bike data, precipitation and temperature of freiburg.
frb, frb_precipitation, frb_temperature = map(
    pd.read_csv,
    (
        "resources/freiburg.csv",
        "resources/freiburg_precipitation.csv",
        "resources/freiburg_temperature.csv",
    ),
)

# First attach the precipitation to the bike data, then the temperature.
frb_precipitation = join_by_hour(
    bike_frame=frb,
    weather_frame=frb_precipitation,
    column_name="precipitation",
)
frb_weather = join_by_hour(
    bike_frame=frb_precipitation,
    weather_frame=frb_temperature,
    column_name="temperature",
)

frb_weather.to_csv("generated/freiburg_weather.csv")

frb_weather.info()

In [None]:
# Display the freiburg bike data with the joined precipitation and temperature columns.
frb_weather