# **Visual Weather Analysis** for Freiburg im Breisgau, Germany

### Imports

In [39]:
import numpy as np
import pandas as pd

import plotly_express as px

### Data Loading & Wrangling

In [40]:
# Load the Freiburg weather CSV and remember how many rows it has
# right after loading, before any cleaning step touches the frame.
df = pd.read_csv(
    filepath_or_buffer="../data/weather/Freiburg_im_Breisgau_Wetter.csv"
)
old_rows = len(df)
df.head(n=2)

Unnamed: 0,dt,dt_iso,timezone,city_name,lat,lon,temp,visibility,dew_point,feels_like,...,wind_gust,rain_1h,rain_3h,snow_1h,snow_3h,clouds_all,weather_id,weather_main,weather_description,weather_icon
0,946684800,2000-01-01 00:00:00 +0000 UTC,3600,Freiburg im Breisgau,47.999008,7.842104,1.54,,0.68,-0.1,...,,,,,,93,804,Clouds,overcast clouds,04n
1,946688400,2000-01-01 01:00:00 +0000 UTC,3600,Freiburg im Breisgau,47.999008,7.842104,1.64,,0.78,1.64,...,,,,,,95,804,Clouds,overcast clouds,04n


In [41]:
# Summarize each column's dtype and non-null count to spot sparsely populated columns.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 194326 entries, 0 to 194325
Data columns (total 28 columns):
 #   Column               Non-Null Count   Dtype  
---  ------               --------------   -----  
 0   dt                   194326 non-null  int64  
 1   dt_iso               194326 non-null  object 
 2   timezone             194326 non-null  int64  
 3   city_name            194326 non-null  object 
 4   lat                  194326 non-null  float64
 5   lon                  194326 non-null  float64
 6   temp                 194326 non-null  float64
 7   visibility           1174 non-null    float64
 8   dew_point            194326 non-null  float64
 9   feels_like           194326 non-null  float64
 10  temp_min             194326 non-null  float64
 11  temp_max             194326 non-null  float64
 12  pressure             194326 non-null  int64  
 13  sea_level            0 non-null       float64
 14  grnd_level           0 non-null       float64
 15  humidity         

In [42]:
# Distinct values (including NaN) recorded in the visibility column.
df.loc[:, "visibility"].unique()

array([   nan,  9000., 10000.,   100.,  8000.,  6000.,  7000.,   600.,
         500.,  4500.,   200.,   400.,  5000.,  2500.])

In [43]:
# Frequency of each non-null visibility value, most common first.
df.loc[:, "visibility"].value_counts()

9000.0     965
10000.0    183
100.0        6
8000.0       6
6000.0       4
7000.0       3
600.0        1
500.0        1
4500.0       1
200.0        1
400.0        1
5000.0       1
2500.0       1
Name: visibility, dtype: int64

Drop columns with too many NaN values

In [44]:
# Per-column count of missing values.
df.isna().sum(axis="index")

dt                          0
dt_iso                      0
timezone                    0
city_name                   0
lat                         0
lon                         0
temp                        0
visibility             193152
dew_point                   0
feels_like                  0
temp_min                    0
temp_max                    0
pressure                    0
sea_level              194326
grnd_level             194326
humidity                    0
wind_speed                  0
wind_deg                    0
wind_gust              174306
rain_1h                150740
rain_3h                194295
snow_1h                191781
snow_3h                194326
clouds_all                  0
weather_id                  0
weather_main                0
weather_description         0
weather_icon                0
dtype: int64

In [45]:
# Remove every column that contains missing values (see the NaN counts above).
# NOTE(review): NaN in the rain_*/snow_* columns may mean "no precipitation"
# rather than "not measured" - confirm against the data source before dropping.
df = df.drop(
    [
        'visibility',
        'sea_level',
        'grnd_level',
        'wind_gust',
        'rain_1h',
        'rain_3h',
        'snow_1h',
        'snow_3h',
    ],
    axis="columns",
)

In [46]:
# Re-check the per-column missing-value counts after the drop.
df.isna().sum(axis="index")

dt                     0
dt_iso                 0
timezone               0
city_name              0
lat                    0
lon                    0
temp                   0
dew_point              0
feels_like             0
temp_min               0
temp_max               0
pressure               0
humidity               0
wind_speed             0
wind_deg               0
clouds_all             0
weather_id             0
weather_main           0
weather_description    0
weather_icon           0
dtype: int64

Drop or transform the remaining columns

In [47]:
# List the column labels currently present in the frame.
df.columns

Index(['dt', 'dt_iso', 'timezone', 'city_name', 'lat', 'lon', 'temp',
       'dew_point', 'feels_like', 'temp_min', 'temp_max', 'pressure',
       'humidity', 'wind_speed', 'wind_deg', 'clouds_all', 'weather_id',
       'weather_main', 'weather_description', 'weather_icon'],
      dtype='object')

In [48]:
# Drop the epoch timestamp, timezone offset and location columns.
# Reassign instead of using inplace=True, matching the earlier
# `df = df.drop(...)` cell and the style pandas recommends.
df = df.drop(columns=['dt', 'timezone', 'city_name', 'lat', 'lon'])

In [49]:
# Strip the trailing " +0000 UTC" offset from the timestamp strings.
# The pattern is a raw string so that `\s` and `\+` reach the regex engine as
# written instead of being parsed as (invalid) Python string escapes, which
# emit a DeprecationWarning/SyntaxWarning on current Python versions.
df['dt_iso'] = df['dt_iso'].replace(r"\s*\+.*UTC", "", regex=True)
# Label-based slicing is end-inclusive, so this previews rows 0 through 3.
df.loc[:3, ['dt_iso']]

Unnamed: 0,dt_iso
0,2000-01-01 00:00:00
1,2000-01-01 01:00:00
2,2000-01-01 02:00:00
3,2000-01-01 03:00:00


In [50]:
# Confirm that the cleaned timestamp column has no missing entries.
df['dt_iso'].isna().sum()

0

In [51]:
# Split "YYYY-MM-DD HH:MM:SS" at the first space into separate date and time columns.
# `n` must be passed by keyword: it is keyword-only in Series.str.split since
# pandas 2.0, where the old positional form raises a TypeError.
df[['date', 'time']] = df['dt_iso'].str.split(" ", n=1, expand=True)
# The combined column is no longer needed once it has been split.
df = df.drop(columns=['dt_iso'])
# NOTE: 'date' and 'time' are kept as plain strings here; no pd.to_datetime
# conversion is applied in this cell.

In [52]:
# Preview the first five rows of the cleaned frame.
df.head(n=5)

Unnamed: 0,temp,dew_point,feels_like,temp_min,temp_max,pressure,humidity,wind_speed,wind_deg,clouds_all,weather_id,weather_main,weather_description,weather_icon,date,time
0,1.54,0.68,-0.1,1.42,1.87,1026,94,1.57,168,93,804,Clouds,overcast clouds,04n,2000-01-01,00:00:00
1,1.64,0.78,1.64,1.52,1.97,1026,94,1.21,163,95,804,Clouds,overcast clouds,04n,2000-01-01,01:00:00
2,1.14,0.28,-0.62,0.82,2.07,1026,94,1.61,163,95,804,Clouds,overcast clouds,04n,2000-01-01,02:00:00
3,1.24,0.53,-0.57,0.92,2.17,1026,95,1.66,163,95,804,Clouds,overcast clouds,04n,2000-01-01,03:00:00
4,1.61,0.9,0.07,1.52,1.9,1026,95,1.51,159,99,804,Clouds,overcast clouds,04n,2000-01-01,04:00:00


### Start Exploring