In [None]:
# This notebook checks the opendata:public_precipitation_15min data layer for duplicates, missing values and outliers, and summarizes and visualizes the data.
from owslib.wfs import WebFeatureService
import json
from owslib.fes import *

# Connect to the open-data WFS endpoint (protocol version 1.1.0).
wfs11 = WebFeatureService(
    url='https://entw-imis.lab.bfs.de/ogc/opendata/wfs',
    version='1.1.0',
)

# Build the two request conditions: start_measure between the two dates,
# and id matching 'DEZ3437'.
filter1 = PropertyIsBetween(
    propertyname='start_measure',
    lower='2021-01-01',
    upper='2022-12-31',
)
filter2 = PropertyIsLike(propertyname='id', literal='DEZ3437', wildCard='*')
filters = [filter1, filter2]

# Combine both conditions with a logical AND and serialize them to an XML string.
combined_filter = And(operations=filters)
filterxml = etree.tostring(combined_filter.toXML()).decode("utf-8")

# Request the matching features of the precipitation layer as JSON.
response = wfs11.getfeature(
    typename='opendata:public_precipitation_15min',
    filter=filterxml,
    outputFormat='application/json',
)

# Read the response stream into bytes, then parse the bytes as JSON.
bytesD = bytes(response.read())
data1 = json.loads(bytesD)


In [None]:
# Write the downloaded JSON to disk so it can be opened in QGIS.
import json

output_name = 'Halberstadt-precipitation' + '.json'
with open(output_name, mode='w', encoding='utf-8') as out_file:
    # Keep non-ASCII characters as-is and pretty-print with a 4-space indent.
    json.dump(data1, out_file, ensure_ascii=False, indent=4)

In [None]:
# Convert to a DataFrame: flatten the "features" list of the JSON document
# into one row per feature.
import pandas as pd

datanorm1 = pd.json_normalize(data1, record_path="features")
df1 = pd.DataFrame(data=datanorm1)

In [None]:
# Describe the data.
# Column totals are restricted to numeric columns: summing text columns only
# concatenates their strings (or fails on mixed content).
print(df1.sum(numeric_only=True))
# DataFrame.info() writes its report itself and returns None, so it is called
# directly; wrapping it in print() would append a stray "None" line.
df1.info()
# Summary statistics of the measured values.
print(df1["properties.value"].describe())

In [None]:

# Check for duplicates: list every row whose end_measure value already
# appeared in an earlier row (the first occurrence is not flagged).
is_repeated_end = df1['properties.end_measure'].duplicated()
duplicatef1 = df1.loc[is_repeated_end]
print(duplicatef1)

In [None]:
#prepare data to show plot
from IPython.display import display
from matplotlib import pyplot as plt
import seaborn as sns
import numpy as np

# Parse both timestamp columns and strip any timezone information.
for time_col in ('properties.end_measure', 'properties.start_measure'):
    df1[time_col] = pd.to_datetime(df1[time_col]).dt.tz_localize(None)

# Index the frame by end_measure; the column itself stays in the frame.
df1 = df1.set_index('properties.end_measure', drop=False)

In [None]:
# Missing-timestamp check: build the expected hourly grid for 2021-2022 and
# keep the grid points that do not occur in the frame's index.
# NOTE(review): the layer name says "15min" but the grid below is hourly —
# confirm that an hourly check is what is intended.
expected_times = pd.date_range(
    start="2021-01-01 00:00:00",
    end="2022-12-31 23:00:00",
    freq='H',
)
missv = expected_times.difference(df1.index)
print(pd.DataFrame(missv).count())  # number of missing timestamps

# Sort ascending and convert to a NumPy array before printing.
missv = np.array(pd.to_datetime(missv.sort_values(ascending=True)))
print(missv)

In [None]:
# Resample to daily means.
# Only numeric columns are averaged: the normalized feature table may also
# hold non-numeric columns (text, lists), and pandas >= 2.0 raises a TypeError
# when mean() meets such data instead of silently dropping it. Selecting the
# numeric columns first works on old and new pandas versions alike.
# NOTE(review): the plots label this as mm/d; a daily total would need sum()
# rather than mean() — confirm which is intended.
df1 = df1.select_dtypes(include='number').resample('D').mean()

In [None]:
# Check for null values: keep the rows whose value is missing and show them.
value_is_missing = df1['properties.value'].isna()
isnulldf1 = df1.loc[value_is_missing]
print(isnulldf1['properties.value'])

In [None]:
# Time series plot of the value column against the frame's index.
import matplotlib.dates as mdates

fig, ax = plt.subplots(figsize=(11, 4))

# Major x ticks are rendered as "YYYY-MM" (four-digit year, two-digit month).
year_month_formatter = mdates.DateFormatter("%Y-%m")
ax.xaxis.set_major_formatter(year_month_formatter)

ax.plot(df1.index.values, df1['properties.value'], linewidth=0.7, color='blue')

# Axis labels for the chart.
ax.set_xlabel('Date')
ax.set_ylabel('Precipitation (mm/d)')

In [None]:
# Scatter plot of the value column against the frame's index.
fig, ax = plt.subplots(figsize=(11, 4))

x_dates = df1.index.values
y_values = df1['properties.value']
ax.scatter(x_dates, y_values, color='blue')

# Possible title (currently not applied):
#title="Daily Precipitation Distribution at Halberstadt Station (2021-2022)
# Axis labels.
ax.set(xlabel="Date", ylabel="Precipitation (mm/d)")

plt.show()

In [None]:
# Weekly means, drawn as a line plot through pandas.
# Note: this overwrites df1 with its weekly-resampled version.
df1 = df1.resample('W').mean()
sns.set(rc={'figure.figsize': (11, 4)})
weekly_values = df1['properties.value']
weekly_values.plot(linewidth=0.9)

In [None]:
# Weekly means, drawn as a time series line plot through seaborn.
df1 = df1.resample('W').mean()

sns.set_style()  # no style name is passed here

weekly_dates = df1.index.values
sns.lineplot(data=df1, x=weekly_dates, y="properties.value")

#plt.xticks(rotation = 25)

In [None]:
# Monthly means restricted to 2021, shown as a seaborn box plot.
from pandas import DataFrame, Grouper, concat
import seaborn as sns

# Resample to month-end means, then keep only the 2021 rows.
monthly = df1.resample('M').mean()
df1 = monthly.sort_index().loc['2021-01-01':'2021-12-31']
print(df1.head())

sns.set_style("whitegrid")

sns.boxplot(data=df1, x=df1.index.values, y=df1['properties.value'])

In [None]:
# Scatter plot of the values with a thin connecting line.
import seaborn as sns
sns.set(rc={'figure.figsize': (11, 4)})
# Create figure and plot space (this figsize takes precedence for this figure).
fig, ax = plt.subplots(figsize=(8, 5))

# Draw points and line through matplotlib on the same axes, from the same
# x values, so both layers are guaranteed to share one date axis.
x_dates = df1.index.values
y_values = df1['properties.value']
ax.scatter(x_dates, y_values, color='blue')
ax.plot(x_dates, y_values, linewidth=0.5)

# Set the axis labels last so no earlier plotting call can replace them.
# The data is precipitation, so the label matches the other plots in this
# notebook (the former "ODL (μSv/h)" label did not fit this data layer).
ax.set(xlabel="Date",
       ylabel="Precipitation (mm/d)")

plt.show()