In [89]:
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
import datetime
import numpy as np

import os

In [90]:
def parse_trends_timestamp(timestamp):
    """Convert a trends-export timestamp string into a ``datetime``.

    Parameters
    ----------
    timestamp : str
        Text made of a slash-separated ``month/day/year`` date, a single
        space, and a time string that ``datetime.fromisoformat`` accepts
        (for example ``'11/17/2021 15:43:24'``). Month and day may be
        written with or without a leading zero. Anything after a second
        space is ignored.

    Returns
    -------
    datetime.datetime
        The moment described by ``timestamp``.

    Raises
    ------
    IndexError
        If the time part or one of the three date fields is missing.
    ValueError
        If the reassembled text is not a valid ISO date/time.
    """
    # Split once instead of re-splitting the same string for every field.
    parts = timestamp.split(' ')
    date_fields = parts[0].split('/')
    month, day, year = date_fields[0], date_fields[1], date_fields[2]

    # fromisoformat() rejects one-digit months/days, so pad values such as '1/5/2021'.
    iso_text = f'{year}-{month.zfill(2)}-{day.zfill(2)} {parts[1]}'
    return datetime.datetime.fromisoformat(iso_text)

In [91]:
def parse_feedback_timestamp(timestamp):
    """Parse a ``'<date>T<time>'`` feedback timestamp, truncated to the minute.

    Parameters
    ----------
    timestamp : str
        Text containing a ``'T'`` between the date and the time, e.g.
        ``'2021-11-17T15:43:24'``.

    Returns
    -------
    datetime.datetime
        Datetime built from the date and the first five characters of the
        time (``HH:MM``); seconds and anything following them are dropped.

    Raises
    ------
    IndexError
        If ``timestamp`` contains no ``'T'``.
    ValueError
        If the resulting ``'<date> HH:MM'`` text is not valid ISO format.
    """
    pieces = timestamp.split('T')
    day_part, clock_part = pieces[0], pieces[1]

    # Keep only 'HH:MM' of the time portion.
    return datetime.datetime.fromisoformat('{} {}'.format(day_part, clock_part[:5]))

### Loading Feedback data


In [92]:
# Read the three feedback exports in one pass; the order of the paths
# matches the order of the names on the left-hand side.
feedback_room_df, device_df, participant_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../data/abschluss-daten/feedback_room.csv',
        '../data/abschluss-daten/device.csv',
        '../data/abschluss-daten/participant.csv',
    )
)

In [93]:

# Each feedback table is paired with the parser applied to its time columns:
# device and participant values go through parse_feedback_timestamp, while
# feedback_room values are handed to datetime.fromisoformat unchanged.
timestamp_parsers = (
    (device_df, parse_feedback_timestamp),
    (participant_df, parse_feedback_timestamp),
    (feedback_room_df, datetime.datetime.fromisoformat),
)

for frame, parser in timestamp_parsers:
    for column_name in frame.columns:
        # A column counts as a time column when its name contains
        # 'timestamp' or 'At'.
        is_time_column = 'timestamp' in column_name or 'At' in column_name
        if not is_time_column:
            continue

        # dropna() keeps the parser away from missing cells; on assignment
        # pandas aligns by index, so those cells remain missing.
        frame[column_name] = frame[column_name].dropna().apply(parser)


### Loading Temperature & Air quality data

The data is loaded as a batch: one DataFrame variable is created per file, named after the file it was read from in the form `rx_xx_temp_df` (for `Lufttemp` files) or `rx_xx_qual_df` (for all other files).

In [94]:
# Collect the file names of all four floor folders into one flat list,
# floor by floor in the order given here.
dir_list = [
    entry
    for floor_dir in (
        '../data/Trends Smart City/0EG',
        '../data/Trends Smart City/1OG',
        '../data/Trends Smart City/2OG',
        '../data/Trends Smart City/3OG',
    )
    for entry in os.listdir(floor_dir)
]

In [95]:
# Folder that holds the files of each floor, keyed by the floor prefix that
# appears as the second dash-separated token of the (lower-cased) file name.
floor_dirs = {
    'r0': '../data/Trends Smart City/0EG/',
    'r1': '../data/Trends Smart City/1OG/',
    'r2': '../data/Trends Smart City/2OG/',
    'r3': '../data/Trends Smart City/3OG/',
}

# Only the first three columns of each export are read, under these names.
trend_columns = ['timestamp', 'value', 'reduced']

# Names of all dynamically created DataFrame variables, in load order.
df_names = []

for filename in dir_list:
    # The variable name is built from the part of the file name before the
    # first space: '<something>-<floor>-<room>' -> '<floor>_<room>'.
    split_name = filename.lower().split(' ')
    further_split_name = split_name[0].split('-')
    floor_prefix = further_split_name[1]
    df_name = floor_prefix + '_' + further_split_name[2]

    # 'Lufttemp' files become *_temp_df; every other file becomes *_qual_df.
    df_name += '_temp_df' if 'Lufttemp' in filename else '_qual_df'

    if floor_prefix not in floor_dirs:
        # Previously this case surfaced as an opaque KeyError from the
        # globals() lookup further down; fail with an explanation instead.
        raise KeyError(
            'Cannot determine the floor folder for ' + repr(filename)
            + ': unexpected prefix ' + repr(floor_prefix)
        )

    df_names.append(df_name)

    trend_df = pd.read_csv(
        floor_dirs[floor_prefix] + filename,
        sep=';',
        header=0,  # the file's own header row is replaced by trend_columns
        usecols=list(range(3)),
        names=trend_columns,
    )
    trend_df['timestamp'] = trend_df['timestamp'].apply(parse_trends_timestamp)

    # Publish under the dynamic name only once the frame is fully prepared,
    # so a parsing failure never leaves a half-processed frame behind.
    globals()[df_name] = trend_df

Changing the room name format to match that used in the trends dataset

In [96]:
def parse_roomName(roomName):
    """Translate a feedback room name into the naming used for the trend frames.

    The first three characters are dropped, every ``'.'`` in the remainder
    is replaced by ``'_'`` and an ``'r'`` is put in front. For a name such
    as ``'VS 0.12'`` (illustrative; confirm the real format against the
    data) the result is ``'r0_12'``.

    Parameters
    ----------
    roomName : str or missing value
        Room name as found in the feedback export. Non-string values such
        as ``NaN`` or ``None`` are accepted and treated as "no room".

    Returns
    -------
    str or None
        The converted name, or ``None`` when ``roomName`` is not a string
        or does not contain ``'VS'``.
    """
    # pandas represents missing cells as float NaN / None; the substring
    # check below would raise TypeError on those, so filter them out first.
    if not isinstance(roomName, str) or 'VS' not in roomName:
        return None

    return 'r' + roomName[3:].replace('.', '_')



In [97]:
# Overwrites the roomName column in place with the converted names; names
# without 'VS' become None.
# NOTE(review): not idempotent. Re-running this cell feeds the already
# converted names back into parse_roomName, which would generally turn them
# into None. Reload feedback_room_df before re-running.
feedback_room_df['roomName'] = feedback_room_df['roomName'].apply(parse_roomName)

### Splitting feedback data

In [98]:
def _feedback_subset(prefix):
    """Return the '<prefix>_timestamp', '<prefix>_value' and 'roomName'
    columns of feedback_room_df, without rows that have a missing entry
    in any of the three."""
    wanted_columns = [prefix + '_timestamp', prefix + '_value', 'roomName']
    return feedback_room_df[wanted_columns].dropna()


# Daily feedback questions
daily_airDraft_df = _feedback_subset('daily_airDraft')
daily_ambientSounds_df = _feedback_subset('daily_ambientSounds')
daily_artificialLight_df = _feedback_subset('daily_artificialLight')
daily_dayLight_df = _feedback_subset('daily_dayLight')
daily_smell_df = _feedback_subset('daily_smell')
daily_studentsAdherence_df = _feedback_subset('daily_studentsAdherence')
daily_studentsFocused_df = _feedback_subset('daily_studentsFocused')
daily_studentsIrritated_df = _feedback_subset('daily_studentsIrritated')
daily_studentsMotivated_df = _feedback_subset('daily_studentsMotivated')

# Hourly feedback questions
hourly_airQuality_df = _feedback_subset('hourly_airQuality')
hourly_humidity_df = _feedback_subset('hourly_humidity')
hourly_indoorClimate_df = _feedback_subset('hourly_indoorClimate')
hourly_temperature_df = _feedback_subset('hourly_temperature')

## Plotting

In [102]:
# Air-draft feedback values from every row of daily_airDraft_df (not
# restricted to a single room).
x = daily_airDraft_df['daily_airDraft_value']

In [None]:
# TODO: still need to choose appropriate timestamps to plot with for the cells below

In [105]:
# Preview the temperature frame of the room named in the second air-draft
# feedback row; the frame is looked up by its dynamically created name.
globals()[daily_airDraft_df['roomName'].iloc[1] + '_temp_df'].head()

Unnamed: 0,timestamp,value,reduced
0,2021-11-17 15:43:24,23.619999,Reduced
1,2021-11-17 22:28:24,23.199999,Reduced
2,2021-11-18 05:28:24,22.779999,Reduced
3,2021-11-18 07:28:24,23.299999,Reduced
4,2021-11-18 14:58:36,23.559999,Reduced


In [103]:
# Temperature readings of the room named in the second air-draft feedback row.
# NOTE(review): x covers feedback from all rooms while y covers one room's
# sensor readings, so the two series are presumably not aligned yet.
y = globals()[daily_airDraft_df['roomName'].iloc[1] + '_temp_df']['value']