<a href="https://colab.research.google.com/github/w2j1y12/pdm17/blob/main/py-pandas/pandas_6_iot_nano33.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Pandas: access to the remote json from MongoDB
- The json file is generated on the fly from the express server of Node.js.
- The data stored in MongoDB are saved in the json file.
- The data are composed of three time series; temperature, humidity, and luminosity.


In [32]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import animation, rc

## [데이터 로드 1.]
- loading real-time json file from MongoDB via web

- http://chaos.inje.ac.kr:3030/client_33.html

In [33]:
# loading real-time json file from MongoDB via web (CORS, port=3030)
# url="http://chaos.inje.ac.kr:3030/iot"
# df=pd.read_json(url)
# print('Large IoT data was retrieved successfully from MongoDB!')

## [데이터 로드 2.]
- Load the compressed data from github.com

In [None]:
# Load the compressed data from github.com
# https://towardsdatascience.com/4-awesome-ways-of-loading-ml-data-in-google-colab-9a5264c61966
# url_zip = 'https://github.com/Redwoods/Py/raw/master/pdm2020/my-note/py-pandas/data/iot_data_201005_raw.zip'
!wget https://github.com/Redwoods/Py/raw/master/pdm2020/my-note/py-pandas/data/iot_data_201005_raw.zip
!unzip iot_data_201005_raw.zip

--2021-10-17 11:21:19--  https://github.com/Redwoods/Py/raw/master/pdm2020/my-note/py-pandas/data/iot_data_201005_raw.zip
Resolving github.com (github.com)... 192.30.255.113
Connecting to github.com (github.com)|192.30.255.113|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://raw.githubusercontent.com/Redwoods/Py/master/pdm2020/my-note/py-pandas/data/iot_data_201005_raw.zip [following]
--2021-10-17 11:21:20--  https://raw.githubusercontent.com/Redwoods/Py/master/pdm2020/my-note/py-pandas/data/iot_data_201005_raw.zip
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 8389520 (8.0M) [application/zip]
Saving to: ‘iot_data_201005_raw.zip.1’


2021-10-17 11:21:20 (104 MB/s) - ‘iot_data_201005_raw.zip.1’ saved [8389520/8389520]

A

In [None]:
df = pd.read_csv("iot_data_201005_raw.csv", index_col=False)
df.head()
# 시간/ 온도/ 습도/ 조도/ 대기압/ 빛의 3요소 r비율/ g비울/ b비율

In [None]:
df.head()

In [None]:
df.tail()

In [None]:
df.shape, type(df)

In [None]:
df['date'][:1],df['date'][-1:]
# 처음/ 끝

In [None]:
df.info()
# r,g,b_ratio는 값이 반만 나오는데 밤에는 조도가 0이라서(방에 불이 꺼진 경우) non값으로 나와서 그렇대

In [None]:
df.describe()

In [None]:
# Save iot data in csv file.
# df.to_csv('iot_data_201005_raw.csv')

## Check data
- NaN
- missing value

In [None]:
df.isnull().sum() #/df.shape[0]*100

In [None]:
df.isna().sum()

## Replace NaN with mean: Imputation
- NaN ==>> 33.3 (100/3)

In [None]:
df=df.fillna(33.3)
df.isna().sum()
# NaN에다가 33.3을 넣어줌

In [None]:
# for whole dataframe
# df = df.replace(np.nan, 33.3)
# df.isnull().sum()

## IoT data were cleaned!

In [None]:
df.head()
# 시간이 중요해서 시간을 기준으로 데이터프레임을 재구성

## New DataFrame : iot_data
### Dataframe with date and sensor values(temperature, humidity, luminosity, pressure, r, g, b)

In [None]:
#  select columns
cols = ['date', 'temperature', 'humidity', 'luminosity', 'pressure', 'r_ratio', 'g_ratio', 'b_ratio']

In [None]:
# Make iot data
iot_data = df[cols]

In [None]:
iot_data.shape
# -> shape이 바뀌겠지(11->8)

In [None]:
iot_data.head()  # date와 7 개의 신호를 확인.

In [None]:
iot_data.info()

In [None]:
iot_data.isna().sum()

In [None]:
# Save iot data in csv file.
iot_data.to_csv('iot_data_201005_clean.csv')
# NaN과 Null값이 처리된 깨끗한 값이 저장된 파일
# 다운로드해서 재활용 가능

## EDA

In [None]:
# 20년도 중간고사
# csv 파일로 특정한 데이터를 주고 그동안 배운 파이썬 모듈을 사용해 EDA 수행 후 결과 github에 올렸

In [None]:
# Plot boxplot
# iot_data[['temperature','humidity','luminosity']].plot.box(title='Boxplot of temperature, humidity, and luminosity')
# 온도 습도 조도를 박스플랏으로

# iot_data.loc[:,'temperature':'luminosity'].plot.box(title='Boxplot of temperature, humidity, and luminosity')
# loc은 인덱스 마지막도 포함

iot_data.iloc[:,1:].plot.box(title='Boxplot of all signals')
# 7개의 신호에 대한 박스플랏
# plt.xticks(rotation=60)
plt.show()

### Plot time series of sensor data

In [None]:
iot_data.head()

In [None]:
iot_data.plot(x='date', y='temperature', figsize=(10,5), title='temperature')
plt.xticks(rotation=90)
plt.show()
# 한 달동안의 온도 변화

### Set style of graph
- plt.style.use('fivethirtyeight')
- plt.style.use('ggplot')

In [None]:
plt.style.available

In [None]:
iot_data.plot(x='date', y='temperature', figsize=(10,5), title='temperature')
plt.xticks(rotation=60)
plt.style.use('seaborn-pastel') # 'seaborn-pastel', 'fivethirtyeight'
plt.show()

In [None]:
iot_data.plot(x='date', y='temperature', figsize=(10,5), title='temperature')
plt.xticks(rotation=60)
plt.style.use('ggplot')
plt.show()

In [None]:
# restore default style
plt.style.use('default')

In [None]:
iot_data.plot(x='date', y='humidity', figsize=(10,5), title='humidity')
plt.xticks(rotation=60)
plt.show()

In [None]:
iot_data.plot(x='date', y='luminosity', figsize=(10,5), title='luminosity')
plt.xticks(rotation=60)
plt.show()

In [None]:
iot_data.plot(x='date', y='pressure', figsize=(10,5), title='pressure')
plt.xticks(rotation=90)
plt.show()
# 극도로 낮은 저기압 = 태풍(며칠동안 낮에도 조도가 낮음)

#### 다중그래프 (multiple graphs)

In [31]:
# Plot of ['temperature','humidity']
iot_data.plot(x='date', y=['temperature','humidity'], figsize=(10,5), title='temperature and humidity') 시원 하이방가방가방가방 키보드 귀여우ㅝㅠㅠ

SyntaxError: ignored

In [None]:
# Plot of ['temperature','humidity','luminosity','pressure']
iot_data.plot(x='date', y=['temperature','humidity','luminosity','pressure'], figsize=(10,6), 
             title='temperature, humidity, luminosity and pressure')
plt.show()

#### Plot the mean of sensor data

In [None]:
iot_data[['temperature','humidity','luminosity']].mean()  

In [None]:
iot_data[['temperature','humidity','luminosity']].mean().plot.bar(figsize=(8,5), 
                                                                  title="Mean of temperature, humidity, and luminosity")

In [None]:
plt.style.use('seaborn-pastel')   # fivethirtyeight, ggplot
iot_data[['temperature','humidity','luminosity']].mean().plot.bar(figsize=(6,3), 
                                                                  title="Mean of temperature, humidity, and luminosity")

# Advanced graphics using pandas

### Plot the change of sensor data over various time spans.

> time span: 60 sec, 1 hour, 1 day, 1 week

#### Set date as index of timestamp

In [None]:
iot_data.head()

In [None]:
iot_data.set_index('date',inplace=True)

In [None]:
# Convert date index to Datetime index
iot_data.index = pd.to_datetime(iot_data.index)

In [None]:
iot_data.info() # Range index -> timestamp(Datetime) index 

In [None]:
iot_data.head()

In [None]:
# Estimate the mean of the iot data for every minute
iot_data.resample('60S').mean()

In [None]:
#  Plot mean of the iot data per every minute
iot_data.resample('60S').mean().plot(figsize=(8,5), 
                                     title='Minutely change of temperature, humidity, and luminosity')
plt.legend(bbox_to_anchor=(1.02, 1.0))

In [None]:
#  Plot mean of the iot data per every minute
iot_data[['r_ratio', 'g_ratio', 'b_ratio']].resample('60S').mean().plot(figsize=(10,5), 
                                     title='Minutely change of R, G, B',
                                     color="rgb")

In [None]:
# Estimate the mean of iot data for every hour
iot_data.resample('H').mean()  # mean per each hour

In [None]:
iot_data.resample('H').mean().shape, iot_data.resample('H').mean().info()

In [None]:
#  Plot mean of the iot data per every hour 
iot_data.resample('H').mean().plot(figsize=(10,6), 
                                   title='Hourly change of temperature, humidity, and luminosity') #, 
                                #    ylim=[0,500])

In [None]:
#  Plot mean of the iot data per every minute
iot_data[['r_ratio', 'g_ratio', 'b_ratio']].resample('H').mean().plot(figsize=(10,6), 
                                    color = ['red', 'green', 'blue'],
                                    title='Hourly change of R, G, B')

## [도전하기]  하루 24 시간 동안의 R,G,B 변화 그래프를 그려보시오.

In [None]:
# Estimate the mean of iot data for every day
iot_data.resample('D').mean()

In [None]:
iot_data.resample('D').mean().shape, iot_data.resample('D').mean().info()

In [None]:
iot_data.resample('D').mean().head()

In [None]:
iot_data_day=iot_data.resample('D').mean()
iot_data_day.head()

In [None]:
iot_data_day.info()

In [None]:
#  Plot mean of the iot data per every day
iot_data.resample('D').mean().plot(kind='bar', #marker='o', ms=6, figsize=(12,6), 
                                   title='Daily change of temperature, humidity, and luminosity')

In [None]:
#  Plot mean of the iot data per every minute
iot_data[['r_ratio', 'g_ratio', 'b_ratio']].resample('D').mean().plot(figsize=(10,6), 
                                    color = ['r','g','b'], #marker='o', ms=6, 
                                    title='Dayly change of R, G, B')

## 위의 그래프를 파이 그래프로 그려보시오.

In [None]:
#  Plot mean of the iot data per every day
# df.plot.pie(subplots=True, figsize=(6, 3))
iot_rgb = iot_data[['r_ratio', 'g_ratio', 'b_ratio']].resample('D').mean()
iot_rgb.head()


In [None]:
iot_rgb.iloc[0],iot_rgb.iloc[1]

In [None]:
iot_rgb.index, len(iot_rgb.index)

In [None]:
# Convert timestamp to date string
iot_rgb.index[0].strftime('%Y-%m-%d')

In [None]:
iot_rgb.iloc[0].plot.pie(subplots=True, figsize=(5,3), 
                         colors = ['red', 'green', 'blue'], labeldistance=None, 
                         autopct='%.2f', 
                         title='Daily change of R, G, B')
plt.ylabel(iot_rgb.index[0].strftime('%Y-%m-%d'))
plt.show()

## Weekly graph

In [None]:
# Estimate the mean of iot data for every week
iot_data.resample('W').mean()

In [None]:
#  Plot mean of the iot data per every week
iot_data.resample('W').mean().plot(kind='bar', #marker='o', ms=10, 
                                   figsize=(12,6), 
                                   title='Weekly change of temperature, humidity, and luminosity, pressure, R, G , B')

In [None]:
#  Plot mean of the iot data per every week
iot_data['temperature'].resample('W').mean().plot(kind='bar', #marker='o', ms=10, 
                                   figsize=(12,6), 
                                   title='Weekly change of temperature')

### 다중 파이 그래프
- https://medium.com/@kvnamipara/a-better-visualisation-of-pie-charts-by-matplotlib-935b7667d77f

In [None]:
fig, axs = plt.subplots(nrows=1, ncols=iot_rgb.index.size, figsize=(20,5))

fig.subplots_adjust(hspace=0.5, wspace=0.05)

for row in range(iot_rgb.index.size):
    fig.add_subplot(axs[row] )
    plt.pie(iot_rgb.iloc[row],
            colors = ['red', 'green', 'blue'], labeldistance=None, 
                         autopct='%.2f')  #, labels=df.columns)
    plt.ylabel(iot_rgb.index[row].strftime('%Y-%m-%d'))
    # plt.axis('off')

In [None]:
for row in ax:
    for col in row:
        col.plot(x, y)

In [None]:
fig, axs = plt.subplots(nrows=(iot_rgb.index.size-1)//7+1, ncols=7, figsize=(16,16))
# axs.set_axis_off()

fig.subplots_adjust(hspace=0.5, wspace=0.05)
idx=0
for row in axs:
    # fig.add_subplot(axs[row] )
    # fig.add_subplot(5, 7, row+1)
    # ax = axs[row]
    for col in row:
        col.pie(iot_rgb.iloc[idx],
            colors = ['red', 'green', 'blue'], labeldistance=None, 
                         autopct='%.2f')  #, labels=df.columns)
        
        col.set_ylabel(iot_rgb.index[idx].strftime('%Y-%m-%d'),fontsize=12)
        idx = idx + 1

        if idx == iot_rgb.index.size:
            idx = iot_rgb.index.size-1
        
    # plt.axis('off')
    # plt.axis('off')
    # plt.grid(False)
plt.show()

In [None]:
fig, axs = plt.subplots(nrows=(iot_rgb.index.size-1)//7+1, ncols=7, figsize=(16,16))
# axs.set_axis_off()

fig.subplots_adjust(hspace=0.5, wspace=0.05)

for row in range(iot_rgb.index.size):
    # fig.add_subplot(axs[row] )
    fig.add_subplot(5, 7, row+1)
    ax = axs[row]
    plt.pie(iot_rgb.iloc[row],
            colors = ['red', 'green', 'blue'], labeldistance=None, 
                         autopct='%.2f')  #, labels=df.columns)
    plt.ylabel(iot_rgb.index[row].strftime('%Y-%m-%d'),fontsize=14)
    # plt.axis('off')
    # plt.axis('off')
    plt.grid(False)
plt.show()

### animate pie chart
- https://towardsdatascience.com/learn-how-to-create-animated-graphs-in-python-fce780421afe
- https://pinkwink.kr/1090

In [None]:
iot_rgb.columns

In [None]:
fig,ax = plt.subplots()
explode=[0.01,0.01,0.01] #pop out each slice from the pie
def getmepie(i):
    iv = min(i, len(iot_rgb.index)-1)
    # def absolute_value(val): #turn % back to a number
    #     a  = np.round(val/100.*df1.head(i).max().sum(), 0)
    #     return int(a)
    ax.clear()
    plot = iot_rgb.iloc[iv].plot.pie(y=iot_rgb.columns, autopct='%.2f', label='',explode = explode, colors=['r','g','b'], shadow = True)
    plot.set_title('Date\n' + str(iot_rgb.index[iv].strftime('%Y-%m-%d')), fontsize=12)

# import matplotlib.animation as ani
animator = animation.FuncAnimation(fig, getmepie, interval = 200)
# plt.show()
rc('animation', html='html5')
animator

## 와, Great!