#  Air Pollution Data in Taiwan

In [51]:
import numpy as np
import scipy.linalg as la
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [52]:
from util import *

# import the station information

In [53]:
# Chinese station names, one per monitoring station (twelve in total).
station_chi = [
    "古亭", "土城", "桃園", "新竹",
    "二林", "新營", "小港", "美濃",
    "宜蘭", "冬山", "花蓮", "臺東",
]

In [54]:
# English station names, grouped by the region lists used throughout the notebook.
stations_n = ["Guting", "Tucheng", "Taoyuan", "Hsinchu"]
stations_s = ["Erlin", "Xinying", "Xiaogang", "Meinong"]
stations_e = ["Yilan", "Dongshan", "Hualien", "Taitung"]
# Alternative grouping (presumably "west"); every member already appears in
# stations_n or stations_s, so it must not be appended to the master list.
stations_w = ["Hsinchu", "Erlin", "Xinying", "Xiaogang"]
# Master list: one English name per station in north, south, east order.
# It has twelve unique entries, the same length as station_chi and location.
station_Eng = stations_n + stations_s + stations_e

In [55]:
# One [first, second] coordinate pair per station; the values look like
# [latitude, longitude] and are presumably ordered like station_chi - TODO confirm.
location = [
    [25.0261, 121.5229],
    [24.9732, 121.4441],
    [25.0015, 121.2971],
    [24.7907, 120.9578],
    [23.9363, 120.4068],
    [23.3161, 120.2994],
    [22.5489, 120.3567],
    [22.8871, 120.5620],
    [24.7509, 121.7588],
    [24.6129, 121.7534],
    [23.9909, 121.6031],
    [22.7562, 121.1120],
]

In [56]:
# Zero-filled 144 x 7 x (number of stations) tensors: one per region ...
TW_n, TW_s, TW_e = (
    np.zeros((144, 7, len(group)))
    for group in (stations_n, stations_s, stations_e)
)
# ... and one covering all north, south and east stations together.
TW = np.zeros((144, 7, len(stations_n) + len(stations_s) + len(stations_e)))

# import the pollutant data

In [57]:
# Read each station's CSV exactly once and stack the values into TW, a
# 144 x 7 x (number of stations) tensor ordered north, south, east.
stations_all = stations_n + stations_s + stations_e
TW = np.zeros((144, 7, len(stations_all)))

for i, s in enumerate(stations_all):
    filename = "TW_env/" + s + "_2005_2017_adj.csv"
    data = pd.read_csv(filename, index_col=0)
    # Everything after the first two columns is stored; a block that cannot
    # be broadcast to 144 x 7 makes this assignment raise.
    TW[:, :, i] = data.iloc[:, 2:].values

# The regional tensors are independent copies of the matching station slices,
# so later changes to one of them never leak into TW (or vice versa).
n_north = len(stations_n)
n_south = len(stations_s)
TW_n = TW[:, :, :n_north].copy()
TW_s = TW[:, :, n_north:n_north + n_south].copy()
TW_e = TW[:, :, n_north + n_south:].copy()

# Column labels of the stored values, taken from the last file read;
# presumably every station file shares the same column layout - TODO confirm.
pollutant = list(data.columns[2:])

In [58]:
def split_ws(X):
    """Split a tensor into "winter" and "summer" rows along its first axis.

    Row ``t`` is classified by ``t % 12``: positions 0-2 and 9-11 go to the
    winter array, positions 3-8 to the summer array. Row order is preserved
    within each output. (Presumably row 0 is a January observation of a
    monthly series - confirm against the data files.)

    Parameters
    ----------
    X : array-like of shape (T, P, S)
        Input tensor; T may be any length, not only a multiple of 12.

    Returns
    -------
    W, S : numpy.ndarray
        Float arrays of shape (n_winter, P, S) and (n_summer, P, S). Both are
        copies, so modifying them does not affect ``X``.

    Raises
    ------
    ValueError
        If ``X`` is not three-dimensional.
    """
    X = np.asarray(X, dtype=float)
    if X.ndim != 3:
        raise ValueError("split_ws expects a 3-D array, got %d-D" % X.ndim)

    # Selecting rows with a boolean mask sizes each output by the actual
    # number of winter/summer rows, so partial years neither overflow a
    # preallocated buffer nor leave zero-padded rows behind.
    month = np.arange(X.shape[0]) % 12
    is_winter = (month < 3) | (month > 8)
    return X[is_winter], X[~is_winter]

In [59]:
# Split the all-station tensor TW into the two arrays returned by split_ws
# (first: rows with t % 12 in 0-2 or 9-11, second: the remaining rows).
T_W, T_S = split_ws(TW)

# TCCA

# N vs S

In [60]:
# North vs. south on the full series: subtract the mean over the first axis
# from each tensor before fitting.
inputX = TW_n - TW_n.mean(axis=0)
inputY = TW_s - TW_s.mean(axis=0)

# One initial fit plus 20 further fits; keep the one with the largest
# correlation. NOTE(review): this only helps if twoDcca starts from a random
# initialisation - confirm in util, and seed it if reproducibility matters.
(loading, corr) = twoDcca(inputX, inputY, x_regular=0.01, y_regular=0.01, iter_max=30)
for i in range(20):
    (loading_tmp, corr_tmp) = twoDcca(inputX, inputY, x_regular=0.01, y_regular=0.01, iter_max=30)
    if corr_tmp > corr:
        loading, corr = loading_tmp, corr_tmp

corr_n_s = corr
print(corr)

0.8876959173071776


In [61]:
# Station loadings of the north/south fit: loading[1] stacked on loading[3],
# labelled with the north then south station names.
data = np.r_[loading[1], loading[3]].flatten()
df = pd.DataFrame(data, columns=['coefficient'], index=stations_n + stations_s, dtype=float)
# The pattern is a raw string so the regex backslashes are not parsed as
# (invalid) string escapes; values are then rendered as 3-decimal text.
df = df.replace(r'\$\s+', '', regex=True).astype(float).applymap('{:,.3f}'.format)
print(df.T)
print('\n')
print(df.T.to_latex())

             Guting Tucheng Taoyuan Hsinchu   Erlin Xinying Xiaogang Meinong
coefficient  -0.051   0.145   0.032   0.988  -0.776  -0.584    0.125  -0.200


\begin{tabular}{lllllllll}
\toprule
{} &  Guting & Tucheng & Taoyuan & Hsinchu &   Erlin & Xinying & Xiaogang & Meinong \\
\midrule
coefficient &  -0.051 &   0.145 &   0.032 &   0.988 &  -0.776 &  -0.584 &    0.125 &  -0.200 \\
\bottomrule
\end{tabular}



In [62]:
# Pollutant loadings of the north/south fit: loading[0] is labelled N and
# loading[2] is labelled S, indexed by the pollutant column names.
data = {'N': loading[0].flatten(), 'S': loading[2].flatten()}
df = pd.DataFrame(data, index=pollutant, dtype=float)
# The pattern is a raw string so the regex backslashes are not parsed as
# (invalid) string escapes; values are then rendered as 3-decimal text.
df = df.replace(r'\$\s+', '', regex=True).astype(float).applymap('{:,.3f}'.format)
print(df.T)
print('\n')
print(df.T.to_latex())

     SO2      CO      O3    PM10     NOx      NO    NO2
N  0.001   0.479  -0.322  -0.132  -0.417   0.604  0.333
S  0.570  -0.254   0.415   0.067   0.198  -0.619  0.111


\begin{tabular}{llllllll}
\toprule
{} &    SO2 &      CO &      O3 &    PM10 &     NOx &      NO &    NO2 \\
\midrule
N &  0.001 &   0.479 &  -0.322 &  -0.132 &  -0.417 &   0.604 &  0.333 \\
S &  0.570 &  -0.254 &   0.415 &   0.067 &   0.198 &  -0.619 &  0.111 \\
\bottomrule
\end{tabular}



In [63]:
# N vs E

In [64]:
# North vs. east on the full series: subtract the mean over the first axis
# from each tensor before fitting.
inputX = TW_n - TW_n.mean(axis=0)
inputY = TW_e - TW_e.mean(axis=0)

# One initial fit plus 20 further fits; keep the one with the largest
# correlation. NOTE(review): this only helps if twoDcca starts from a random
# initialisation - confirm in util, and seed it if reproducibility matters.
loading, corr = twoDcca(inputX, inputY, x_regular=0.01, y_regular=0.01, iter_max=30)
for i in range(20):
    loading_tmp, corr_tmp = twoDcca(inputX, inputY, x_regular=0.01, y_regular=0.01, iter_max=30)
    if corr_tmp > corr:
        loading, corr = loading_tmp, corr_tmp

corr_n_e = corr
print(corr)

0.9044492163445442


In [65]:
# Station loadings of the north/east fit: loading[1] stacked on loading[3],
# labelled with the north then east station names.
data = np.r_[loading[1], loading[3]].flatten()
df = pd.DataFrame(data, columns=['coefficient'], index=stations_n + stations_e, dtype=float)
# The pattern is a raw string so the regex backslashes are not parsed as
# (invalid) string escapes; values are then rendered as 3-decimal text.
df = df.replace(r'\$\s+', '', regex=True).astype(float).applymap('{:,.3f}'.format)
print(df.T)
print('\n')
print(df.T.to_latex())

             Guting Tucheng Taoyuan Hsinchu   Yilan Dongshan Hualien Taitung
coefficient  -0.627  -0.671  -0.215  -0.331  -0.895    0.051  -0.441  -0.044


\begin{tabular}{lllllllll}
\toprule
{} &  Guting & Tucheng & Taoyuan & Hsinchu &   Yilan & Dongshan & Hualien & Taitung \\
\midrule
coefficient &  -0.627 &  -0.671 &  -0.215 &  -0.331 &  -0.895 &    0.051 &  -0.441 &  -0.044 \\
\bottomrule
\end{tabular}



In [66]:
# Pollutant loadings of the north/east fit: loading[0] is labelled N and
# loading[2] is labelled E, indexed by the pollutant column names.
data = {'N': loading[0].flatten(), 'E': loading[2].flatten()}
df = pd.DataFrame(data, index=pollutant, dtype=float)
# The pattern is a raw string so the regex backslashes are not parsed as
# (invalid) string escapes; values are then rendered as 3-decimal text.
df = df.replace(r'\$\s+', '', regex=True).astype(float).applymap('{:,.3f}'.format)
print(df.T)
print('\n')
print(df.T.to_latex())

      SO2      CO     O3   PM10     NOx     NO     NO2
N  -0.779   0.567  0.170  0.165  -0.051  0.112  -0.014
E  -0.027  -0.566  0.402  0.409   0.212  0.010  -0.552


\begin{tabular}{llllllll}
\toprule
{} &     SO2 &      CO &     O3 &   PM10 &     NOx &     NO &     NO2 \\
\midrule
N &  -0.779 &   0.567 &  0.170 &  0.165 &  -0.051 &  0.112 &  -0.014 \\
E &  -0.027 &  -0.566 &  0.402 &  0.409 &   0.212 &  0.010 &  -0.552 \\
\bottomrule
\end{tabular}



In [67]:
# S vs E

In [68]:
# South vs. east on the full series: subtract the mean over the first axis
# from each tensor before fitting.
inputX = TW_s - TW_s.mean(axis=0)
inputY = TW_e - TW_e.mean(axis=0)

# One initial fit plus 20 further fits; keep the one with the largest
# correlation. NOTE(review): this only helps if twoDcca starts from a random
# initialisation - confirm in util, and seed it if reproducibility matters.
loading, corr = twoDcca(inputX, inputY, x_regular=0.01, y_regular=0.01, iter_max=30)
for i in range(20):
    loading_tmp, corr_tmp = twoDcca(inputX, inputY, x_regular=0.01, y_regular=0.01, iter_max=30)
    if corr_tmp > corr:
        loading, corr = loading_tmp, corr_tmp

corr_s_e = corr
print(corr)

0.8166496879971927


In [69]:
# Station loadings of the south/east fit: loading[1] stacked on loading[3],
# labelled with the south then east station names.
data = np.r_[loading[1], loading[3]].flatten()
df = pd.DataFrame(data, columns=['coefficient'], index=stations_s + stations_e, dtype=float)
# The pattern is a raw string so the regex backslashes are not parsed as
# (invalid) string escapes; values are then rendered as 3-decimal text.
df = df.replace(r'\$\s+', '', regex=True).astype(float).applymap('{:,.3f}'.format)
print(df.T)
print('\n')
print(df.T.to_latex())

             Erlin Xinying Xiaogang Meinong  Yilan Dongshan Hualien Taitung
coefficient  0.620   0.170    0.567   0.516  0.497   -0.142   0.296   0.803


\begin{tabular}{lllllllll}
\toprule
{} &  Erlin & Xinying & Xiaogang & Meinong &  Yilan & Dongshan & Hualien & Taitung \\
\midrule
coefficient &  0.620 &   0.170 &    0.567 &   0.516 &  0.497 &   -0.142 &   0.296 &   0.803 \\
\bottomrule
\end{tabular}



In [70]:
# Pollutant loadings of the south/east fit: loading[0] is labelled S and
# loading[2] is labelled E, indexed by the pollutant column names.
data = {'S': loading[0].flatten(), 'E': loading[2].flatten()}
df = pd.DataFrame(data, index=pollutant, dtype=float)
# The pattern is a raw string so the regex backslashes are not parsed as
# (invalid) string escapes; values are then rendered as 3-decimal text.
df = df.replace(r'\$\s+', '', regex=True).astype(float).applymap('{:,.3f}'.format)
print(df.T)
print('\n')
print(df.T.to_latex())

      SO2      CO     O3   PM10    NOx      NO     NO2
S  -0.282  -0.715  0.140  0.021  0.355  -0.394  -0.329
E  -0.183  -0.323  0.428  0.077  0.213  -0.033  -0.791


\begin{tabular}{llllllll}
\toprule
{} &     SO2 &      CO &     O3 &   PM10 &    NOx &      NO &     NO2 \\
\midrule
S &  -0.282 &  -0.715 &  0.140 &  0.021 &  0.355 &  -0.394 &  -0.329 \\
E &  -0.183 &  -0.323 &  0.428 &  0.077 &  0.213 &  -0.033 &  -0.791 \\
\bottomrule
\end{tabular}



In [71]:
# Best correlation found for each regional pairing on the full series.
data = [corr_n_s, corr_s_e, corr_n_e]
df = pd.DataFrame(data, columns=['Correlation'], index=['N vs S', 'S vs E', 'N vs E'], dtype=float)
# The pattern is a raw string so the regex backslashes are not parsed as
# (invalid) string escapes; values are then rendered as 3-decimal text.
df = df.replace(r'\$\s+', '', regex=True).astype(float).applymap('{:,.3f}'.format)
print(df)
print('\n')
# The LaTeX version is transposed: one column per pairing.
print(df.T.to_latex())

       Correlation
N vs S       0.888
S vs E       0.817
N vs E       0.904


\begin{tabular}{llll}
\toprule
{} & N vs S & S vs E & N vs E \\
\midrule
Correlation &  0.888 &  0.817 &  0.904 \\
\bottomrule
\end{tabular}



In [72]:
# split data by summer and winter

In [73]:
# Apply split_ws to each regional tensor in turn; for every region the first
# returned array is bound to *_W and the second to *_S.
(N_W, N_S), (S_W, S_S), (E_W, E_S) = (
    split_ws(region) for region in (TW_n, TW_s, TW_e)
)

In [74]:
# N vs S

In [75]:
# North vs. south, winter rows only: subtract the mean over the first axis
# from each tensor before fitting.
inputX = N_W - N_W.mean(axis=0)
inputY = S_W - S_W.mean(axis=0)

# One initial fit plus 30 further fits; keep the one with the largest
# correlation. NOTE(review): the initial fit is capped at 40 iterations while
# the further fits get 100 - confirm the differing budgets are intentional.
loading, corr = twoDcca(inputX, inputY, x_regular=0.01, y_regular=0.01, iter_max=40)
for i in range(30):
    loading_tmp, corr_tmp = twoDcca(inputX, inputY, x_regular=0.01, y_regular=0.01, iter_max=100)
    if corr_tmp > corr:
        loading, corr = loading_tmp, corr_tmp

# Keep the winter result under its own names for the comparison tables.
loading_ww = loading
corr_n_s_ww = corr
print(corr)

0.94026698462649


In [76]:
# North vs. south, summer rows only: subtract the mean over the first axis
# from each tensor before fitting.
inputX = N_S - N_S.mean(axis=0)
inputY = S_S - S_S.mean(axis=0)

# One initial fit plus 30 further fits; keep the one with the largest
# correlation. NOTE(review): this only helps if twoDcca starts from a random
# initialisation - confirm in util, and seed it if reproducibility matters.
loading, corr = twoDcca(inputX, inputY, x_regular=0.01, y_regular=0.01, iter_max=40)
for i in range(30):
    loading_tmp, corr_tmp = twoDcca(inputX, inputY, x_regular=0.01, y_regular=0.01, iter_max=40)
    if corr_tmp > corr:
        loading, corr = loading_tmp, corr_tmp

# Keep the summer result under its own names for the comparison tables.
loading_ss = loading
corr_n_s_ss = corr
print(corr)

0.8891444766710277


In [77]:
# Station loadings of the seasonal north/south fits: for each season,
# loading[1] stacked on loading[3], labelled with north then south stations.
data = {
    "Winter": np.r_[loading_ww[1], loading_ww[3]].flatten(),
    "Summer": np.r_[loading_ss[1], loading_ss[3]].flatten(),
}
df = pd.DataFrame(data, index=stations_n + stations_s, dtype=float)
# The pattern is a raw string so the regex backslashes are not parsed as
# (invalid) string escapes; values are then rendered as 3-decimal text.
df = df.replace(r'\$\s+', '', regex=True).astype(float).applymap('{:,.3f}'.format)
print(df.T)
print('\n')
print(df.T.to_latex())

       Guting Tucheng Taoyuan Hsinchu   Erlin Xinying Xiaogang Meinong
Winter  0.423   0.061   0.078   0.901  -0.925  -0.345    0.161  -0.024
Summer  0.400  -0.277  -0.189  -0.853  -0.834  -0.435   -0.206  -0.270


\begin{tabular}{lllllllll}
\toprule
{} & Guting & Tucheng & Taoyuan & Hsinchu &   Erlin & Xinying & Xiaogang & Meinong \\
\midrule
Winter &  0.423 &   0.061 &   0.078 &   0.901 &  -0.925 &  -0.345 &    0.161 &  -0.024 \\
Summer &  0.400 &  -0.277 &  -0.189 &  -0.853 &  -0.834 &  -0.435 &   -0.206 &  -0.270 \\
\bottomrule
\end{tabular}



In [78]:
# Pollutant loadings of the seasonal north/south fits: loading[0] is labelled
# N and loading[2] is labelled S, for winter and then summer.
data = {
    'N(Winter)': loading_ww[0].flatten(),
    'S(Winter)': loading_ww[2].flatten(),
    'N(Summer)': loading_ss[0].flatten(),
    'S(Summer)': loading_ss[2].flatten(),
}
df = pd.DataFrame(data, index=pollutant, dtype=float)
# The pattern is a raw string so the regex backslashes are not parsed as
# (invalid) string escapes; values are then rendered as 3-decimal text.
df = df.replace(r'\$\s+', '', regex=True).astype(float).applymap('{:,.3f}'.format)
print(df.T)
print('\n')
print(df.T.to_latex())

              SO2      CO      O3    PM10     NOx     NO     NO2
N(Winter)  -0.122  -0.672   0.176   0.084  -0.298  0.083   0.633
S(Winter)  -0.229  -0.805  -0.357  -0.071  -0.291  0.242  -0.153
N(Summer)   0.540   0.344  -0.532  -0.060  -0.263  0.479  -0.066
S(Summer)  -0.001   0.136  -0.360   0.003   0.232  0.740  -0.500


\begin{tabular}{llllllll}
\toprule
{} &     SO2 &      CO &      O3 &    PM10 &     NOx &     NO &     NO2 \\
\midrule
N(Winter) &  -0.122 &  -0.672 &   0.176 &   0.084 &  -0.298 &  0.083 &   0.633 \\
S(Winter) &  -0.229 &  -0.805 &  -0.357 &  -0.071 &  -0.291 &  0.242 &  -0.153 \\
N(Summer) &   0.540 &   0.344 &  -0.532 &  -0.060 &  -0.263 &  0.479 &  -0.066 \\
S(Summer) &  -0.001 &   0.136 &  -0.360 &   0.003 &   0.232 &  0.740 &  -0.500 \\
\bottomrule
\end{tabular}



In [79]:
# N vs E

In [80]:
# North vs. east, winter rows only: subtract the mean over the first axis
# from each tensor before fitting.
inputX = N_W - N_W.mean(axis=0)
inputY = E_W - E_W.mean(axis=0)

# One initial fit plus 20 further fits; keep the one with the largest
# correlation. NOTE(review): this only helps if twoDcca starts from a random
# initialisation - confirm in util, and seed it if reproducibility matters.
loading, corr = twoDcca(inputX, inputY, x_regular=0.01, y_regular=0.01, iter_max=30)
for i in range(20):
    loading_tmp, corr_tmp = twoDcca(inputX, inputY, x_regular=0.01, y_regular=0.01, iter_max=30)
    if corr_tmp > corr:
        loading, corr = loading_tmp, corr_tmp

# Keep the winter result under its own names for the comparison tables
# (corr_ww and corr_n_e_ww hold the same value).
loading_ww = loading
corr_ww = corr
corr_n_e_ww = corr
print(corr)

0.9302649230143509


In [81]:
# North vs. east, summer rows only: subtract the mean over the first axis
# from each tensor before fitting.
inputX = N_S - N_S.mean(axis=0)
inputY = E_S - E_S.mean(axis=0)

# One initial fit plus 20 further fits; keep the one with the largest
# correlation. NOTE(review): this only helps if twoDcca starts from a random
# initialisation - confirm in util, and seed it if reproducibility matters.
loading, corr = twoDcca(inputX, inputY, x_regular=0.01, y_regular=0.01, iter_max=30)
for i in range(20):
    loading_tmp, corr_tmp = twoDcca(inputX, inputY, x_regular=0.01, y_regular=0.01, iter_max=30)
    if corr_tmp > corr:
        loading, corr = loading_tmp, corr_tmp

# Keep the summer result under its own names for the comparison tables
# (corr_ss and corr_n_e_ss hold the same value).
loading_ss = loading
corr_ss = corr
corr_n_e_ss = corr
print(corr)

0.9143104490461496


In [82]:
# Station loadings of the seasonal north/east fits: for each season,
# loading[1] stacked on loading[3], labelled with north then east stations.
data = {
    "Winter": np.r_[loading_ww[1], loading_ww[3]].flatten(),
    "Summer": np.r_[loading_ss[1], loading_ss[3]].flatten(),
}
df = pd.DataFrame(data, index=stations_n + stations_e, dtype=float)
# The pattern is a raw string so the regex backslashes are not parsed as
# (invalid) string escapes; values are then rendered as 3-decimal text.
df = df.replace(r'\$\s+', '', regex=True).astype(float).applymap('{:,.3f}'.format)
print(df.T)
print('\n')
print(df.T.to_latex())

        Guting Tucheng Taoyuan Hsinchu   Yilan Dongshan Hualien Taitung
Winter   0.601   0.719   0.343   0.060  -0.817   -0.279  -0.501  -0.065
Summer  -0.419  -0.225  -0.783  -0.401   0.843    0.195   0.464   0.188


\begin{tabular}{lllllllll}
\toprule
{} &  Guting & Tucheng & Taoyuan & Hsinchu &   Yilan & Dongshan & Hualien & Taitung \\
\midrule
Winter &   0.601 &   0.719 &   0.343 &   0.060 &  -0.817 &   -0.279 &  -0.501 &  -0.065 \\
Summer &  -0.419 &  -0.225 &  -0.783 &  -0.401 &   0.843 &    0.195 &   0.464 &   0.188 \\
\bottomrule
\end{tabular}



In [83]:
# Pollutant loadings of the seasonal north/east fits: loading[0] is labelled
# N and loading[2] is labelled E, for winter and then summer.
data = {
    'N(Winter)': loading_ww[0].flatten(),
    'E(Winter)': loading_ww[2].flatten(),
    'N(Summer)': loading_ss[0].flatten(),
    'E(Summer)': loading_ss[2].flatten(),
}
df = pd.DataFrame(data, index=pollutant, dtype=float)
# The pattern is a raw string so the regex backslashes are not parsed as
# (invalid) string escapes; values are then rendered as 3-decimal text.
df = df.replace(r'\$\s+', '', regex=True).astype(float).applymap('{:,.3f}'.format)
print(df.T)
print('\n')
print(df.T.to_latex())

              SO2      CO      O3    PM10     NOx      NO     NO2
N(Winter)   0.473   0.331  -0.247  -0.077   0.397  -0.431  -0.506
E(Winter)  -0.566  -0.233   0.331   0.148   0.414  -0.479  -0.304
N(Summer)  -0.111   0.113   0.074   0.188   0.450  -0.283  -0.807
E(Summer)  -0.272   0.371  -0.127  -0.209  -0.099  -0.473   0.704


\begin{tabular}{llllllll}
\toprule
{} &     SO2 &      CO &      O3 &    PM10 &     NOx &      NO &     NO2 \\
\midrule
N(Winter) &   0.473 &   0.331 &  -0.247 &  -0.077 &   0.397 &  -0.431 &  -0.506 \\
E(Winter) &  -0.566 &  -0.233 &   0.331 &   0.148 &   0.414 &  -0.479 &  -0.304 \\
N(Summer) &  -0.111 &   0.113 &   0.074 &   0.188 &   0.450 &  -0.283 &  -0.807 \\
E(Summer) &  -0.272 &   0.371 &  -0.127 &  -0.209 &  -0.099 &  -0.473 &   0.704 \\
\bottomrule
\end{tabular}



In [84]:
# S vs E

In [85]:
# South vs. east, winter rows only: subtract the mean over the first axis
# from each tensor before fitting.
inputX = S_W - S_W.mean(axis=0)
inputY = E_W - E_W.mean(axis=0)

# One initial fit plus 20 further fits; keep the one with the largest
# correlation. NOTE(review): this only helps if twoDcca starts from a random
# initialisation - confirm in util, and seed it if reproducibility matters.
loading, corr = twoDcca(inputX, inputY, x_regular=0.01, y_regular=0.01, iter_max=30)
for i in range(20):
    loading_tmp, corr_tmp = twoDcca(inputX, inputY, x_regular=0.01, y_regular=0.01, iter_max=30)
    if corr_tmp > corr:
        loading, corr = loading_tmp, corr_tmp

# Keep the winter result under its own names for the comparison tables.
loading_ww = loading
corr_s_e_ww = corr
print(corr)

0.9037652204428891


In [86]:
# South vs. east, summer rows only: subtract the mean over the first axis
# from each tensor before fitting.
inputX = S_S - S_S.mean(axis=0)
inputY = E_S - E_S.mean(axis=0)

# One initial fit plus 20 further fits; keep the one with the largest
# correlation. NOTE(review): this only helps if twoDcca starts from a random
# initialisation - confirm in util, and seed it if reproducibility matters.
loading, corr = twoDcca(inputX, inputY, x_regular=0.01, y_regular=0.01, iter_max=30)
for i in range(20):
    loading_tmp, corr_tmp = twoDcca(inputX, inputY, x_regular=0.01, y_regular=0.01, iter_max=30)
    if corr_tmp > corr:
        loading, corr = loading_tmp, corr_tmp

# Keep the summer result under its own names for the comparison tables.
loading_ss = loading
corr_s_e_ss = corr
print(corr)

0.8214624741700951


In [87]:
# Station loadings of the seasonal south/east fits: for each season,
# loading[1] stacked on loading[3], labelled with south then east stations.
data = {
    "Winter": np.r_[loading_ww[1], loading_ww[3]].flatten(),
    "Summer": np.r_[loading_ss[1], loading_ss[3]].flatten(),
}
df = pd.DataFrame(data, index=stations_s + stations_e, dtype=float)
# The pattern is a raw string so the regex backslashes are not parsed as
# (invalid) string escapes; values are then rendered as 3-decimal text.
df = df.replace(r'\$\s+', '', regex=True).astype(float).applymap('{:,.3f}'.format)
print(df.T)
print('\n')
print(df.T.to_latex())

         Erlin Xinying Xiaogang Meinong  Yilan Dongshan Hualien Taitung
Winter  -0.961   0.060   -0.115  -0.243  0.462    0.147   0.751   0.447
Summer   0.346   0.809    0.291   0.374  0.741   -0.357   0.327   0.465


\begin{tabular}{lllllllll}
\toprule
{} &   Erlin & Xinying & Xiaogang & Meinong &  Yilan & Dongshan & Hualien & Taitung \\
\midrule
Winter &  -0.961 &   0.060 &   -0.115 &  -0.243 &  0.462 &    0.147 &   0.751 &   0.447 \\
Summer &   0.346 &   0.809 &    0.291 &   0.374 &  0.741 &   -0.357 &   0.327 &   0.465 \\
\bottomrule
\end{tabular}



In [88]:
# Pollutant loadings of the seasonal south/east fits: loading[0] is labelled
# S and loading[2] is labelled E, for winter and then summer.
data = {
    'S(Winter)': loading_ww[0].flatten(),
    'E(Winter)': loading_ww[2].flatten(),
    'S(Summer)': loading_ss[0].flatten(),
    'E(Summer)': loading_ss[2].flatten(),
}
df = pd.DataFrame(data, index=pollutant, dtype=float)
# The pattern is a raw string so the regex backslashes are not parsed as
# (invalid) string escapes; values are then rendered as 3-decimal text.
df = df.replace(r'\$\s+', '', regex=True).astype(float).applymap('{:,.3f}'.format)
print(df.T)
print('\n')
print(df.T.to_latex())

              SO2      CO      O3    PM10     NOx      NO     NO2
S(Winter)   0.401   0.026  -0.263  -0.019  -0.628   0.392   0.469
E(Winter)  -0.302  -0.524   0.170   0.077   0.282  -0.712  -0.116
S(Summer)   0.619   0.135  -0.173  -0.010  -0.446   0.288   0.535
E(Summer)   0.106  -0.025  -0.265  -0.048  -0.259  -0.622   0.680


\begin{tabular}{llllllll}
\toprule
{} &     SO2 &      CO &      O3 &    PM10 &     NOx &      NO &     NO2 \\
\midrule
S(Winter) &   0.401 &   0.026 &  -0.263 &  -0.019 &  -0.628 &   0.392 &   0.469 \\
E(Winter) &  -0.302 &  -0.524 &   0.170 &   0.077 &   0.282 &  -0.712 &  -0.116 \\
S(Summer) &   0.619 &   0.135 &  -0.173 &  -0.010 &  -0.446 &   0.288 &   0.535 \\
E(Summer) &   0.106 &  -0.025 &  -0.265 &  -0.048 &  -0.259 &  -0.622 &   0.680 \\
\bottomrule
\end{tabular}



In [89]:
# summary

In [90]:
# Best correlation per regional pairing, winter in the first row and summer
# in the second.
data = {
    'N vs S': [corr_n_s_ww, corr_n_s_ss],
    "S vs E": [corr_s_e_ww, corr_s_e_ss],
    "N vs E": [corr_n_e_ww, corr_n_e_ss],
}
df = pd.DataFrame(data, index=['Winter', 'Summer'], dtype=float)
# The pattern is a raw string so the regex backslashes are not parsed as
# (invalid) string escapes; values are then rendered as 3-decimal text.
df = df.replace(r'\$\s+', '', regex=True).astype(float).applymap('{:,.3f}'.format)
print(df)
print('\n')
print(df.to_latex())

       N vs S S vs E N vs E
Winter  0.940  0.904  0.930
Summer  0.889  0.821  0.914


\begin{tabular}{llll}
\toprule
{} & N vs S & S vs E & N vs E \\
\midrule
Winter &  0.940 &  0.904 &  0.930 \\
Summer &  0.889 &  0.821 &  0.914 \\
\bottomrule
\end{tabular}

