In [1]:
# Load the cleaned temperature records used by the rest of the notebook.
data.raw <- read.csv(file = "final_cleaned_data.csv")

# Derived once from the loaded data; treat these as read-only constants.
# NROW and NCOL share their names with base R functions; calls such as
# NROW(x) still resolve to the functions, but the overlap is easy to misread.
YEARS <- unique(data.raw$Year)
NCOL <- ncol(data.raw)
NROW <- nrow(data.raw)

# Seasonal oscillations to subtract from MaxTemp. Each one is the output of
# spectral::filter.fft centred on frequency `fc` with bandwidth `BW`.
# The second filter is centred at twice the frequency of the first.
# NOTE(review): fc = 0.002725 is close to 1/365 cycles per sample, presumably
# the yearly cycle for daily data -- confirm against the sampling interval.
seasonal.annual <- spectral::filter.fft(
    data.raw$MaxTemp,
    x = NULL, fc = 0.002725, BW = 0.00105, n = 1
)
seasonal.biannual <- spectral::filter.fft(
    data.raw$MaxTemp,
    x = NULL, fc = 0.00545, BW = 0.0002, n = 1
)
# To skip the second correction, use instead: seasonal.biannual = 0

# Working copy for further processing: MaxTemp is replaced by the real part
# of the series after both oscillations have been removed.
data.raw.filtered <- data.frame(data.raw)
data.raw.filtered$MaxTemp <- Re(
    data.raw$MaxTemp - seasonal.annual - seasonal.biannual
)

# The imaginary part of the same difference is kept separately so the
# original data can be recovered later.
imaginary_residuals <- Im(
    data.raw$MaxTemp - seasonal.annual - seasonal.biannual
)

# Separate copy that will hold the normalized series.
data.raw.filtered.normalized <- data.frame(data.raw.filtered)

# Mean and variance of the filtered MaxTemp for each (Month, Day) pair,
# each table sorted by month and then by day.
grouped.daily.avg <- aggregate(
    MaxTemp ~ Month * Day,
    data = data.raw.filtered.normalized,
    FUN = mean
)
grouped.daily.avg <- grouped.daily.avg[
    with(grouped.daily.avg, order(Month, Day)),
]

grouped.daily.var <- aggregate(
    MaxTemp ~ Month * Day,
    data = data.raw.filtered.normalized,
    FUN = var
)
grouped.daily.var <- grouped.daily.var[
    with(grouped.daily.var, order(Month, Day)),
]

# Standardize a value column by the mean and variance of its calendar day.
#
# For every row of `x`, the (month, day) pair is looked up in the two summary
# tables and the value is replaced by (value - mean) / sqrt(variance).
#
# Arguments:
#   x                 data frame holding the month, day and value columns.
#   grouped_daily_var data frame with columns Month, Day and MaxTemp, where
#                     MaxTemp is the variance for that calendar day.
#   grouped_daily_avg data frame with columns Month, Day and MaxTemp, where
#                     MaxTemp is the mean for that calendar day.
#   month_col, day_col, value_col
#                     columns of `x` (position or name) holding the month,
#                     the day and the value to normalize. The defaults
#                     2, 3 and 4 are the positions this function has always
#                     used.
#
# Returns `x` with the value column replaced by its normalized values.
# Stops with an error if a row's (month, day) pair is missing from either
# summary table. A zero or NA variance propagates as Inf/NaN/NA.
normalize <- function(x, grouped_daily_var, grouped_daily_avg,
                      month_col = 2, day_col = 3, value_col = 4) {
    # Nothing to do for an empty frame (a 1:nrow(x) loop would run on 1 and 0).
    if (nrow(x) == 0) {
        return(x)
    }

    # One string key per (month, day) pair so all rows are matched at once.
    row_key <- paste(x[[month_col]], x[[day_col]], sep = "-")
    var_key <- paste(grouped_daily_var$Month, grouped_daily_var$Day, sep = "-")
    avg_key <- paste(grouped_daily_avg$Month, grouped_daily_avg$Day, sep = "-")

    var_idx <- match(row_key, var_key)
    avg_idx <- match(row_key, avg_key)

    unmatched <- is.na(var_idx) | is.na(avg_idx)
    if (any(unmatched)) {
        stop(
            "normalize: no daily mean/variance for ", sum(unmatched),
            " row(s); first unmatched row is ", which(unmatched)[1],
            call. = FALSE
        )
    }

    daily_mean <- grouped_daily_avg$MaxTemp[avg_idx]
    daily_sd <- sqrt(grouped_daily_var$MaxTemp[var_idx])
    x[[value_col]] <- (x[[value_col]] - daily_mean) / daily_sd

    x
}

# Standardize each observation by the mean and variance of its calendar day.
data.raw.filtered.normalized <- normalize(
    data.raw.filtered.normalized,
    grouped.daily.var,
    grouped.daily.avg
)

# Wrap the normalized series as a time-series object with frequency 1.
# The variable shares its name with stats::ts; calls to ts() still resolve
# to the function.
ts <- ts(data.raw.filtered.normalized$MaxTemp, frequency = 1)

library(forecast)

# Candidate models with no differencing, named after their (p, q) orders.
# Pure autoregressive fits: AR(2), AR(3), AR(4).
ar2 <- Arima(ts, order = c(2, 0, 0))
ar3 <- Arima(ts, order = c(3, 0, 0))
ar4 <- Arima(ts, order = c(4, 0, 0))
# Mixed fits with one moving-average term: ARMA(1,1), ARMA(2,1), ARMA(3,1).
ar1ma1 <- Arima(ts, order = c(1, 0, 1))
ar2ma1 <- Arima(ts, order = c(2, 0, 1))
ar3ma1 <- Arima(ts, order = c(3, 0, 1))

In [2]:
# Load the two segments of records set aside as bad data.
data.bad.segment1 <- read.csv(file = "final_bad_data_segment1.csv")
data.bad.segment2 <- read.csv(file = "final_bad_data_segment2.csv")

In [3]:
# Preview the first six rows of the cleaned data.
head(data.raw, n = 6L)

Unnamed: 0,Year,Month,Day,MaxTemp
4746,1867,12,29,23.3
4747,1867,12,30,21.1
4748,1867,12,31,21.8
4749,1868,1,1,19.0
4750,1868,1,2,33.2
4751,1868,1,3,22.7


In [4]:
# Preview the first six rows of the first bad-data segment.
head(data.bad.segment1, n = 6L)

Unnamed: 0,Year,Month,Day,MaxTemp
57087,2011,4,19,23.2
57679,2012,12,1,27.4
57680,2012,12,2,20.7
57681,2012,12,3,21.4
57682,2012,12,4,18.9
57683,2012,12,5,18.2
