In [2]:
import pandas as pd
import numpy as np
import matplotlib.pylab as plt

# Essential Components of Time-Series Analysis



In [179]:
# Load the two raw inputs used throughout this notebook.
# Both CSVs live in the same folder, so the folder is named once here; point
# DATA_DIR at your local copy to run the notebook on another machine.
# Forward slashes are used for both files (the original mixed "\" and "/");
# Windows accepts either separator.
DATA_DIR = r"D:/time_series_session"

car_sales_data = pd.read_csv(DATA_DIR + "/sales_cars.csv")   # Month / Sales records
sample_pm_data = pd.read_csv(DATA_DIR + "/sample_ctl.csv")   # timestamped PM samples

In [180]:
# Inspect the schema. `dtypes` lists every column name next to its dtype, so a
# separate `car_sales_data.columns` line is unnecessary: as a non-final
# expression in the cell its value was discarded and never displayed.
car_sales_data.dtypes

Month    object
Sales     int64
dtype: object

In [181]:
# Preview the first five records of the car-sales frame
# (head() defaults to n=5).

car_sales_data.head()

Unnamed: 0,Month,Sales
0,2016-01,266
1,2016-02,146
2,2016-03,183
3,2016-04,119
4,2016-05,180


In [182]:
# Inspect the current index. No index column was chosen when the CSV was read,
# so pandas uses its default RangeIndex: sequential integers 0 .. length-1.

car_sales_data.index

RangeIndex(start=0, stop=36, step=1)

In [183]:
# Drop the rows labelled 1, 2 and 3 (the 2016-02, 2016-03 and 2016-04 records),
# leaving a gap in the monthly series for the later cells to detect and fill.
#
# The result is rebound instead of using inplace=True, and errors="ignore"
# makes the cell safe to run twice: labels that are already gone are skipped
# rather than raising KeyError.

car_sales_data = car_sales_data.drop(index=[1, 2, 3], errors="ignore")

In [184]:
# Convert the Month column from strings such as "2016-01" to datetime64.
# An explicit format makes parsing strict and deterministic: a value that is
# not YYYY-MM raises instead of being guessed at. Each month maps to its
# first day (e.g. 2016-01 -> 2016-01-01).

car_sales_data['Month'] = pd.to_datetime(car_sales_data['Month'], format='%Y-%m')

In [185]:
# Re-check the dtypes after the conversion: Month should now be
# datetime64[ns] rather than object.

car_sales_data.dtypes

Month    datetime64[ns]
Sales             int64
dtype: object

In [186]:
# Use the parsed Month column as the index, giving the frame a DatetimeIndex.
#
# Guarded and rebound (no inplace=True) so the cell can be re-run: once Month
# has become the index it is no longer a column, and an unguarded set_index
# would raise KeyError.

if 'Month' in car_sales_data.columns:
    car_sales_data = car_sales_data.set_index('Month')

In [187]:
# Show the first 5 entries of the new DatetimeIndex (index labels only, not full rows).

car_sales_data.index[:5]

DatetimeIndex(['2016-01-01', '2016-05-01', '2016-06-01', '2016-07-01',
               '2016-08-01'],
              dtype='datetime64[ns]', name='Month', freq=None)

In [188]:
# A DatetimeIndex exposes calendar fields as attributes (e.g. .month, .day,
# .dayofweek, .hour). Shown here: the month number of every row.

car_sales_data.index.month

Int64Index([ 1,  5,  6,  7,  8,  9, 10, 11, 12,  1,  2,  3,  4,  5,  6,  7,  8,
             9, 10, 11, 12,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12],
           dtype='int64', name='Month')

In [191]:
# Count the entries actually present in the timestamp index so the figure can
# be compared with the number of months the date span should contain.
# Reference for the expected length (kept disabled):
#   idx = pd.date_range(min(car_sales_data.index), max(car_sales_data.index), freq='MS')
#   len(idx)

len(car_sales_data.index)

33

In [192]:
# Put the car-sales series on a regular month-start ('MS') grid. Months that
# are absent from the index come back as rows whose Sales is NaN; existing
# months keep their value (presumably one observation per month, so the mean
# of each bin is that observation).

car_sales_data = car_sales_data.resample('MS').mean()
car_sales_data.head(10)

Unnamed: 0_level_0,Sales
Month,Unnamed: 1_level_1
2016-01-01,266.0
2016-02-01,
2016-03-01,
2016-04-01,
2016-05-01,180.0
2016-06-01,169.0
2016-07-01,232.0
2016-08-01,225.0
2016-09-01,193.0
2016-10-01,123.0


In [198]:
# Fill the NaN gaps left by resampling.
# Alternatives worth trying (each on its own):
#   car_sales_data.fillna(0)    # constant fill
#   car_sales_data.bfill()      # carry the next valid observation backwards
#
# Linear interpolation is used here. The former `order=2` argument is dropped:
# `order` is only consulted by the 'polynomial' and 'spline' methods, so with
# method='linear' it had no effect and wrongly suggested a quadratic fit.
#
# NOTE(review): the saved output below shows 0.0 for 2016-02 .. 2016-04, which
# linear interpolation between 266 and 180 cannot produce; presumably an
# in-place fillna(0) had been run first -- re-run from a clean kernel to confirm.
sample_data = car_sales_data.interpolate(method='linear')
sample_data.head(10)

Unnamed: 0_level_0,Sales
Month,Unnamed: 1_level_1
2016-01-01,266.0
2016-02-01,0.0
2016-03-01,0.0
2016-04-01,0.0
2016-05-01,180.0
2016-06-01,169.0
2016-07-01,232.0
2016-08-01,225.0
2016-09-01,193.0
2016-10-01,123.0


In [199]:
# Prepare the PM sample: parse the collection timestamps and make them the index.
#
# Both steps are guarded and the frame is rebound (no inplace=True) so the
# cell can be re-run: once pm_collectiontime has become the index it is no
# longer a column, and the unguarded version would raise KeyError.

if 'pm_collectiontime' in sample_pm_data.columns:
    sample_pm_data['pm_collectiontime'] = pd.to_datetime(sample_pm_data['pm_collectiontime'])
    sample_pm_data = sample_pm_data.set_index('pm_collectiontime')

In [200]:
# Count the 5-minute samples missing from the PM series: build the complete
# 5-minute grid between the first and last timestamp and compare its length
# with the number of rows actually present.
#
# Index.min()/max() replace the Python built-ins (vectorised instead of
# iterating element by element), and the '5min' alias replaces '5T', which
# newer pandas releases deprecate; both spell the same frequency.

test_idx = pd.date_range(sample_pm_data.index.min(), sample_pm_data.index.max(), freq='5min')
len_diff = len(test_idx) - len(sample_pm_data)
len_diff

2

In [201]:
# Put the PM data on a regular 5-minute grid; intervals without a sample
# become NaN rows.
# '5min' is the same frequency as the old '5T' alias, which newer pandas
# releases deprecate.

sample_pm_data = sample_pm_data.resample('5min').mean()

In [202]:
# Derive new features from the timestamp index. Shown here: the hour of day
# of every row. The disabled lines list other fields available the same way
# (only the last expression of a cell is displayed, so enable one at a time).

#sample_pm_data.index.month
sample_pm_data.index.hour
#sample_pm_data.index.day
#sample_pm_data.index.dayofweek
#sample_pm_data.index.dayofyear

Int64Index([17, 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
            18, 18, 19, 19, 19],
           dtype='int64', name='pm_collectiontime')

In [150]:
# Upsample the 5-minute series to a 1-minute grid. The minutes that fall
# between two 5-minute samples have no observations, so they start out as NaN.
# '1min' is the same frequency as the old '1T' alias, which newer pandas
# releases deprecate.

sample_pm_data_1min = sample_pm_data.resample('1min').mean()

In [152]:
# Fill the NaN minutes created by upsampling with straight-line interpolation
# between neighbouring observations. `order` is only used by the 'polynomial'
# and 'spline' methods, so the no-op `order=2` has been removed.
sample_pm_data_1min.interpolate(method='linear')

Unnamed: 0_level_0,pm_jitteratoz
pm_collectiontime,Unnamed: 1_level_1
2019-01-03 17:25:00+00:00,114.0
2019-01-03 17:26:00+00:00,123.8
2019-01-03 17:27:00+00:00,133.6
2019-01-03 17:28:00+00:00,143.4
2019-01-03 17:29:00+00:00,153.2
2019-01-03 17:30:00+00:00,163.0
2019-01-03 17:31:00+00:00,150.6
2019-01-03 17:32:00+00:00,138.2
2019-01-03 17:33:00+00:00,125.8
2019-01-03 17:34:00+00:00,113.4


In [203]:
# Lead the series by one step: with a negative period every value moves up
# one row, so each timestamp shows the NEXT observation and the final row
# is left as NaN. The index itself is unchanged.

shifted_version = sample_pm_data.shift(periods=-1)

In [204]:
# First five rows of the shifted series, for comparison with the original.
shifted_version.head()


Unnamed: 0_level_0,pm_jitteratoz
pm_collectiontime,Unnamed: 1_level_1
2019-01-03 17:25:00+00:00,163.0
2019-01-03 17:30:00+00:00,101.0
2019-01-03 17:35:00+00:00,153.0
2019-01-03 17:40:00+00:00,
2019-01-03 17:45:00+00:00,


In [155]:
# First five rows of the unshifted series (each value sits one row lower than
# in the shifted version).
sample_pm_data.head()

Unnamed: 0_level_0,pm_jitteratoz
pm_collectiontime,Unnamed: 1_level_1
2019-01-03 17:25:00+00:00,114.0
2019-01-03 17:30:00+00:00,163.0
2019-01-03 17:35:00+00:00,101.0
2019-01-03 17:40:00+00:00,153.0
2019-01-03 17:45:00+00:00,
