In [1]:
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from keras.preprocessing.sequence import TimeseriesGenerator
from sklearn.preprocessing import MinMaxScaler, StandardScaler
import tensorflow as tf

mpl.rcParams['figure.figsize'] = (10,8)
mpl.rcParams['axes.grid']= False



In [2]:
df = pd.read_csv("Travel_scrape.csv")

In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8051 entries, 0 to 8050
Data columns (total 7 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Hotel          8051 non-null   object 
 1   Room Type      8051 non-null   object 
 2   Rate           8051 non-null   object 
 3   Check-in Date  8051 non-null   object 
 4   Search Date    8051 non-null   object 
 5   Search Time    8051 non-null   object 
 6   Unnamed: 6     0 non-null      float64
dtypes: float64(1), object(6)
memory usage: 440.4+ KB


In [4]:
df = df.drop('Unnamed: 6', axis=1)
# Change Data type
df['Check-in Date'] = pd.to_datetime(df['Check-in Date'])
df['Search Date'] = pd.to_datetime(df['Search Date'])

df['Date_gap'] = df['Check-in Date'] - df['Search Date']
df['Date_gap'] = df['Date_gap'].astype(str)
df["Date_gap"]= df["Date_gap"].replace( r"days","", regex=True)

# Remove $ mark in Rate Column
df['Rate'] = df['Rate'].apply(lambda x: x[3:])
df.head()

Unnamed: 0,Hotel,Room Type,Rate,Check-in Date,Search Date,Search Time,Date_gap
0,2100 SqFt Penthouse Suite W/ Strip Views! POOL...,Penthouse Apartment,254,2022-08-03,2022-08-03,0:00:00,0
1,2100 SqFt Penthouse Suite W/ Strip Views! POOL...,Penthouse Apartment,254,2022-08-03,2022-08-03,1:00:00,0
2,2100 SqFt Penthouse Suite W/ Strip Views! POOL...,Penthouse Apartment,254,2022-08-03,2022-08-03,2:00:00,0
3,2100 SqFt Penthouse Suite W/ Strip Views! POOL...,Penthouse Apartment,254,2022-08-03,2022-08-03,3:00:00,0
4,2100 SqFt Penthouse Suite W/ Strip Views! POOL...,Penthouse Apartment,254,2022-08-03,2022-08-03,4:00:00,0


In [5]:
df['Search Time'] = pd.to_datetime(df['Search Time'], infer_datetime_format=True)

In [6]:
df.head()

Unnamed: 0,Hotel,Room Type,Rate,Check-in Date,Search Date,Search Time,Date_gap
0,2100 SqFt Penthouse Suite W/ Strip Views! POOL...,Penthouse Apartment,254,2022-08-03,2022-08-03,2022-09-05 00:00:00,0
1,2100 SqFt Penthouse Suite W/ Strip Views! POOL...,Penthouse Apartment,254,2022-08-03,2022-08-03,2022-09-05 01:00:00,0
2,2100 SqFt Penthouse Suite W/ Strip Views! POOL...,Penthouse Apartment,254,2022-08-03,2022-08-03,2022-09-05 02:00:00,0
3,2100 SqFt Penthouse Suite W/ Strip Views! POOL...,Penthouse Apartment,254,2022-08-03,2022-08-03,2022-09-05 03:00:00,0
4,2100 SqFt Penthouse Suite W/ Strip Views! POOL...,Penthouse Apartment,254,2022-08-03,2022-08-03,2022-09-05 04:00:00,0


In [7]:
df1 = df.set_index(['Search Time'])
df1= df1.reset_index()
df1.head()

Unnamed: 0,Search Time,Hotel,Room Type,Rate,Check-in Date,Search Date,Date_gap
0,2022-09-05 00:00:00,2100 SqFt Penthouse Suite W/ Strip Views! POOL...,Penthouse Apartment,254,2022-08-03,2022-08-03,0
1,2022-09-05 01:00:00,2100 SqFt Penthouse Suite W/ Strip Views! POOL...,Penthouse Apartment,254,2022-08-03,2022-08-03,0
2,2022-09-05 02:00:00,2100 SqFt Penthouse Suite W/ Strip Views! POOL...,Penthouse Apartment,254,2022-08-03,2022-08-03,0
3,2022-09-05 03:00:00,2100 SqFt Penthouse Suite W/ Strip Views! POOL...,Penthouse Apartment,254,2022-08-03,2022-08-03,0
4,2022-09-05 04:00:00,2100 SqFt Penthouse Suite W/ Strip Views! POOL...,Penthouse Apartment,254,2022-08-03,2022-08-03,0


In [8]:
df1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8051 entries, 0 to 8050
Data columns (total 7 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   Search Time    8051 non-null   datetime64[ns]
 1   Hotel          8051 non-null   object        
 2   Room Type      8051 non-null   object        
 3   Rate           8051 non-null   object        
 4   Check-in Date  8051 non-null   datetime64[ns]
 5   Search Date    8051 non-null   datetime64[ns]
 6   Date_gap       8051 non-null   object        
dtypes: datetime64[ns](3), object(4)
memory usage: 440.4+ KB


In [9]:
df2 = df1[['Search Time','Rate']]
#df2= df2.reset_index()
df2.head()

Unnamed: 0,Search Time,Rate
0,2022-09-05 00:00:00,254
1,2022-09-05 01:00:00,254
2,2022-09-05 02:00:00,254
3,2022-09-05 03:00:00,254
4,2022-09-05 04:00:00,254


In [10]:
df2['Search Time'] = pd.to_numeric(pd.to_datetime(df2['Search Time']))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['Search Time'] = pd.to_numeric(pd.to_datetime(df2['Search Time']))


In [11]:
df2['Rate'] = pd.to_numeric(df2['Rate'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['Rate'] = pd.to_numeric(df2['Rate'])


In [12]:
df2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8051 entries, 0 to 8050
Data columns (total 2 columns):
 #   Column       Non-Null Count  Dtype
---  ------       --------------  -----
 0   Search Time  8051 non-null   int64
 1   Rate         8051 non-null   int64
dtypes: int64(2)
memory usage: 125.9 KB


In [13]:
# from statsmodels.tsa.seasonal import seasonal_decompose
# decompose_data = seasonal_decompose(df1, model = "additive")
# decompose_data.plot()

In [14]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
df2_scaled = scaler.fit_transform(df2)

In [15]:
from sklearn.model_selection import train_test_split
train , test = train_test_split(df2_scaled, test_size= 0.10, shuffle=False)

### This is MultiVarient data , "n_features" > 1

In [16]:
from keras_preprocessing.sequence import TimeseriesGenerator

n_input = 6

# This is MultiVarient data , "n_features" > 1
n_features = 1

generatorTrain = TimeseriesGenerator(train, train, length=n_input, batch_size=1)
generatorTest = TimeseriesGenerator(test, test, length=n_input, batch_size=1)

In [17]:
batch_0 = generatorTrain[0]
x, y = batch_0

print(x, y)

[[[0.         0.30148048]
  [0.04347826 0.30148048]
  [0.08695652 0.30148048]
  [0.13043478 0.30148048]
  [0.17391304 0.30148048]
  [0.2173913  0.30148048]]] [[0.26086957 0.30148048]]


In [8]:
#df('Rate')[['Hotel','Room Type','Date_gap']].plot(subplots=True)