Prophet Model

In [None]:
import pandas as pd
from prophet import Prophet
import matplotlib.pyplot as plt


In [None]:
# Load the Goa tourism dataset from the Kaggle input directory.
df = pd.read_csv("/kaggle/input/tourismdataset/GoaTourism.csv")
# Preview the first rows to confirm the columns were parsed as expected.
df.head()

In [None]:


# Prophet reads the target from a column named 'y'; the date parts are
# lower-cased so pd.to_datetime can assemble them below.
df = df.rename(columns={"Year": "year", "Month": "month", "No. of Tourists": "y"})

# Build the timestamp column: first day of each (year, month), moved forward
# one month and back one day, i.e. the last calendar day of that month.
df['ds'] = (
    pd.to_datetime(df[['year', 'month']].assign(day=1))
    + pd.DateOffset(months=1)
    - pd.DateOffset(days=1)
)

# Round-trip through the index; the net effect is that 'ds' becomes the
# first column of the frame.
df = df.set_index('ds').reset_index()

# Constant carrying capacity used by the logistic-growth model.
df['cap'] = 300000

# Rows strictly before 2020 are kept for model fitting.
df1 = df[df['year'] < 2020]

In [None]:
# Training rows: everything before 2020 (df1 was filtered with year < 2020).
train = df1
# Hold-out rows: calendar year 2022 only; 2020 and 2021 are in neither split.
test = df[ df['year'] == 2022 ]


In [None]:
# Logistic-growth Prophet model; this growth mode needs a 'cap' column in the
# training frame, which was set to a constant 300000 during preprocessing.
model = Prophet(growth='logistic')
model.fit(train)

In [None]:
# Month-end dates spanning the history and the forecast horizon. An explicit
# MonthEnd offset is used instead of the 'M' string alias, which newer pandas
# releases deprecate in favour of 'ME'; the offset object works on both.
future_dates = pd.DataFrame({
    'ds': pd.date_range(start='2010-01-31', end='2027-12-31', freq=pd.offsets.MonthEnd())
})
# Logistic growth needs the carrying capacity on every row it predicts for;
# this is the same constant that was assigned to the training frame.
future_dates['cap'] = 300000
print(future_dates)

In [None]:
# Predict for every date in future_dates (the historical span plus the horizon).
forecast = model.predict(future_dates)
# Keep the first 12 forecast rows for a quick visual check.
df2 = forecast.head(12)
df2

In [None]:
# Forecast plot produced by Prophet's built-in plotting helper.
model.plot(forecast, xlabel = 'Date', ylabel = 'No. of Tourists')
plt.show()

In [None]:
# Component plots of the fitted forecast, via Prophet's plot_components helper.
fig2 = model.plot_components(forecast)
plt.show()

In [None]:
# Pair each observation with the forecast for the SAME date. Taking the first
# len(df) forecast rows is only correct when the dataset starts exactly at the
# first forecast date and has no gaps; joining on 'ds' removes that assumption.
aligned = df[['ds', 'y']].merge(forecast[['ds', 'yhat']], on='ds', how='inner')
actual = aligned['y'].values
predicted = aligned['yhat'].values
actual.shape, predicted.shape

In [None]:
from sklearn.metrics import mean_squared_error
# Mean squared error between the observed series and the Prophet estimates.
mse = mean_squared_error(actual, predicted)
print("Mean Squared Error:", mse)

def calculate_mape(actual, predicted):
    """Return the mean absolute percentage error, in percent.

    Parameters
    ----------
    actual, predicted : array-like objects supporting elementwise arithmetic
        (e.g. NumPy arrays) of matching or broadcastable shape.

    Returns
    -------
    The mean of ``|actual - predicted| / |actual|`` multiplied by 100.

    Notes
    -----
    The denominator is taken in absolute value so that negative actuals
    contribute a positive error instead of cancelling positive ones.
    Zero actuals still yield an infinite or NaN result.
    """
    return (abs(actual - predicted) / abs(actual)).mean() * 100

mape = calculate_mape(actual, predicted)

# "Accuracy" is defined here as 100 minus MAPE; it goes negative when MAPE exceeds 100.
accuracy = 100 - mape
print("Percentage Accuracy:", accuracy)

In [None]:
from sklearn.metrics import mean_absolute_error

# Match every hold-out row with the forecast for the same date. Slicing the
# first len(test) forecast rows would pick the earliest forecast dates
# (the forecast frame starts at 2010-01-31), not the 2022 hold-out months.
test_forecast = test[['ds', 'y']].merge(forecast[['ds', 'yhat']], on='ds', how='inner')

actual = test_forecast['y'].values

forecasted = test_forecast['yhat'].values
for f, a in zip(forecasted, actual):
    print(f"Forecasted: {f}, Actual: {a}")

mae = mean_absolute_error(actual, forecasted)
print("Mean Absolute Error (MAE):", mae)

def calculate_mape(actual, predicted):
    """Return the mean absolute percentage error, in percent.

    Computes the mean of ``|actual - predicted| / |actual|`` times 100 for
    array-like inputs that support elementwise arithmetic. The absolute value
    on the denominator keeps each term non-negative even for negative actuals;
    zero actuals still yield an infinite or NaN result.
    """
    return (abs(actual - predicted) / abs(actual)).mean() * 100

# MAPE of the forecast against the hold-out observations.
mape = calculate_mape(actual, forecasted)

# "Accuracy" is defined here as 100 minus MAPE; it goes negative when MAPE exceeds 100.
accuracy = 100 - mape
print("Percentage Accuracy:", accuracy)

VAR Model


In [None]:
import numpy as np
import math
from sklearn.metrics import mean_absolute_error, mean_squared_error
import statsmodels.api as sm
from statsmodels.tsa.api import VAR

In [None]:
# Reload the raw CSV so the VAR section starts from the original, un-renamed columns.
data=pd.read_csv('/kaggle/input/tourismdataset/GoaTourism.csv')

In [None]:
# Index the frame by the first day of each (Year, Month). The 'Year' and
# 'Month' columns stay in the frame as ordinary columns.
data = data.assign(
    Date=pd.to_datetime(data[['Year', 'Month']].assign(day=1))
).set_index('Date')

In [None]:
# VAR over every column of `data`. This rebinds `model`, which previously
# held the Prophet model.
model = VAR(data)

In [None]:
# Lag order passed to fit() below.
k_ar = 5  # adjust this based on the data and analysis

# Fit the VAR model
model_fitted = model.fit(k_ar)

# Predict the number of tourists for the next month (June)
# NOTE(review): "June" comes from the original comment and cannot be confirmed
# from this notebook — check it against the last row of the dataset.
# This rebinds `forecast`, which previously held the Prophet forecast frame.
forecast = model_fitted.forecast(model_fitted.endog, steps=1)

In [None]:
# Pick the tourist-count column out of the one-step forecast; [0] selects the single forecast row.
forecasted_tourists = forecast[:,data.columns.get_loc('No. of Tourists')][0]

In [None]:
# Hold out the final 12 rows for validation; everything before them is training data.
train_data = data[:-12]
validation_data = data[-12:]

# Fit a VAR model with the chosen lag order. The lag order is passed
# explicitly: calling fit() with no argument would silently ignore k_ar and
# fall back to the library default.
model = VAR(train_data)
k_ar = 5
model_fitted = model.fit(k_ar)

# Look the target column up by name rather than relying on a fixed position.
tourists_idx = train_data.columns.get_loc('No. of Tourists')

# Multi-step forecast across the validation window, seeded with the last
# k_ar training observations (the lag window the fitted model needs).
forecast = model_fitted.forecast(train_data.values[-k_ar:], steps=len(validation_data))
predicted_tourists = forecast[:, tourists_idx]

# Calculate accuracy metrics
actual_tourists = validation_data['No. of Tourists'].values
mae = mean_absolute_error(actual_tourists, predicted_tourists)
mse = mean_squared_error(actual_tourists, predicted_tourists)
rmse = np.sqrt(mse)

print(f'Predicted Tourists for Next 12 Months: {predicted_tourists}')
print(f'Actual Tourists for the Next 12 Months: {actual_tourists}')
print(f'Mean Absolute Error (MAE): {mae}')
print(f'Mean Squared Error (MSE): {mse}')
print(f'Root Mean Squared Error (RMSE): {rmse}')

In [None]:
# Calculate Mean Absolute Percentage Error (MAPE)
def calculate_mape(actual, predicted):
    """Return the mean absolute percentage error, in percent.

    Uses only its own arguments (no notebook globals): the result is the mean
    of ``|actual - predicted| / |actual|`` times 100. ``actual`` must support
    elementwise arithmetic (e.g. a NumPy array); ``predicted`` may be an array
    of compatible shape or a scalar. Zero actuals yield an infinite or NaN result.
    """
    return (abs(actual - predicted) / abs(actual)).mean() * 100

# MAPE of the VAR forecast over the validation window.
mape = calculate_mape(actual_tourists, predicted_tourists)

# Calculate the percentage accuracy
# (defined as 100 minus MAPE; negative when MAPE exceeds 100)
accuracy = 100 - mape
print("Percentage Accuracy:", accuracy)

In [None]:
# Train on everything except the final 12 rows.
train_data = data[:-12]
# A one-step forecast from train_data targets the row immediately after it,
# i.e. row -12 of `data`. Validating against data[-1:] would compare the
# forecast with an observation 11 steps later.
validation_data = data[-12:-11]

# Fit a VAR model (library-default lag order).
model = VAR(train_data)
model_fitted = model.fit()

# Look the target column up by name rather than relying on a fixed position.
tourists_idx = train_data.columns.get_loc('No. of Tourists')

# Forecast one step ahead, seeded with the most recent training rows.
forecast = model_fitted.forecast(train_data.values[-12:], steps=1)
predicted_tourists = forecast[0, tourists_idx]

# Calculate accuracy metrics
actual_tourists = validation_data['No. of Tourists'].values
mae = mean_absolute_error(actual_tourists, [predicted_tourists])
mse = mean_squared_error(actual_tourists, [predicted_tourists])
rmse = np.sqrt(mse)

print(f'Predicted Tourists for Next Month: {predicted_tourists}')
print(f'Actual Tourists for the Next Month: {actual_tourists[0]}')
print(f'Mean Absolute Error (MAE): {mae}')
print(f'Mean Squared Error (MSE): {mse}')
print(f'Root Mean Squared Error (RMSE): {rmse}')

In [None]:
# Calculate Mean Absolute Percentage Error (MAPE)
def calculate_mape(actual, predicted):
    """Return the mean absolute percentage error, in percent.

    Uses only its own arguments (no notebook globals): the result is the mean
    of ``|actual - predicted| / |actual|`` times 100. ``actual`` must support
    elementwise arithmetic (e.g. a NumPy array); ``predicted`` may be an array
    of compatible shape or a scalar. Zero actuals yield an infinite or NaN result.
    """
    return (abs(actual - predicted) / abs(actual)).mean() * 100

# MAPE of the single-step VAR forecast; predicted_tourists is a scalar here
# and actual_tourists a one-element array.
mape = calculate_mape(actual_tourists, predicted_tourists)

# Calculate the percentage accuracy
# (defined as 100 minus MAPE; negative when MAPE exceeds 100)
accuracy = 100 - mape
print("Percentage Accuracy:", accuracy)

RNN Model

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torch.utils.data import Dataset

In [None]:
# Reload the raw CSV for the RNN section. This rebinds df1/df2, which the
# Prophet section used for other frames.
df1 = pd.read_csv("/kaggle/input/tourismdataset/GoaTourism.csv")
df2 = df1.drop(columns=['Relative', 'Percentage', 'GDP of goa (k )'])

# Standardize the target and scale it by 10; the result is stored under a new
# column name and the raw counts are dropped from the modelling frame.
# NOTE(review): the scaler is fit on every row, including the rows later
# sliced off as the test set — consider fitting on the training rows only.
scaler = StandardScaler()
column_to_standardize = df2['No. of Tourists']
standardized_column = scaler.fit_transform(column_to_standardize.values.reshape(-1, 1))
df2['No. of Tourists2'] = standardized_column * 10

df3 = df2.drop(columns=['No. of Tourists'])
df3

In [None]:
def datapoint(n, df):
    """Build sliding-window samples from ``df`` for next-step prediction.

    Parameters
    ----------
    n : int
        Number of consecutive rows in each input window.
    df : pandas.DataFrame
        Frame to window over. The first column is skipped; the last column is
        used as the prediction target.

    Returns
    -------
    (X, Y) : tuple of numpy.ndarray
        ``X`` holds ``len(df) - n`` windows of ``n`` rows each (every column
        except the first, so the target column is also an input feature).
        ``Y`` holds, for each window, the last-column value of the row that
        immediately follows it.
    """
    t1 = []
    t2 = []
    for i in range(len(df) - n):
        # Slice the frame that was passed in, not a notebook-level global, so
        # that a call on a test slice really yields test windows.
        r = df.iloc[i:i + n + 1, 1:].values
        t1.append(r[0:n])
        t2.append(r[n][-1])
    return np.array(t1), np.array(t2)
# Training windows: 5-row windows built from the first 144 rows of df3.
X, Y = datapoint(5, df3.iloc[:144])
# Cast to float32 before converting to tensors.
X = X.astype('float32')
Y = Y.astype('float32')
X.shape, Y.shape, X.dtype, Y.dtype

In [None]:
# Convert the NumPy training arrays into torch tensors.
Xp = torch.tensor(X)
Yp = torch.tensor(Y)
Xp.shape, Yp.shape

In [None]:
class CustomDataset(Dataset):
    """Index-aligned pairing of samples and labels.

    Each item is returned as a dict with keys ``'data'`` and ``'label'``
    holding the sample and the label found at the requested index.
    """

    def __init__(self, data, labels):
        # Both containers are stored as given and must be indexable.
        self.data, self.labels = data, labels

    def __len__(self):
        # The dataset length is the number of samples.
        return len(self.data)

    def __getitem__(self, idx):
        return dict(data=self.data[idx], label=self.labels[idx])
# Wrap the training tensors. Labels are reshaped to (N, 1) so they line up
# with the single-output prediction they are compared against in the loss.
# (The placeholder `data` / `labels` lists that used to precede this line were
# never used and overwrote the `data` DataFrame from the VAR section.)
custom_dataset = CustomDataset(Xp, Yp.reshape(Yp.shape[0],1))

In [None]:
# Mini-batch loader over the training windows.
batch_size = 20  # Define your batch size
shuffle = True  # Set to True for random shuffling of data
data_loader = DataLoader(custom_dataset, batch_size=batch_size, shuffle=shuffle)

In [None]:
# Sanity check: print the shapes and dtypes of a single batch, then stop.
for batch in data_loader:
    inputs = batch['data']
    labels = batch['label']
    print("Batch data:", inputs.shape)
    print("Batch labels:", labels.shape)
    print(inputs.dtype)
    print(labels.dtype)
    break

In [None]:
class SimpleRNN(nn.Module):
    """Single ``nn.RNN`` layer followed by a linear read-out at every time step.

    Parameters
    ----------
    input_size : int
        Number of features per time step.
    hidden_size : int
        Width of the recurrent hidden state (also stored as ``self.hidden_size``).
    output_size : int
        Number of outputs produced per time step by the linear layer.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super(SimpleRNN, self).__init__()
        self.hidden_size = hidden_size
        # batch_first=True: inputs are laid out as (batch, seq, feature).
        self.rnn = nn.RNN(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, h0=None):
        """Run the recurrent layer and project every step's hidden state.

        Parameters
        ----------
        x : tensor of shape (batch, seq, input_size)
        h0 : tensor of shape (1, batch, hidden_size), optional
            Initial hidden state. When omitted, ``nn.RNN`` starts from zeros,
            so existing callers that pass an explicit zero state are unaffected.

        Returns
        -------
        out : tensor of shape (batch, seq, output_size)
        hn : final hidden state returned by the recurrent layer
        """
        out, hn = self.rnn(x, h0)
        out = self.fc(out)
        return out, hn

In [None]:
# Shape smoke test on random data before the model is used on real batches.
# NOTE(review): input_size must equal the number of feature columns in the
# windows produced by datapoint() — confirm 7 against X.shape[-1].
input_size = 7
hidden_size = 20
output_size = 1
sequence_length = 4
# Random batch of 4 sequences, laid out as (batch, seq, feature).
input_data = torch.randn(4, sequence_length, input_size)
rnn = SimpleRNN(input_size, hidden_size, output_size)
# Zero initial hidden state for a batch of 4.
h0 = torch.zeros(1, 4, hidden_size)
output, final_hidden_state = rnn(input_data, h0)
print("Input shape:", input_data.shape)
print("Output shape:", output.shape)
print("Final hidden state shape:", final_hidden_state.shape)
print("H0 shape:", h0.shape)

In [None]:
# Loss of the still-untrained network on each training batch (baseline check).
criterion = nn.MSELoss()
for batch in data_loader:
    inputs = batch['data']
    labels = batch['label']
    print("Batch data:", inputs.shape)
    print("Batch labels:", labels.shape)
    # Size the initial hidden state from the model itself rather than
    # repeating the hidden width as a literal.
    h0 = torch.zeros(1, inputs.shape[0], rnn.hidden_size)
    output, final_hidden_state = rnn(inputs, h0)
    # Only the last time step's output is compared against the label.
    loss = criterion(output[:, -1, :], labels)
    print("Loss:", loss.item())
    print(loss)

In [None]:
# Loss, optimizer and training mode for the RNN.
criterion = nn.MSELoss()  # Use the appropriate loss function for your task
optimizer = torch.optim.Adam(rnn.parameters(), lr=0.001)
# Switch the module to training mode.
rnn.train()

In [None]:
# Total number of passes over the training loader; also used in the progress
# message so the printed total always matches the loop.
num_epochs = 3500
lossr = []  # one loss value per optimisation step (mini-batch), not per epoch
for epoch in range(num_epochs):
    for i, batch in enumerate(data_loader):
        inputs = batch['data']
        labels = batch['label']
        # Zero the gradients
        optimizer.zero_grad()
        # Forward pass from a fresh zero hidden state sized from the model.
        h0 = torch.zeros(1, inputs.shape[0], rnn.hidden_size)
        output, final_hidden_state = rnn(inputs, h0)
        # Calculate the loss on the last time step only
        loss = criterion(output[:, -1, :], labels)
        # Backpropagation. The graph is rebuilt on every forward pass, so it
        # does not need to be retained after backward().
        loss.backward()
        # Update model parameters
        optimizer.step()
        # Print the loss for monitoring (every 100th epoch)
        if epoch % 100 == 0:
            print(f'Epoch [{epoch + 1}/{num_epochs}], Batch [{i + 1}/{len(data_loader)}], Loss: {loss.item()}')
        lossr.append(loss.item())
print('Training complete')

In [None]:
# Training-loss curve. lossr holds one value per mini-batch update, so the
# x-axis counts optimisation steps rather than epochs.
plt.figure(figsize=(10, 8))  # Set the figure size first

plt.plot(lossr)
plt.xlabel('Training step (mini-batch)')
plt.ylabel('Loss')
plt.title('Loss vs training step')
plt.grid(True)  # Add a grid
plt.show()


In [None]:
# Test windows: 5-row windows requested from the rows of df3 at position 144 onward.
Xtest, Ytest = datapoint(5, df3.iloc[144:])
# Cast to float32 before converting to tensors.
Xtest = Xtest.astype('float32')
Ytest = Ytest.astype('float32')
Xtest.shape, Ytest.shape, Xtest.dtype, Ytest.dtype

In [None]:
# Switch the module to evaluation mode before scoring the test windows.
rnn.eval()

In [None]:
# Convert the NumPy test arrays into torch tensors.
Xptest = torch.tensor(Xtest)
Yptest = torch.tensor(Ytest)
Xptest.shape, Yptest.shape

In [None]:
# Score the trained network on the test windows in a single batch.
custom_datasettest = CustomDataset(Xptest, Yptest.reshape(Yptest.shape[0], 1))
batch_size = Xptest.shape[0]  # one batch containing every test window
shuffle = False  # evaluation does not need shuffling; keep chronological order
data_loadertest = DataLoader(custom_datasettest, batch_size=batch_size, shuffle=shuffle)
criterion = nn.MSELoss()
actual = []
predicted = []
# Gradients are not needed for evaluation.
with torch.no_grad():
    for batch in data_loadertest:
        inputs = batch['data']
        labels = batch['label']
        actual = labels
        print("Batch data:", inputs.shape)
        print("Batch labels:", labels.shape)
        # Zero initial hidden state sized from the model itself.
        h0 = torch.zeros(1, inputs.shape[0], rnn.hidden_size)
        output, final_hidden_state = rnn(inputs, h0)
        # The prediction is the output at the last time step of each window.
        predicted = output[:, -1, :]
        loss = criterion(predicted, labels)
        print("Loss:", loss.item())
        print(loss)

In [None]:
# Calculate Mean Absolute Percentage Error (MAPE)
def calculate_mape(actual, predicted):
    """Return the mean absolute percentage error, in percent.

    Works on any pair of inputs that support elementwise arithmetic, ``abs``
    and ``.mean()`` (NumPy arrays or torch tensors). The denominator is taken
    in absolute value: standardized targets can be negative, and a signed
    denominator would let their terms cancel positive ones. Zero actuals
    still yield an infinite or NaN result.
    """
    return (abs(actual - predicted) / abs(actual)).mean() * 100

# The network was trained on standardized targets multiplied by 10. Percentage
# errors on that scale are meaningless (values sit around zero and change
# sign), so undo the x10 factor and the standardization to get tourist counts
# back before computing MAPE.
actual_counts = scaler.inverse_transform(actual.detach().cpu().numpy() / 10)
predicted_counts = scaler.inverse_transform(predicted.detach().cpu().numpy() / 10)

mape = calculate_mape(actual_counts, predicted_counts)

# Calculate the percentage accuracy
# (defined as 100 minus MAPE; negative when MAPE exceeds 100)
accuracy = 100 - mape
print("Percentage Accuracy:", accuracy)