# Deep Learning (RNN-LSTM):

In [1]:
import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense, LSTM
import datetime as datetime
import time
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

## Prepare the data:

In [2]:
train = pd.read_csv('cleanbooksdata.csv')

In [3]:
# Bleibt: rating, num_ratings, price, type, das Jahr
train = train.drop(['title', 'currency', 'author'], axis=1)
train = train.drop(train.columns[0], axis=1)

In [4]:
#Für die Vorhersage wird 0 und 1 für die Werte gesetzt: Taschenbuch = 0, Gebundenes Buch = 1 
train['type'] = train['type'].str.replace('Taschenbuch', '0')
train['type'] = train['type'].str.replace('Gebundenes Buch', '1')
train['booktype'] = train['type'].astype(object).astype(int)
train = train.drop('type', axis=1)
display(train)

Unnamed: 0,rating,num_ratings,price,date,booktype
0,4.7,67538.0,20.41,10. Dezember 2018,0
1,4.5,8627.0,11.49,5. Februar 2013,1
2,4.5,99055.0,12.44,13. September 2016,0
3,4.6,33078.0,11.49,10. Mai 2012,0
4,4.6,22039.0,5.69,23. März 2017,0
...,...,...,...,...,...
81,4.4,1478.0,10.54,15. April 2019,0
82,4.3,6049.0,1.98,29. November 2004,0
83,4.7,24188.0,10.24,4. Januar 2018,0
84,4.6,37461.0,7.77,22. Mai 2012,0


In [5]:
# Define a list of month names in German
month_names = ['Januar', 'Februar', 'März', 'April', 'Mai', 'Juni', 
               'Juli', 'August', 'September', 'Oktober', 'November', 'Dezember']

# Define a function to convert German date string to datetime object
def german_date_to_datetime(date_string):
    # Split the date string into day, month, and year
        day, month_name, year = date_string.split()
    # Remove any trailing dot from the day component
        day = day.rstrip('.')
    # Convert the month name to its corresponding month number
        month = month_names.index(month_name) + 1
    # Create a datetime object from the day, month, and year
        return datetime.datetime(int(year), month, int(day))
        

# Apply the function to the "datum" column of the DataFrame
train['date'] = train['date'].apply(german_date_to_datetime)

In [6]:
train['date'] = train['date'].dt.year

In [7]:
display(train)

Unnamed: 0,rating,num_ratings,price,date,booktype
0,4.7,67538.0,20.41,2018,0
1,4.5,8627.0,11.49,2013,1
2,4.5,99055.0,12.44,2016,0
3,4.6,33078.0,11.49,2012,0
4,4.6,22039.0,5.69,2017,0
...,...,...,...,...,...
81,4.4,1478.0,10.54,2019,0
82,4.3,6049.0,1.98,2004,0
83,4.7,24188.0,10.24,2018,0
84,4.6,37461.0,7.77,2012,0


In [8]:
train['age'] = 2023 - (train['date'])
train = train.drop(['date'], axis=1)

In [9]:
# Print the first 5 rows of the DataFrame to confirm the conversion
print(train.head())

   rating  num_ratings  price  booktype  age
0     4.7      67538.0  20.41         0    5
1     4.5       8627.0  11.49         1   10
2     4.5      99055.0  12.44         0    7
3     4.6      33078.0  11.49         0   11
4     4.6      22039.0   5.69         0    6


In [10]:
train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 86 entries, 0 to 85
Data columns (total 5 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   rating       86 non-null     float64
 1   num_ratings  86 non-null     float64
 2   price        86 non-null     float64
 3   booktype     86 non-null     int32  
 4   age          86 non-null     int64  
dtypes: float64(3), int32(1), int64(1)
memory usage: 3.1 KB


## Extract the input and output variables from the DataFrame

In [11]:
# Extract the input daten:
X = train[['rating', 'num_ratings', 'booktype', 'age']].values

In [12]:
# Extract the output daten:
y = train['price'].values

## Normalisation

In [13]:
# Daten normalisieren

scaler = MinMaxScaler()
X = scaler.fit_transform(X)

print("The data points have been standardized.")

The data points have been standardized.


## Split the dataset into train and test data

In [31]:
#Aufteilen:
train_size = int(len(train)*0.5)
train_X, train_y = X[:train_size], y[train_size:]
test_X, test_y = X[:train_size], y[train_size:]

train_X.shape

(43, 4)

In [32]:
train_y

array([ 8.54, 14.83,  6.44,  9.99,  9.57, 17.62,  9.38, 47.13,  9.09,
       25.  , 21.06,  9.47,  9.09,  7.18,  8.71,  8.52, 11.3 ,  9.3 ,
       19.46,  7.08,  6.69,  9.47, 10.54, 19.99, 10.19, 22.62,  0.99,
       11.49, 18.15, 11.3 ,  5.49, 10.31, 11.45,  6.89,  8.61,  9.  ,
        9.31, 11.49, 10.54,  1.98, 10.24,  7.77, 13.4 ])

In [33]:
train_X

array([[0.85714286, 0.15533067, 0.        , 0.06666667],
       [0.57142857, 0.01915773, 1.        , 0.23333333],
       [0.57142857, 0.2281823 , 0.        , 0.13333333],
       [0.71428571, 0.07567629, 0.        , 0.26666667],
       [0.71428571, 0.05015961, 0.        , 0.1       ],
       [0.85714286, 0.00760022, 0.        , 0.13333333],
       [0.57142857, 0.05273462, 0.        , 0.13333333],
       [0.71428571, 0.21993948, 0.        , 0.33333333],
       [0.85714286, 0.28572948, 1.        , 0.06666667],
       [0.71428571, 0.05399208, 0.        , 0.46666667],
       [0.85714286, 0.00490501, 0.        , 0.1       ],
       [0.57142857, 0.0397486 , 0.        , 0.6       ],
       [0.71428571, 0.2297495 , 0.        , 0.13333333],
       [0.85714286, 0.01807364, 1.        , 0.06666667],
       [0.42857143, 0.05018735, 0.        , 0.9       ],
       [0.42857143, 0.0439278 , 0.        , 0.03333333],
       [0.57142857, 0.02929367, 1.        , 0.1       ],
       [1.        , 0.0476655 ,

## LSTM-Model creation

In [34]:
# Erstellen des LSTM-Models:

model = Sequential()
model.add(LSTM(50, input_shape=(train_X.shape[1], 1)))
model.add(Dense(1))

In [35]:
# Kompilieren des Models
model.compile(loss='mean_squared_error', optimizer='adam')

In [36]:
# Trainieren des Models

model.fit(train_X.reshape((train_X.shape[0], train_X.shape[1], 1)), train_y, epochs=50, batch_size=10, verbose=0)

<keras.callbacks.History at 0x1c5399c9f40>

In [37]:
# Evaluieren des Models besierend auf den Testdaten
test_loss = model.evaluate(test_X, test_y)



In [38]:
# Verwendung von dem, um Vorhersagen zu erstellen
#predictions = model.predict(test_X)

In [39]:
# Vorhersage auf Testdaten

X_test_norm = scaler.transform(test_X)
y_pred = model.predict(X_test_norm.reshape((X_test_norm.shape[0], X_test_norm.shape[1], 1)))




In [40]:
# Bewertung des Modells

from sklearn.metrics import mean_squared_error
mse = mean_squared_error(test_y, y_pred)

print('MSE: %.3f' % mse)

MSE: 232.715
