# MEP HS2022 Data Science Fundamentals

### Task 1a) Modeling

Answer A3

In [None]:
import matplotlib.pyplot as plt
from sklearn.linear_model import SGDRegressor

# Fit a linear model with stochastic gradient descent and plot the fitted line
# over the raw data.
# X and Y are expected to be defined in an earlier cell.
# NOTE(review): SGDRegressor.fit expects X as a 2-D (n_samples, n_features)
# array and Y as a 1-D target vector - confirm against the cell that builds them.
# random_state pins the data shuffling done during SGD so reruns give the same fit.
regr = SGDRegressor(max_iter=1000, eta0=0.1, random_state=42)
regr.fit(X, Y)

# Let the fitted estimator generate the predictions instead of rebuilding
# coef_ * X + intercept_ by hand: the manual formula only works through
# broadcasting when X has exactly one feature column.
Ypred = regr.predict(X)

plt.scatter(X, Y, color="blue")   # observed data points
plt.plot(X, Ypred, color="red")   # fitted regression line
plt.xlabel("X")
plt.ylabel("Y")
plt.show()

### Task 1c) Data organization with SQL - sqlite3

Answer C4

In [None]:
# Only the beginning and the end of this cell are Python-specific (sqlite3 boilerplate).
# sql_query holds the SQL code that would usually be typed directly into the DBMS.
import pandas as pd # Very useful for large amount of data stored in tables
import sqlite3 # Imports the SQL lite3 library into Python

# Open the SQLite database file that holds the customers and orders tables.
# The connection is left open here; call conn.close() once no further queries
# are needed.
conn = sqlite3.connect("transactions.db")

# All customers located in France except those in Nantes, together with their
# orders. LEFT JOIN keeps customers that have no order (OrderID / OrderDate are
# then NULL). String literals use single quotes: in SQLite, double quotes denote
# identifiers and are only accepted as strings through a legacy fallback.
# Country is qualified with T1 like every other column of the joined query.
sql_query = """SELECT 
                T1.CustomerID, T1.ContactName, T1.Address, T1.City,
                T1.PostalCode, T1.Country, T2.OrderID, T2.OrderDate
                FROM customers AS T1
                LEFT JOIN orders AS T2
                ON T1.CustomerID = T2.CustomerID
                WHERE T1.Country = 'France'
                AND T1.City <> 'Nantes'
                ORDER BY T1.ContactName"""

# Run the query and load the full result set into a DataFrame.
data = pd.read_sql(sql_query, con=conn)
data

### Neural Networks, Task 1e) - Answer E2

In [9]:
import tensorflow as tf
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# cancer_y holds the class label: 0 = malignant tumor, 1 = benign tumor

cancer_X, cancer_y = datasets.load_breast_cancer(return_X_y=True)
cancer_X_train, cancer_X_val, cancer_y_train, cancer_y_val = train_test_split(
    cancer_X, cancer_y, test_size=0.25, random_state=42)

# Fit the scaler on the training split only and reuse its statistics for the
# validation split, so no validation information leaks into training.
sc = StandardScaler()
cancer_X_train = sc.fit_transform(cancer_X_train)
cancer_X_val = sc.transform(cancer_X_val)

# Derive the input width from the data instead of hard-coding it
# (30 features for this dataset).
n_features = cancer_X_train.shape[1]

# Answer E1
# Input = all features/variables of the dataset; the network predicts the most
# likely class a new sample belongs to. It is a fully connected neural network
# with 4 hidden layers.
# ReLU (Rectified Linear Unit) passes inputs greater than 0 without an upper
# bound; it trains fast and reduces the likelihood of vanishing gradients.
# The sigmoid output maps its input to the range 0-1, which is what binary
# classification needs.
model = tf.keras.Sequential([
    tf.keras.Input(shape=(n_features,)),
    tf.keras.layers.Dense(64, activation="relu"),
    tf.keras.layers.Dense(32, activation="relu"),
    tf.keras.layers.Dense(16, activation="relu"),
    tf.keras.layers.Dense(8, activation="relu"),
    tf.keras.layers.Dense(1, activation="sigmoid"),  # output layer
])

# from_logits=False because the output layer already applies the sigmoid.
model.compile(loss=tf.keras.losses.BinaryCrossentropy(from_logits=False),
              optimizer="sgd", metrics=["accuracy"])

history = model.fit(cancer_X_train, cancer_y_train, epochs=5,
                    validation_data=(cancer_X_val, cancer_y_val))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
