## Parte 1 - Regressão Linear do zero!

In [None]:
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm

In [None]:
# Fix the random seeds so the random draws below are repeatable
torch.manual_seed(42)
np.random.seed(42)

In [None]:
# Pontos do eixo x
x = torch.arange(-5, 5, 0.1, dtype=torch.float32)
x

In [None]:
# Definindo uma função linear y(x)
y = 3*x + 5
y

In [None]:
# Scatter plot of the target points (x, y)
plt.title('Equação y = 3x + 5')
plt.scatter(x, y, c='g')

# NOTE(review): both axes start at 0 although x starts at -5, so the points
# with negative x or y fall outside the visible area — confirm this is intended.
plt.xlim([0, max(x)])
plt.ylim([0, max(y)])

plt.xlabel('x')
plt.ylabel('y')

plt.show()

## Objetivo

Automaticamente aproximar **y**

y = 3x + 5

y = wx + b

```python
w = 3.0
b = 5.0
```

### Modelo de Regressão Linear


In [None]:
class LinearRegressionModel(nn.Module):
    """Single-feature linear model ``y = w * x + b`` with learnable ``w`` and ``b``.

    Both values are drawn with ``torch.randn(1)`` and wrapped in
    ``nn.Parameter`` so that ``nn.Module`` registers them: they are then
    returned by ``parameters()`` / ``named_parameters()`` and included in
    ``state_dict()``.
    """

    def __init__(self):
        super().__init__()
        # nn.Parameter has requires_grad=True by default, so autograd tracks
        # both tensors just as it did with plain requires_grad=True tensors.
        self.w = nn.Parameter(torch.randn(1))
        self.b = nn.Parameter(torch.randn(1))

        print(f'w inicializado como: {self.w}')
        print(f'b inicializado como: {self.b}')

    def forward(self, x):
        """Return the prediction ``w * x + b`` for the input ``x``."""
        return self.w * x + self.b

In [None]:
model = LinearRegressionModel()

#### O que um modelo não treinado produz?

In [None]:
preds = model(x)
preds

In [None]:
# Target points (green) against the line produced by the untrained model (red)
plt.title('Alvo: y = 3x + 5')
plt.scatter(x, y, c='g', label='y=3x + 5 - real')
# detach() is required before converting a tensor that tracks gradients to numpy
plt.plot(x, preds.detach().numpy(), c='r', label=f'y = {model.w.item():.2f}x + {model.b.item():.2f} - modelo')
# NOTE(review): axes start at 0, so points with negative x or y are not shown — confirm intended.
plt.xlim([0, max(x)])
plt.ylim([0, max(y)])
plt.xlabel('x')
plt.ylabel('y')

plt.legend()

plt.show()

### Medindo o Loss

Loss = MSE = Mean Squared Error

$ \text{MSE} = \frac{1}{n} \sum_{i=1}^{n} (y_i - \hat{y}_i)^2 $

In [None]:
def mse_loss(y_i, y_hat):
    """Mean squared error: the mean of the element-wise squared difference.

    Relies only on subtraction, ``**`` and a ``.mean()`` method of the inputs.
    """
    residual = y_i - y_hat
    squared_error = residual ** 2
    return squared_error.mean()

### Minimizando o Loss

### Obtendo o vetor gradiente contendo a influência de cada parâmetro no Loss:

$ \nabla J = \begin{bmatrix} \frac{\partial J}{\partial \theta_1} \\ \frac{\partial J}{\partial \theta_2} \\ \vdots \\ \frac{\partial J}{\partial \theta_n} \end{bmatrix} $

##### Mágica do **PyTorch** = AutoGrad!
`https://pytorch.org/tutorials/beginner/blitz/autograd_tutorial.html`

In [None]:
torch.cuda.is_available()

In [None]:
print(f"Gradiente de w: {model.w.grad}, grad. de b: {model.b.grad}")

In [None]:
# Loss of the untrained model. Arguments follow the mse_loss(y_i, y_hat)
# signature: targets first, predictions second. The value is the same either
# way because the difference is squared.
loss = mse_loss(y, preds)
loss

In [None]:
### Learning rate (scales the size of each parameter update)
learning_rate = 0.1

### Compute the gradients with autograd
# retain_graph=True asks autograd to keep the graph after this backward pass
loss.backward(retain_graph=True)

w_grad = model.w.grad
b_grad = model.b.grad

print(f"w: {model.w.item():.2f} - grad: {w_grad} \nb: {model.b.item():.2f} - grad: {b_grad}")

$ {grad(w)} = \frac{\partial J}{\partial w} = [-43.9102] $

$ {grad(b)} = \frac{\partial J}{\partial b} = [-9.4761] $

### Atualizando os parâmetros

$ \theta_{i+1} = \theta_i - \alpha \frac{\partial J(\theta_i)}{\partial \theta_i} $

$ \theta_{i+1} = \theta_i - \alpha \nabla J(\theta_i) $

In [None]:
### Gradient-descent step: move w and b against their gradients, scaled by the learning rate
new_w = model.w - learning_rate * model.w.grad
new_b = model.b - learning_rate * model.b.grad

print(f'[w] Antes: {model.w.item():.2f} - Depois: {new_w.item():.2f}')
print(f'[b] Antes: {model.b.item():.2f} - Depois: {new_b.item():.2f}')

In [None]:
### Write the updated values into the model's parameters
model.w.data = new_w
model.b.data = new_b

### Zero the gradients so the next backward() does not accumulate onto them
model.w.grad.zero_()
model.b.grad.zero_()

print(f'O Modelo y = w*x + b se tornou: y = {model.w.item():.2f} * x + {model.b.item():.2f}')

In [None]:
### Refazendo as previsões
preds_new = model(x)
preds_new

In [None]:
plt.title('Alvo: y = 3x + 5')
plt.scatter(x, y, c='g', label='y=3x + 5 - real')
plt.plot(x, preds_new.detach().numpy(), c='r', label=f'y = {model.w.item():.2f}x + {model.b.item():.2f} - modelo')
plt.xlabel('x')
plt.ylabel('y')

plt.legend()

plt.show()

In [None]:
# Loss after one update step. Targets first, predictions second, matching the
# mse_loss(y_i, y_hat) signature (the value is identical in either order).
new_loss = mse_loss(y, preds_new)

# Show the new loss and its ratio to the loss before the update
new_loss, new_loss / loss

In [None]:
### What do the gradients look like after the update?
new_loss.backward(retain_graph=True)

w_grad = model.w.grad
b_grad = model.b.grad

print(f"Gradiente de w: {w_grad}, grad. de b: {b_grad}")
# Zero the gradients in place so they do not carry over into later backward() calls
model.w.grad.zero_()
model.b.grad.zero_()

### Automatizando o treinamento - Criando o Looping

In [None]:
loss_hist = []
w_grad_hist = []
b_grad_hist = []
w_hist = []
b_hist = []

In [None]:
model = LinearRegressionModel()

In [None]:
learning_rate = 0.1
epochs = 50

# Start from empty histories so that re-running this cell does not append a
# second run onto the values recorded by a previous one.
loss_hist = []
w_grad_hist = []
b_grad_hist = []
w_hist = []
b_hist = []

for i in tqdm(range(epochs)):

    if i == 0:
        print(f'[INFO] Epoch: {i} - w: {model.w.item():.4f} - b: {model.b.item():.4f}')

    y_hat = model(x)

    # Targets first, predictions second, matching mse_loss(y_i, y_hat)
    loss = mse_loss(y, y_hat)
    loss_hist.append(loss.item())

    ### AutoGrad: compute the gradient of the loss w.r.t. w and b
    loss.backward()

    w_grad_hist.append(model.w.grad.item())
    b_grad_hist.append(model.b.grad.item())

    ### Gradient-descent update. no_grad() keeps autograd from recording the
    ### in-place step, which replaces the older `.data` assignment idiom.
    with torch.no_grad():
        model.w.sub_(learning_rate * model.w.grad)
        model.b.sub_(learning_rate * model.b.grad)

    w_hist.append(model.w.item())
    b_hist.append(model.b.item())

    ### Zero the gradients so they do not accumulate across epochs
    model.w.grad.zero_()
    model.b.grad.zero_()

    if i % 10 == 0:
        print(f'[INFO] Epoch: {i+1} - Loss: {loss.item()} - w: {model.w.item():.4f} - b: {model.b.item():.4f}')

print(f'\n[INFO] Parâmetros aprendidos: w = {model.w.item():.4f}, b = {model.b.item():.4f}')

### Como o Loss evoluiu?

In [None]:
plt.title(f'Loss vs epochs\ny = {model.w.item():.4f} * x + {model.b.item():.4f}')
plt.ylabel('Loss - MSE')
plt.xlabel('Epochs')
plt.plot(range(len(loss_hist)), loss_hist)

### Como `w` variou?

In [None]:
plt.figure(figsize=(10, 5))

plt.subplot(1, 2, 1)
plt.title('W vs Epochs')
plt.ylabel('W')
plt.xlabel('Epochs')
plt.axhline(y=3, color='r', linestyle='--')
plt.plot(range(len(w_hist)), w_hist)

plt.subplot(1, 2, 2)
plt.title('∂J/∂w vs Epochs')
plt.ylabel('∂J/∂w')
plt.xlabel('Epochs')
plt.axhline(y=0, color='r', linestyle='--')
plt.plot(range(len(w_grad_hist)), w_grad_hist)

plt.tight_layout()

### Como `b` variou?

In [None]:
plt.figure(figsize=(10, 5))

plt.subplot(1, 2, 1)
plt.title('b vs Epochs')
plt.ylabel('b')
plt.xlabel('Epochs')
plt.axhline(y=5, color='r', linestyle='--')
plt.plot(range(len(b_hist)), b_hist, color='purple')

plt.subplot(1, 2, 2)
plt.title('∂J/∂b vs Epochs')
plt.ylabel('∂J/∂b')
plt.xlabel('Epochs')
plt.axhline(y=0, color='r', linestyle='--')
plt.plot(range(len(b_grad_hist)), b_grad_hist, color='purple')

plt.tight_layout()

## Parte 2 - Aplicando ML em dados reais

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

### I. Prevendo o Índice de Liberdade Econômica de um país

**Sobre o problema**:

O Índice de Liberdade Econômica é utilizado para o estudo de mais de duas décadas de avanço na liberdade econômica e prosperidade ao redor do mundo. O Índice abrange 12 liberdades - desde direitos de propriedade até liberdade financeira - medido em 186 países.

Fonte:
> https://www.kaggle.com/datasets/lewisduncan93/the-economic-freedom-index

#### 1. Importando os dados

In [None]:
df = pd.read_csv('./economic_freedom_index2019_data.csv', encoding='latin-1')

#### 2. EDA - Análise Exploratória de Dados

In [None]:
df.head()

In [None]:
df.shape

In [None]:
### Drop the redundant columns in a single call
df = df.drop(columns=['CountryID', 'WEBNAME', 'Country'])

In [None]:
### Problema -> variável categórica está como string
df['Region']

In [None]:
df['Region'].value_counts()

In [None]:
### Convertendo variáveis categóricas em one-hot-encoding
from sklearn.preprocessing import OneHotEncoder
regions = df[['Region']]
encoder = OneHotEncoder()

feature_array = encoder.fit_transform(regions).toarray()
feature_array

In [None]:
# Names of the encoded columns, with the 'Region_' text removed so that each
# column is named after the region itself
regions_names = encoder.get_feature_names_out(input_features=['Region'])
regions_names = [name.replace('Region_', '') for name in regions_names]
regions_names


In [None]:
regions_encoded = pd.DataFrame(feature_array, columns=regions_names)
regions_encoded

In [None]:
df = pd.concat([df, regions_encoded], axis=1)
df = df.drop(['Region'], axis=1)

df.head()

In [None]:
def dollar_to_float(value):
    """Convert a currency-formatted value such as ``'$1,234.5'`` to ``float``.

    The value is converted to ``str`` first; every ``$`` and ``,`` is removed
    and surrounding whitespace is stripped before parsing with ``float``.
    """
    text = str(value)
    for symbol in ('$', ','):
        text = text.replace(symbol, '')
    return float(text.strip())

In [None]:
### Parse the currency-formatted columns into floats
# dollar_to_float is passed directly; a lambda wrapper adds nothing here
df['GDP (Billions, PPP)'] = df['GDP (Billions, PPP)'].apply(dollar_to_float)
df['GDP per Capita (PPP)'] = df['GDP per Capita (PPP)'].apply(dollar_to_float)
df['FDI Inflow (Millions)'] = df['FDI Inflow (Millions)'].apply(dollar_to_float)

In [None]:
df.info()

In [None]:
df.describe()

In [None]:
df.columns

In [None]:
df

In [None]:
# Columns used in the analysis; the first entry is the prediction target
target_features = [
                   '2019 Score', # <---- target!
                   'Labor Freedom',
                   'Monetary Freedom',
                   'Trade Freedom',
                   'Investment Freedom ',  # NOTE(review): trailing space presumably matches the CSV header — verify
                   'Financial Freedom',
                   'GDP (Billions, PPP)',
                   'GDP per Capita (PPP)',
                   'Unemployment (%)',
                   'Inflation (%)',
                   'FDI Inflow (Millions)',
                   'Public Debt (% of GDP)',
                   'Property Rights',
                   'Tax Burden',
                   'Fiscal Health',
                   'Business Freedom'
                ]

In [None]:
### Visualizando os dados
df[target_features].hist(bins=70, figsize=(12, 12))
plt.show()

In [None]:
# Region columns to include alongside the numeric features. Only names that
# are not already present are appended, so re-running this cell does not
# duplicate them in target_features.
region_columns = ['Americas',
                  'Asia-Pacific',
                  'Europe',
                  'Middle East and North Africa',
                  'Sub-Saharan Africa']
target_features += [name for name in region_columns if name not in target_features]

In [None]:
### Buscando correlações
corr_matrix = df[target_features].corr()
corr_matrix

In [None]:
## Impacto de cada feature no Score de 2019
corr_matrix['2019 Score'].sort_values()

> +1 = correlação positiva

> -1 = correlação negativa

> 0 = sem correlação

In [None]:
plt.figure(figsize=(12, 12))
# corr_matrix is a DataFrame, so seaborn can label the cells with its own
# column/index names; xticklabels/yticklabels=True forces every name to be drawn.
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', fmt=".2f",
            xticklabels=True, yticklabels=True)

# Only rotate the labels seaborn already placed at the cell centres.
# Re-positioning the ticks at 0..n-1 would move them to the cell edges and
# misalign the labels with the cells.
plt.xticks(rotation=90)
plt.yticks(rotation=0)

plt.title('Correlation Matrix')
plt.show()

In [None]:
df_features = df[target_features]

In [None]:
### Fill NaN values with the mean of their column
# fillna returns a new DataFrame, so nothing is assigned column-by-column into
# a frame selected from `df` (that pattern triggers SettingWithCopyWarning).
# NOTE(review): the target column '2019 Score' is imputed as well — confirm intended.
df_features = df_features.fillna(df_features.mean())

In [None]:
### Antes
df.describe()

In [None]:
### Depois
df_features.describe()

In [None]:
df_features.head()

In [None]:
### Plotando a correlação mais positiva
plt.figure(figsize=(7, 7))
plt.scatter(df_features['Property Rights'],
            df_features['2019 Score'],
            color='orange')

plt.title('Direitos de propriedade privada x Liberdade Econômica')
plt.xlabel('Direitos de propriedade privada')
plt.ylabel('Score de 2019')

In [None]:
### Plotando uma correlação positiva fraca
plt.figure(figsize=(7, 7))
plt.scatter(df_features['GDP (Billions, PPP)'],
            df_features['2019 Score'],
            color='green')

plt.title('PIB x Liberdade Econômica')
plt.xlabel('PIB')
plt.xlim(0, 2000)
plt.ylabel('Score de 2019')

In [None]:
### Plotando uma correlação negativa fraca
plt.figure(figsize=(7, 7))
plt.scatter(df_features['Inflation (%)'],
            df_features['2019 Score'],
            color='purple')

plt.title('Inflação x Liberdade Econômica')
plt.xlabel('Inflação')
plt.xlim(0, 60)
plt.ylabel('Score de 2019')

In [None]:
df_features.columns

In [None]:
### Separando a variável-alvo (y)
df_targets = df_features['2019 Score']
df_targets

In [None]:
### Removendo o score das features
df_features = df_features.drop(['2019 Score'], axis=1)
df_features

In [None]:
### Feature scaling with MinMaxScaler
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()

# NOTE(review): the scaler is fit on all rows before the train/test split
# further down, so test rows influence the learned min/max — confirm this is
# acceptable for this analysis.
df_features_scaled = scaler.fit_transform(df_features)
df_features_scaled.shape

In [None]:
df_features_scaled

In [None]:
df_targets

In [None]:
scaler.__dict__

In [None]:
### O que o scaler armazenou?
print(f'Amostras - {scaler.n_samples_seen_}\n')
print(f'Menor valor - {scaler.data_min_} - {scaler.data_min_.shape} features\n')
print(f'Maior valor - {scaler.data_max_} - {scaler.data_max_.shape} features')

In [None]:
### Visualizando os dados padronizados
df_features_scaled_plot = pd.DataFrame(df_features_scaled, columns=df_features.columns)
df_features_scaled_plot.hist(bins=70, figsize=(12, 12))
plt.show()

In [None]:
### Separando datasets
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(df_features_scaled,
                                                   df_targets,
                                                   test_size=0.2,
                                                   random_state=42)

In [None]:
def plot_data_split(train_data = X_train,
                     train_labels = y_train,
                     test_data = X_test,
                     test_labels = y_test
                     ):
    """Scatter the leading training and test labels on one figure.

    The number of points drawn for each set is the length of the shorter of
    ``train_data`` and ``test_data``. The defaults are the split objects that
    exist when this function is defined.
    """
    shown = min(len(train_data), len(test_data))
    positions = range(shown)

    plt.figure(figsize=(10,7))
    series = (
        (train_labels, 'b', 'Training data'),
        (test_labels, 'g', 'Test data'),
    )
    for labels, colour, legend_text in series:
        plt.scatter(positions, labels[:shown], c=colour, label=legend_text)

    plt.legend()

In [None]:
assert len(X_train) + len(X_test) == len(df_features_scaled), "A divisão não foi realizada corretamente!"

In [None]:
assert len(y_train) + len(y_test) == len(df_features_scaled), "A divisão não foi realizada corretamente!"

In [None]:
X_train.shape, y_train.shape

In [None]:
X_test.shape, y_test.shape

In [None]:
X_train

In [None]:
plot_data_split()

In [None]:
### Treinando modelos
# Regressão Linear
from sklearn.linear_model import LinearRegression

lr = LinearRegression()
lr.fit(X_train, y_train)

In [None]:
### Coeficientes aprendidos (pesos)
lr.coef_

In [None]:
### b (viés)
lr.intercept_

In [None]:
### Obtendo o Loss de test
from sklearn.metrics import mean_squared_error

y_preds = lr.predict(X_test)
mse_linear_regression = mean_squared_error(y_test, y_preds)
mse_linear_regression

In [None]:
### Visualizando
def plot_predictions(test_labels = y_test,
                     predictions = None,
                     num_samples=38):
    """Scatter true test labels and model predictions, sample by sample.

    Args:
        test_labels: true target values; defaults to the ``y_test`` that
            exists when this function is defined.
        predictions: predicted values aligned with ``test_labels``. Required,
            although the ``None`` default is kept for backward compatibility.
        num_samples: maximum number of leading samples to plot. It is capped
            at the length of the shorter input so the x positions always
            match the number of plotted values.

    Raises:
        TypeError: if ``predictions`` is not provided.
    """
    if predictions is None:
        # Fail with a clear message instead of "'NoneType' object is not subscriptable"
        raise TypeError("plot_predictions() requires the 'predictions' argument")

    # Never ask for more points than either input can supply
    num_samples = min(num_samples, len(test_labels), len(predictions))

    samples = range(num_samples)
    plt.figure(figsize=(10,7))

    plt.xlabel('Amostra aleatória')
    plt.ylabel('Score em 2019')
    plt.title('Comparando as previsões no conjunto de testes')
    plt.scatter(samples, test_labels[:num_samples], c='g', label='Test data')
    plt.scatter(samples, predictions[:num_samples], c='r', label='Predictions')

    plt.legend()

In [None]:
for i in range(10):
    print(f'Predicted: {y_preds[i]} - Real: {y_test.iloc[i]}')

In [None]:
plot_predictions(predictions=y_preds)

### Testando outros modelos

In [None]:
# Decision Tree
from sklearn.tree import DecisionTreeRegressor

# Fixed random_state (same value as the train/test split) so the fitted tree
# and its test MSE are the same on every run.
tree_reg = DecisionTreeRegressor(random_state=42)
tree_reg.fit(X_train, y_train)

In [None]:
tree_preds = tree_reg.predict(X_test)
mse_tree = mean_squared_error(y_test, tree_preds)
mse_tree

In [None]:
plot_predictions(predictions=tree_preds)

In [None]:
# Random forest
from sklearn.ensemble import RandomForestRegressor

# Fixed random_state (same value as the train/test split) so the fitted
# forest and its test MSE are the same on every run.
forest_reg = RandomForestRegressor(random_state=42)
forest_reg.fit(X_train, y_train)

In [None]:
forest_preds = forest_reg.predict(X_test)
mse_forest = mean_squared_error(y_test, forest_preds)
mse_forest

In [None]:
plot_predictions(predictions=forest_preds)

In [None]:
forest_preds

##### Regressão Linear venceu!

In [None]:
mse_linear_regression, mse_tree, mse_forest

## Classificação - Regressão Logística

In [None]:
from IPython.display import Image
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

In [None]:
Image(url='https://s3.amazonaws.com/assets.datacamp.com/blog_assets/Machine+Learning+R/iris-machinelearning.png')

In [None]:
scaler = StandardScaler()

In [None]:
df = load_iris(as_frame=True)

In [None]:
df.data

In [None]:
df.target_names

In [None]:
df.target

In [None]:
iris = load_iris()
X = iris.data
y = iris.target

In [None]:
X.shape, X

In [None]:
y.shape, y

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
log_reg = LogisticRegression(random_state=42, max_iter=1000)
log_reg.fit(X_train_scaled, y_train)

In [None]:
log_reg.__dict__

In [None]:
y_pred = log_reg.predict(X_test_scaled)

accuracy = accuracy_score(y_test, y_pred)
print(f"Acc: {accuracy}")

In [None]:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
cm = confusion_matrix(y_test, y_pred)

disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=iris.target_names)
disp.plot()
plt.show()

In [None]:
log_reg.coef_

In [None]:
log_reg.intercept_

#### 3x4 pesos e 3 vieses?

### Recordando...

Temos 3 classes: 'setosa', 'versicolor', 'virginica'

E o modelo de regressão logística é apenas binário: y = sigmoid(w*x + b)

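Em cenários multiclasse, uma estratégia clássica é a OvA (one vs. all, também chamada OvR): para cada classe, um modelo binário é treinado (dirá se a amostra pertence ou não a essa classe). Observação: com o solver padrão (`lbfgs`), o `LogisticRegression` do scikit-learn ajusta, na verdade, um único modelo multinomial (softmax), que também possui um vetor de pesos e um viés por classe — por isso `coef_` tem formato 3x4 e `intercept_` tem 3 valores.

3 classes = 3 conjuntos de parâmetros (3 modelos binários, no caso OvA), cada um com 4 pesos e 1 viés.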