In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.svm import SVR

# Load the dataset
df_dados = pd.read_csv('df_bsi_TR.csv', sep=',')

# Separate features (X) and target (y)
X = df_dados.drop(columns=['tempo_relativo']).to_numpy()
y = df_dados['tempo_relativo'].to_numpy()

# Define SVR models with different kernels
svr_rbf = SVR(kernel="rbf", C=100, gamma=0.1, epsilon=0.1)
svr_lin = SVR(kernel="linear", C=100, gamma="auto")
svr_poly = SVR(kernel="poly", C=100, gamma="auto", degree=3, epsilon=0.1, coef0=1)

# Fit the models
svrs = [svr_rbf, svr_lin, svr_poly]
kernel_label = ["RBF", "Linear", "Polynomial"]
model_color = ["m", "c", "g"]

# Plot the results
fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(15, 10), sharey=True)
for ix, svr in enumerate(svrs):
    svr.fit(X, y)
    y_pred = svr.predict(X)
    
    axes[ix].scatter(X[:, 0], y, color='k', s=20, label="Data")
    axes[ix].scatter(X[:, 0], y_pred, color=model_color[ix], s=50, label="{} model".format(kernel_label[ix]), alpha=0.6)
    
    axes[ix].legend(loc="upper center", bbox_to_anchor=(0.5, 1.1), ncol=1, fancybox=True, shadow=True)
    axes[ix].set_title("{} SVR".format(kernel_label[ix]))

fig.text(0.5, 0.04, "Feature 1", ha="center", va="center")
fig.text(0.06, 0.5, "tempo_relativo", ha="center", va="center", rotation="vertical")
fig.suptitle("Support Vector Regression", fontsize=14)
plt.show()

In [None]:
# 1. Carregar os dados
 # Substitua pelo seu caminho de arquivo
data = pd.read_csv('tabela_final.csv', sep=';')

In [None]:
# 3. Seleção de Variáveis (exemplo simplificado)
X = data[['semestre_dividido', 'ch_cumprida_dividida']]
y = data['status']  # 1 se evadiu, 0 se não evadiu

# 4. Modelagem
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = LogisticRegression()
model.fit(X_train, y_train)

# 5. Treinamento e Validação
y_pred = model.predict(X_test)

# Avaliação do Modelo
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

print(f'Acurácia: {accuracy}')
print(f'Precisão: {precision}')
print(f'Recall: {recall}')
print(f'F1-Score: {f1}')

KeyError: "None of [Index(['semestre_dividido', 'ch_cumprida_dividida'], dtype='object')] are in the [columns]"

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# 1. Carregar os dados
 # Substitua pelo seu caminho de arquivo
df = pd.read_csv('tabela_final.csv', sep=';')

# Definir o target
df['evasao'] = df['status'].apply(lambda x: 1 if x == -1 else 0)

# Selecionar as features (colunas de reprovação)
features = [col for col in df.columns if col.endswith('_REPROVADO')]

# Criar X e y
X = df[features]
y = df['evasao']

# Dividir os dados em conjuntos de treinamento e teste
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Treinar o modelo de regressão logística
model = LogisticRegression()
model.fit(X_train, y_train)

# Fazer previsões no conjunto de teste
y_pred = model.predict(X_test)

# Avaliar a performance do modelo
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

print(f'Accuracy: {accuracy:.2f}')
print(f'Precision: {precision:.2f}')
print(f'Recall: {recall:.2f}')
print(f'F1 Score: {f1:.2f}')

# Obter os coeficientes do modelo
coef_df = pd.DataFrame({'Disciplina': features, 'Coeficiente': model.coef_[0]})
coef_df = coef_df.sort_values(by='Coeficiente', ascending=False)

print(coef_df)


ValueError: This solver needs samples of at least 2 classes in the data, but the data contains only one class: 0

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

# 1. Carregar os dados
 # Substitua pelo seu caminho de arquivo
df = pd.read_csv('tabela_final.csv', sep=';')

# 2. Pré-processamento dos dados
# Convertendo colunas categóricas para numéricas
df['sexo'] = df['sexo'].replace({'M': 1, 'F': 0})

# Criar a coluna 'evadido' baseada no último período disponível
ultimo_periodo_disponivel = df['ultimo_periodo'].max()
df['status'] = df['ultimo_periodo'] < ultimo_periodo_disponivel

# Remover colunas não relevantes
df = df.drop(columns=['discente'])  # Remover a coluna 'discente' ou outras que não são úteis para a previsão

# 3. Separar as variáveis independentes (X) e a variável dependente (y)
X = df.drop(columns=['status'])
y = df['status']

# 4. Imputação de valores ausentes
imputer = SimpleImputer(strategy='mean')
X = imputer.fit_transform(X)

# 5. Dividir os dados em conjuntos de treinamento e teste
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# 6. Normalizar os dados
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# 7. Treinamento do modelo
model = LogisticRegression()
model.fit(X_train, y_train)

# 8. Fazer previsões
y_pred = model.predict(X_test)

# 9. Avaliar o modelo
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

print(f'Accuracy: {accuracy:.2f}')
print(f'Precision: {precision:.2f}')
print(f'Recall: {recall:.2f}')
print(f'F1 Score: {f1:.2f}')
print('Confusion Matrix:')
print(conf_matrix)

# 10. Coeficientes do modelo
coeficientes = pd.DataFrame(model.coef_[0], index=df.drop(columns=['status']).columns, columns=['Coeficiente']).sort_values(by='Coeficiente', ascending=False)
print(coeficientes)



Accuracy: 0.95
Precision: 0.94
Recall: 0.99
F1 Score: 0.97
Confusion Matrix:
[[ 36  10]
 [  1 158]]
                                            Coeficiente
FUNDAMENTOS DE SISTEMAS DE INFORMAÇÃO          1.082961
ESTRUTURA DE DADOS_REPROVADO                   0.912692
PROGRAMAÇÃO ORIENTADA A OBJETOS I_APROVADO     0.711454
MATEMÁTICA FINANCEIRA                          0.665336
EMPREENDEDORISMO EM INFORMÁTICA_APROVADO       0.659360
...                                                 ...
ENGENHARIA DE SOFTWARE I                      -0.923658
ENGENHARIA DE SOFTWARE I_APROVADO             -1.005640
PROGRAMAÇÃO_APROVADO                          -1.138788
ano_ingresso                                  -2.185628
ultimo_periodo                                -2.233724

[97 rows x 1 columns]


In [None]:
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
import numpy as np

reg = LinearRegression()
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)

reg.fit(x_train, y_train)

y_pred = reg.predict(x_test)
print(y_pred)

print('Coeficiente: \n', reg.coef_)

print('MSE: %f' % np.mean((reg.predict(x_test) - y_test) ** 2))

print('Variance score: %.2f' % reg.score(x_test, y_test))

#print('RMSE:', % np.sqrt(metrics.mean_squared_error(y_test, y_pred)))

#print('MAE: %', np.mean_absolute_error(y_test, y_pred))

plt.plot(np.linspace(-1, 1, len(y_test)), y_test, label='Notas - Real', color='b')
plt.plot(np.linspace(-1, 1, len(y_pred)), y_pred, label='Notas - Predita', color='g')
plt.legend()
plt.show()

NameError: name 'x' is not defined

In [None]:
model = LinearRegression()

In [None]:
model.fit(x_train, y_train)

NameError: name 'x_train' is not defined