In [141]:
import pandas as pd
import numpy as np
from sklearn.linear_model import SGDRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import precision_score, recall_score, f1_score

In [142]:
def executa_regresie_pib():
    """
    Fit happiness ~ GDP with two linear-regression routes and print their scores.

    The 2017 dataset is loaded with GDP per capita as the only feature. MSE and
    R² are then printed for:

    1. the model returned by ``regresie_batch_gradient`` (reported in the output
       as "SGDRegressor"), scored through ``evalueaza_regresie``;
    2. the hand-written batch gradient descent
       ``gradient_descrescator_batch_propriu`` (500 epochs, learning rate 0.1),
       whose predictions are mapped back to the original target scale before
       scoring.
    """
    caracteristici, tinta_norm, tinta_reala, scalator_tinta = incarca_date_fericire(
        "2017.csv", ["Economy..GDP.per.Capita."]
    )

    # Library-backed route.
    # NOTE(review): regresie_batch_gradient is defined elsewhere; presumably it
    # wraps SGDRegressor, as the printed label suggests - confirm.
    model_tool = regresie_batch_gradient(caracteristici, tinta_norm)
    mse_tool, r2_tool = evalueaza_regresie(
        model_tool, caracteristici, tinta_reala, tinta_norm, scalator_tinta
    )
    print("\nRegresie: Fericire ~ PIB (SGDRegressor)")
    for eticheta, valoare in (("MSE:", mse_tool), ("R² Score:", r2_tool)):
        print(eticheta, valoare)

    # Own implementation: prepend a column of ones so theta[0] acts as intercept.
    coloana_unu = np.ones((caracteristici.shape[0], 1))
    matrice_design = np.hstack([coloana_unu, caracteristici])
    theta, _ = gradient_descrescator_batch_propriu(
        matrice_design, tinta_norm, epoci=500, alpha=0.1
    )
    # Predictions are produced on the normalised scale; undo the target scaling.
    estimari = scalator_tinta.inverse_transform(matrice_design @ theta)
    mse_propriu = mean_squared_error(tinta_reala, estimari)
    r2_propriu = r2_score(tinta_reala, estimari)
    print("Regresie: Fericire ~ PIB (Gradient descrescator cod propriu)")
    for eticheta, valoare in (("MSE:", mse_propriu), ("R² Score:", r2_propriu)):
        print(eticheta, valoare)

In [143]:
def executa_regresie_pib_libertate():
    """
    Fit happiness ~ GDP + Freedom with two regression routes and print scores.

    The 2017 dataset is loaded with GDP per capita and Freedom as features. MSE
    and R² are then printed for:

    1. the model returned by ``regresie_batch_gradient`` (reported in the output
       as "SGDRegressor"), scored through ``evalueaza_regresie``;
    2. the hand-written batch gradient descent
       ``gradient_descrescator_batch_propriu`` (500 epochs, learning rate 0.1),
       whose predictions are mapped back to the original target scale before
       scoring.
    """
    coloane = ["Economy..GDP.per.Capita.", "Freedom"]
    caracteristici, tinta_norm, tinta_reala, scalator_tinta = incarca_date_fericire(
        "2017.csv", coloane
    )

    # Library-backed route.
    # NOTE(review): regresie_batch_gradient is defined elsewhere; presumably it
    # wraps SGDRegressor, as the printed label suggests - confirm.
    model_tool = regresie_batch_gradient(caracteristici, tinta_norm)
    mse_tool, r2_tool = evalueaza_regresie(
        model_tool, caracteristici, tinta_reala, tinta_norm, scalator_tinta
    )
    print("\nRegresie: Fericire ~ PIB + Libertate (SGDRegressor)")
    for eticheta, valoare in (("MSE:", mse_tool), ("R² Score:", r2_tool)):
        print(eticheta, valoare)

    # Own implementation: prepend a column of ones so theta[0] acts as intercept.
    coloana_unu = np.ones((caracteristici.shape[0], 1))
    matrice_design = np.hstack([coloana_unu, caracteristici])
    theta, _ = gradient_descrescator_batch_propriu(
        matrice_design, tinta_norm, epoci=500, alpha=0.1
    )
    # Predictions are produced on the normalised scale; undo the target scaling.
    estimari = scalator_tinta.inverse_transform(matrice_design @ theta)
    mse_propriu = mean_squared_error(tinta_reala, estimari)
    r2_propriu = r2_score(tinta_reala, estimari)
    print("Regresie: Fericire ~ PIB + Libertate (Gradient descrescator cod propriu)")
    for eticheta, valoare in (("MSE:", mse_propriu), ("R² Score:", r2_propriu)):
        print(eticheta, valoare)

In [144]:
def logistic_regresie_personalizata(X, y, nr_clase=1, epoci=1000, alpha=0.01, prag=0.5, X_nou=None):
    """
    Train a hand-written logistic regression and return class predictions.

    The model is fitted on ``(X, y)`` with batch gradient descent starting from
    zero weights; a bias column of ones is prepended internally.

    Parameters
    ----------
    X : array-like, shape (m, n)
        Training features.
    y : array-like, length m
        Training labels: 0/1 in the binary case, ``0 .. nr_clase - 1`` in the
        multiclass case.
    nr_clase : int, default 1
        ``1`` selects binary classification; any other value trains
        ``nr_clase`` one-vs-rest binary models.
    epoci : int, default 1000
        Number of gradient-descent iterations per binary model.
    alpha : float, default 0.01
        Learning rate.
    prag : float, default 0.5
        Decision threshold applied to the predicted probability. Used only in
        the binary case; the multiclass case takes the arg-max over classes.
    X_nou : array-like, shape (k, n) or (n,), optional
        Samples to classify with the model trained on ``(X, y)``. When omitted,
        predictions are returned for the training rows ``X``. Use this to
        classify unseen samples: calling the function with the unseen sample as
        ``X`` would fit a fresh model on that sample alone.

    Returns
    -------
    numpy.ndarray
        1-D integer array of predicted labels, one per row of ``X_nou`` (or of
        ``X`` when ``X_nou`` is omitted).

    Raises
    ------
    ValueError
        If ``X`` is not a non-empty 2-D array, if ``y`` does not have one label
        per row of ``X``, or if ``X_nou`` has a different number of columns
        than ``X``.
    """
    def sigmoid(z):
        # Clipping keeps np.exp inside the float64 range; for |z| <= 500 the
        # result is unchanged.
        return 1 / (1 + np.exp(-np.clip(z, -500, 500)))

    def adauga_bias(matrice):
        return np.hstack([np.ones((matrice.shape[0], 1)), matrice])

    def antreneaza_binara(X_b, y_bin):
        m, n = X_b.shape
        w = np.zeros((n, 1))
        tinta = y_bin.reshape(-1, 1).astype(float)
        for _ in range(epoci):
            h = sigmoid(X_b @ w)
            gradient = (1 / m) * (X_b.T @ (h - tinta))
            w -= alpha * gradient
        return w

    # Accept lists / DataFrames / Series as well as ndarrays.
    X = np.asarray(X, dtype=float)
    if X.ndim != 2 or X.shape[0] == 0:
        raise ValueError("X trebuie sa fie o matrice 2D nevida (esantioane x caracteristici)")
    y = np.asarray(y).ravel()
    if y.shape[0] != X.shape[0]:
        raise ValueError("X si y trebuie sa aiba acelasi numar de esantioane")

    if X_nou is None:
        X_predictie = X
    else:
        X_predictie = np.asarray(X_nou, dtype=float)
        if X_predictie.ndim == 1:
            # A single sample given as a flat vector.
            X_predictie = X_predictie.reshape(1, -1)
        if X_predictie.ndim != 2 or X_predictie.shape[1] != X.shape[1]:
            raise ValueError("X_nou trebuie sa aiba acelasi numar de caracteristici ca X")

    X_bias = adauga_bias(X)
    X_predictie_bias = adauga_bias(X_predictie)

    if nr_clase == 1:
        w = antreneaza_binara(X_bias, y)
        probabilitati = sigmoid(X_predictie_bias @ w)
        return (probabilitati >= prag).astype(int).ravel()

    # One-vs-rest: one weight row per class, each trained on "class vs. others".
    W = np.zeros((nr_clase, X_bias.shape[1]))
    for cls in range(nr_clase):
        y_bin = (y == cls).astype(int)
        W[cls] = antreneaza_binara(X_bias, y_bin).ravel()
    scoruri = sigmoid(X_predictie_bias @ W.T)
    return np.argmax(scoruri, axis=1)


In [145]:
def _prezice_logistic_binar(X_antrenare, y_antrenare, X_exemplu, epoci=1000, alpha=0.01, prag=0.5):
    """
    Train a binary logistic model on the training set and classify new rows.

    Uses batch gradient descent from zero weights with the same defaults as
    ``logistic_regresie_personalizata`` (1000 epochs, learning rate 0.01,
    threshold 0.5). Returns a 1-D integer array of 0/1 labels, one per row of
    ``X_exemplu``.
    """
    def sigmoid(z):
        return 1 / (1 + np.exp(-z))

    def cu_bias(matrice):
        return np.hstack([np.ones((matrice.shape[0], 1)), matrice])

    X_b = cu_bias(np.asarray(X_antrenare, dtype=float))
    tinta = np.asarray(y_antrenare, dtype=float).reshape(-1, 1)
    nr_randuri, nr_coloane = X_b.shape
    w = np.zeros((nr_coloane, 1))
    for _ in range(epoci):
        gradient = (1 / nr_randuri) * (X_b.T @ (sigmoid(X_b @ w) - tinta))
        w -= alpha * gradient

    probabilitati = sigmoid(cu_bias(np.asarray(X_exemplu, dtype=float)) @ w)
    return (probabilitati >= prag).astype(int).ravel()


def executa_clasificare_cancer():
    """
    Classify a breast lesion (radius=18, texture=10) and score the classifier.

    Two routes are reported:

    * the library model from ``antreneaza_clasificator_logistic``, queried
      through ``clasifica_leziune``;
    * the hand-written binary logistic regression: precision, recall and F1
      are computed on the training data, and the sample lesion is classified
      with weights learned from that same training data.
    """
    X, y, scalator = incarca_date_cancer("wdbc.data")

    # Library route
    model = antreneaza_clasificator_logistic(X, y)
    rezultat = clasifica_leziune(model, scalator, 18, 10)
    print("\nClasificare cancer - LogisticRegression (tool)")
    print("Leziune (raza=18, textura=10):", rezultat)

    # Own implementation: binary logistic regression
    print("\nClasificare cancer - Cod propriu (regresie logistica binara)")
    predictii = logistic_regresie_personalizata(X, y, nr_clase=1)
    precizie = precision_score(y, predictii)
    recall = recall_score(y, predictii)
    f1 = f1_score(y, predictii)

    exemplu = scalator.transform([[18, 10]])
    # The sample must be scored with weights learned from the whole training
    # set; fitting a model on the sample alone would only echo whatever label
    # is passed in alongside it.
    predictie_exemplu = _prezice_logistic_binar(X, y, exemplu)[0]
    eticheta = "Malign" if predictie_exemplu == 1 else "Benign"
    print("Leziune (raza=18, textura=10):", eticheta)
    print("Precizie:", precizie)
    print("Recall:", recall)
    print("F1 Score:", f1)

In [146]:
def _prezice_logistic_ovr(X_antrenare, y_antrenare, X_exemplu, nr_clase, epoci=1000, alpha=0.01):
    """
    Train one-vs-rest logistic models on the training set and classify new rows.

    One binary model per class in ``range(nr_clase)`` is fitted with batch
    gradient descent from zero weights, using the same defaults as
    ``logistic_regresie_personalizata`` (1000 epochs, learning rate 0.01).
    Returns a 1-D array with the arg-max class index for each row of
    ``X_exemplu``.
    """
    def sigmoid(z):
        return 1 / (1 + np.exp(-z))

    def cu_bias(matrice):
        return np.hstack([np.ones((matrice.shape[0], 1)), matrice])

    X_b = cu_bias(np.asarray(X_antrenare, dtype=float))
    etichete_antrenare = np.asarray(y_antrenare).ravel()
    nr_randuri, nr_coloane = X_b.shape

    W = np.zeros((nr_clase, nr_coloane))
    for cls in range(nr_clase):
        tinta = (etichete_antrenare == cls).astype(float).reshape(-1, 1)
        w = np.zeros((nr_coloane, 1))
        for _ in range(epoci):
            gradient = (1 / nr_randuri) * (X_b.T @ (sigmoid(X_b @ w) - tinta))
            w -= alpha * gradient
        W[cls] = w.ravel()

    scoruri = sigmoid(cu_bias(np.asarray(X_exemplu, dtype=float)) @ W.T)
    return np.argmax(scoruri, axis=1)


def executa_clasificare_iris():
    """
    Classify an iris flower from its sepal/petal measurements.

    Two routes are reported for the sample ``[5.35, 3.85, 1.25, 0.4]``:

    * the library model from ``antreneaza_clasificator_logistic``, queried
      through ``clasifica_floare``;
    * the hand-written one-vs-rest logistic regression: accuracy is computed
      on the training data, and the sample flower is classified with weights
      learned from that same training data.
    """
    X, y, scalator, etichete = incarca_date_iris("iris.data")

    # Library route
    model = antreneaza_clasificator_logistic(X, y)
    specie = clasifica_floare(model, scalator, 5.35, 3.85, 1.25, 0.4, etichete)
    print("\nClasificare iris - LogisticRegression (tool)")
    print("Specie pentru floare [5.35, 3.85, 1.25, 0.4]:", specie)

    # Own implementation: multiclass (one-vs-rest) classification
    print("\nClasificare iris - Cod propriu (regresie logistica multiclasă)")
    predictii = logistic_regresie_personalizata(X, y, nr_clase=3)
    acc = np.mean(predictii == y)

    exemplu = scalator.transform([[5.35, 3.85, 1.25, 0.4]])
    # The sample must be scored with weights learned from the whole training
    # set; fitting a model on the sample alone would only echo whatever label
    # is passed in alongside it.
    predictie_floare = _prezice_logistic_ovr(X, y, exemplu, nr_clase=3)[0]
    print("Specie pentru floare [5.35, 3.85, 1.25, 0.4]:", etichete[predictie_floare])
    print("Acuratete model cod propriu:", acc)


In [147]:
def executa_validare_si_loss_si_praguri():
    """
    Run the optional exercises and print their results.

    * Cross-validation of an ``SGDRegressor`` on happiness ~ GDP + Freedom
      (mean and standard deviation of R², via ``scor_validare_incrucisata``).
    * A regressor trained with the alternative ``'huber'`` loss, scored with
      ``evalueaza_regresie``.
    * The logistic classifier on the cancer data evaluated at decision
      thresholds 0.3, 0.5 and 0.7 via ``evalueaza_praguri``.
    """
    # Load once. The loader returns (X, y_norm, y_real, scalator_y); unpacking
    # in exactly that order keeps the real-scale targets from being confused
    # with the normalised ones when the Huber model is scored below.
    X, y_norm, y_real, scalator_y = incarca_date_fericire(
        "2017.csv", ["Economy..GDP.per.Capita.", "Freedom"]
    )

    media_r2, std_r2 = scor_validare_incrucisata(SGDRegressor(), X, y_norm.ravel())
    print("\nValidare incrucisata (SGDRegressor - tool)")
    print("R² mediu:", media_r2)
    print("Deviatie standard:", std_r2)

    model_huber = antreneaza_regresor_loss_custom(X, y_norm, 'huber')
    mse_h, r2_h = evalueaza_regresie(model_huber, X, y_real, y_norm, scalator_y)
    print("\nRegresie cu loss = 'huber' (SGDRegressor)")
    print("MSE:", mse_h)
    print("R² Score:", r2_h)

    X_cancer, y_cancer, _ = incarca_date_cancer("wdbc.data")
    model = antreneaza_clasificator_logistic(X_cancer, y_cancer)
    df_praguri = evalueaza_praguri(model, X_cancer, y_cancer, praguri=[0.3, 0.5, 0.7])
    print("\nEvaluare clasificator pentru praguri diferite (tool):")
    print(df_praguri)

In [148]:
# Run every exercise in order, printing its heading before the steps it covers.
for titlu, pasi in (
    ("\nCerinta 1", (executa_regresie_pib, executa_regresie_pib_libertate)),
    ("\nCerinta 2", (executa_clasificare_cancer,)),
    ("\nCerinta 3", (executa_clasificare_iris,)),
    ("\nCerinta optionala", (executa_validare_si_loss_si_praguri,)),
):
    print(titlu)
    for pas in pasi:
        pas()


Cerinta 1

Regresie: Fericire ~ PIB (SGDRegressor)
MSE: 0.4324467834111914
R² Score: 0.65987249651308
Regresie: Fericire ~ PIB (Gradient descrescator cod propriu)
MSE: 0.4321505672954763
R² Score: 0.6601054760421126

Regresie: Fericire ~ PIB + Libertate (SGDRegressor)
MSE: 0.32553654295041573
R² Score: 0.7439594051918159
Regresie: Fericire ~ PIB + Libertate (Gradient descrescator cod propriu)
MSE: 0.325070606045429
R² Score: 0.7443258733038585

Cerinta 2

Clasificare cancer - LogisticRegression (tool)
Leziune (raza=18, textura=10): Malign

Clasificare cancer - Cod propriu (regresie logistica binara)
Leziune (raza=18, textura=10): Malign
Precizie: 0.9016393442622951
Recall: 0.7783018867924528
F1 Score: 0.8354430379746836

Cerinta 3

Clasificare iris - LogisticRegression (tool)
Specie pentru floare [5.35, 3.85, 1.25, 0.4]: Iris-setosa

Clasificare iris - Cod propriu (regresie logistica multiclasă)
Specie pentru floare [5.35, 3.85, 1.25, 0.4]: Iris-setosa
Acuratete model cod propriu: 0.8