In [23]:
import pandas as pd
import numpy as np


In [24]:
df = pd.read_csv("https://raw.githubusercontent.com/mwaskom/seaborn-data/master/mpg.csv")

In [25]:
df = df.drop("name", axis=1)

In [26]:
df.head()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model_year,origin
0,18.0,8,307.0,130.0,3504,12.0,70,usa
1,15.0,8,350.0,165.0,3693,11.5,70,usa
2,18.0,8,318.0,150.0,3436,11.0,70,usa
3,16.0,8,304.0,150.0,3433,12.0,70,usa
4,17.0,8,302.0,140.0,3449,10.5,70,usa


In [27]:
df.origin.value_counts()

origin
usa       249
japan      79
europe     70
Name: count, dtype: int64

In [28]:
def origin(x):
    if x == "usa":
        return 1
    else:
        return 0

In [29]:
df["origin"] = df.origin.apply(origin)

In [30]:
df.head()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model_year,origin
0,18.0,8,307.0,130.0,3504,12.0,70,1
1,15.0,8,350.0,165.0,3693,11.5,70,1
2,18.0,8,318.0,150.0,3436,11.0,70,1
3,16.0,8,304.0,150.0,3433,12.0,70,1
4,17.0,8,302.0,140.0,3449,10.5,70,1


In [31]:
X = df.drop("mpg", axis=1)

In [32]:
y = df.mpg

In [33]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import RandomizedSearchCV
from sklearn.pipeline import Pipeline

In [34]:
norm = MinMaxScaler()
norm.fit(X)
X_sc = norm.transform(X)

In [35]:
params = {"max_depth": [3,5,7,9], "n_estimators": [100, 200, 300]}
rf = RandomForestRegressor(n_jobs=-1)

grid = RandomizedSearchCV(
    estimator=rf,
    param_distributions=params,
    n_iter=200,
    cv=3,
    n_jobs=-1
)

grid.fit(X_sc,y)




In [36]:
pipe = Pipeline([
("norm", MinMaxScaler()),
 ("rf", RandomForestRegressor(n_jobs=-1))
])

params = {"rf__max_depth": [3,5,7,9], "rf__n_estimators": [100, 200, 300]}

grid = RandomizedSearchCV(
    estimator=pipe,
    param_distributions=params,
    n_iter=200,
    cv=3,
    n_jobs=-1
)

grid.fit(X,y)





In [37]:
grid.best_score_


0.602146825506965

In [38]:
grid.best_estimator_.score(X, y)

0.9680971507659918

In [39]:
import pickle 
with open("model.pkl", "wb") as f:
    pickle.dump(grid.best_estimator_, f)

In [40]:
with open("model.pkl", "rb") as f:
    saved_model = pickle.load(f)

In [41]:
pred_df = pd.DataFrame(np.array([8,307, 130, 3504,12, 70, 1]).reshape(1,-1))
pred_df

Unnamed: 0,0,1,2,3,4,5,6
0,8,307,130,3504,12,70,1


In [42]:
y.head(1)

0    18.0
Name: mpg, dtype: float64

In [43]:
saved_model.predict(pred_df)[0]



16.998402391782598

In [44]:
saved_model.feature_names_in_

array(['cylinders', 'displacement', 'horsepower', 'weight',
       'acceleration', 'model_year', 'origin'], dtype=object)

In [45]:
from sqlalchemy import create_engine
import sqlite3


In [46]:
churro = "sqlite:///mpgdb.db"

In [47]:
engine = create_engine(churro)

In [48]:
inputs = str(pred_df.values[0])

In [49]:
outputs = saved_model.predict(pred_df)[0]



In [53]:
from datetime import datetime


date = str(datetime.now())[0:19]
date

'2024-06-10 18:14:37'

In [54]:
log_df = pd.DataFrame({"inputs":[inputs], "outputs": [outputs], "date": [date]})

In [55]:
log_df.to_sql("logs", con=engine, if_exists="append", index=None)

1

In [56]:
pd.read_sql("SELECT * FROM logs", con=engine)

Unnamed: 0,inputs,outputs,date
0,[ 8 307 130 3504 12 70 1],17.244803,2024-06-10 10:22:49
1,"['10', '400', '250', '4000', '15', '70', '1']",13.501718,2024-06-10 10:34:44
2,"['20', '400', '250', '4000', '15', '70', '1']",13.501718,2024-06-10 10:36:00
3,"['20', '400', '250', '4000', '15', '70', '1']",13.501718,2024-06-10 10:45:44
4,[ 8 307 130 3504 12 70 1],16.998402,2024-06-10 18:14:37


In [57]:
import requests

url = "http://127.0.0.1:5000/predict?cylinders=20&displacement=400&horsepower=250&weight=4000&acceleration=15&model_year=70&origin=1"

payload = {}
headers = {}

response = requests.request("GET", url, headers=headers, data=payload)

print(response.text)


ModuleNotFoundError: No module named 'requests'

Para acceder al POSTGRES

In [None]:
# pip install psycopg2-binary

Collecting psycopg2-binary
  Downloading psycopg2_binary-2.9.9-cp312-cp312-win_amd64.whl.metadata (4.6 kB)
Downloading psycopg2_binary-2.9.9-cp312-cp312-win_amd64.whl (1.2 MB)
   ---------------------------------------- 0.0/1.2 MB ? eta -:--:--
   ---------------------------------------- 0.0/1.2 MB ? eta -:--:--
   ---- ----------------------------------- 0.1/1.2 MB 2.1 MB/s eta 0:00:01
   --------------------- ------------------ 0.6/1.2 MB 5.5 MB/s eta 0:00:01
   ---------------------------------------  1.2/1.2 MB 7.4 MB/s eta 0:00:01
   ---------------------------------------- 1.2/1.2 MB 6.7 MB/s eta 0:00:00
Installing collected packages: psycopg2-binary
Successfully installed psycopg2-binary-2.9.9
Note: you may need to restart the kernel to use updated packages.


In [None]:
import pandas as pd
from sqlalchemy import create_engine

In [None]:
# churro = "postgresql://user:password@host:port/database"   Estructura del churro
churro = "postgresql://postgres:postgresql@104.155.61.55/postgres"    # en este caso no tenemos el puerto, todo lo demás sí

In [None]:
engine = create_engine(churro)

In [None]:
df = pd.DataFrame({"a":[1,2,3], "b":[4,5,6]})

In [None]:
# Aquí creo el nombre de la base de datos "test"

df.to_sql("test", con = engine)

3

In [None]:
pd.read_sql("SELECT* FROM test", con = engine)

Unnamed: 0,index,a,b
0,0,1,4
1,1,2,5
2,2,3,6
