# Predicción de datos
Este dataset proviene del Climate Change Knowledge Portal del World Bank Group. Contiene 3 tipos de datos:
- Average mean surface air temperature (tas)
- Relative Humidity (hurs)
- Precipitation (pr)

Son datos mensuales de Bolivia desde 1950 hasta 2023.


In [None]:
import polars as pl
import json
import matplotlib.pyplot as plt
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_percentage_error

In [None]:
# Read the raw JSON export straight into a polars DataFrame and display it.
# NOTE(review): the same file is parsed again with `json` further down and `df`
# is rebuilt from it - presumably because this direct read does not yield flat
# columns; confirm whether this cell is still needed.
df = pl.read_json("datasets/temperatures.json")
df

In [None]:
def __load_data(path="datasets/temperatures.json"):
    """Parse a JSON file and return its decoded content.

    Args:
        path: Location of the JSON file to read. Defaults to the
            temperatures dataset used by this notebook, so existing
            zero-argument calls keep working.

    Returns:
        Whatever ``json.load`` produces for the file content.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Explicit UTF-8 so decoding does not depend on the platform default.
    with open(path, "r", encoding="utf-8") as archivo:
        return json.load(archivo)


In [None]:
# Load the dataset as plain Python objects and print it to inspect its structure.
data_json = __load_data()
print(data_json)

In [None]:
# Each top-level entry of `data_json` maps a column name to a nested mapping;
# keep only the nested values, as a plain list per column. A comprehension
# over `.items()` avoids re-indexing `data_json` by key and does not leave
# loop temporaries behind in the notebook namespace.
data_dict = {col: list(values.values()) for col, values in data_json.items()}
print(data_dict)
print(data_dict.keys())
    

In [None]:
# Rebuild `df` from the per-column lists in `data_dict` and display it.
df = pl.DataFrame(data_dict)
df

In [None]:
# Display polars' summary statistics for the columns of `df`.
df.describe()

In [None]:
# Scatter plot of the "hurs" column (x axis) against the "pr" column (y axis).
fig, ax = plt.subplots()
ax.scatter(df["hurs"], df["pr"])
ax.set(xlabel="Humedad relativa", ylabel="Precipitacion [mm]")
plt.show()

In [None]:
# Scatter plot of the "tas" column (x axis) against the "pr" column (y axis).
fig, ax = plt.subplots()
ax.scatter(df["tas"], df["pr"])
ax.set(xlabel="Temperatura media en superficie", ylabel="Precipitacion [mm]")
plt.show()

In [None]:
# 3D scatter of the three columns together: hurs (x), tas (y), pr (z).
# The axes are labelled with the same wording as the 2D scatter plots so it is
# clear which variable sits on which axis.
fig = plt.figure()
ax = fig.add_subplot(projection="3d")
ax.scatter(df["hurs"], df["tas"], df["pr"])
ax.set_xlabel("Humedad relativa")
ax.set_ylabel("Temperatura media en superficie")
ax.set_zlabel("Precipitacion [mm]")
plt.show()

In [None]:
# Target vector: the "pr" column (a Series converts to a one-dimensional array).
y_data = df["pr"].to_numpy()
# Feature matrix with an explicit column order. The plotting cells further down
# index column 0 as HURS and column 1 as TAS, so the order must not depend on
# how the columns happen to be ordered in the JSON file. Selecting the columns
# (instead of dropping "pr" in place) also leaves `df` untouched, so this cell
# and the plots above can be re-run without a missing-column error.
x_data = df.select(["hurs", "tas"]).to_numpy()


In [None]:
def prepare_dataset(data, train_fraction=0.8):
    """Split ``data`` positionally into a training part and a test part.

    No shuffling is done: the leading items form the training part and the
    remaining trailing items form the test part.

    Args:
        data: Any object supporting ``len`` and slicing (list, NumPy
            array, ...).
        train_fraction: Share of the items assigned to training, between
            0 and 1 inclusive. The split index is truncated towards zero.
            Defaults to 0.8.

    Returns:
        A ``(train, test)`` tuple of slices of ``data``.

    Raises:
        ValueError: If ``train_fraction`` is outside ``[0, 1]``.
    """
    # Also rejects NaN, for which both comparisons are false.
    if not 0 <= train_fraction <= 1:
        raise ValueError(
            f"train_fraction must be between 0 and 1, got {train_fraction!r}"
        )
    split_at = int(train_fraction * len(data))
    return data[:split_at], data[split_at:]

In [None]:
# Apply the same positional split to the features and to the target, then
# print both training lengths so they can be compared.
x_train, x_test = prepare_dataset(x_data)
y_train, y_test = prepare_dataset(y_data)
print(len(x_train))
print(len(y_train))

In [None]:
# Fit a linear regression model on the training split.
lin_reg = LinearRegression()
lin_reg.fit(x_train, y_train)

In [None]:
# Predict the target for the held-out rows and show the first prediction.
y_pred = lin_reg.predict(x_test)
y_pred[:1]

In [None]:
# Mean absolute percentage error of the predictions on the test split.
# NOTE(review): scikit-learn reports this as a fraction (1.0 == 100 %), and it
# can become very large when true values are close to zero - worth confirming
# that `y_test` has no near-zero precipitation values before relying on it.
mean_absolute_percentage_error(y_test, y_pred)

In [None]:
# Training points together with the fitted regression plane.
# Column 0 of `x_train` is treated as HURS and column 1 as TAS.
fig = plt.figure(figsize=(12, 8))
ax = fig.add_subplot(projection="3d")

hurs_train = x_train[:, 0]
tas_train = x_train[:, 1]
ax.scatter(hurs_train, tas_train, y_train, c='cyan', marker='x', alpha=0.3)

# 20 x 20 grid spanning the observed range of both features.
hurs_range = np.linspace(hurs_train.min(), hurs_train.max(), 20)
tas_range = np.linspace(tas_train.min(), tas_train.max(), 20)
hurs_mesh, tas_mesh = np.meshgrid(hurs_range, tas_range)

# Evaluate the plane by hand: intercept + coef[0] * hurs + coef[1] * tas.
pr_mesh = lin_reg.intercept_ + (lin_reg.coef_[0] * hurs_mesh) + (lin_reg.coef_[1] * tas_mesh)
surface = ax.plot_surface(hurs_mesh, tas_mesh, pr_mesh, alpha=0.2, color='red')

ax.set(xlabel='HURS', ylabel='TAS', zlabel='PR')
plt.show()

In [None]:
# Fit along the first feature (labelled HURS): the second feature is held at
# its training mean, so the model reduces to a straight line.
fig, ax = plt.subplots(figsize=(10, 6))

hurs_train = x_train[:, 0]
tas_mean = x_train[:, 1].mean()
y_graph = lin_reg.intercept_ + (lin_reg.coef_[0] * hurs_train) + (lin_reg.coef_[1] * tas_mean)

ax.scatter(hurs_train, y_train, alpha=0.5, c='blue')
ax.plot(hurs_train, y_graph, 'r', linewidth=2)
ax.set(xlabel='HURS', ylabel='PR')
plt.show()

In [None]:
# Fit along the second feature (labelled TAS): the first feature is held at
# its training mean, so the model reduces to a straight line.
fig, ax = plt.subplots()

hurs_mean = x_train[:, 0].mean()
tas_train = x_train[:, 1]
y_graph = lin_reg.intercept_ + (lin_reg.coef_[0] * hurs_mean) + (lin_reg.coef_[1] * tas_train)

ax.scatter(tas_train, y_train, alpha=0.5, c='blue')
ax.plot(tas_train, y_graph, 'r', linewidth=2)
ax.set(xlabel='TAS', ylabel='PR')

plt.tight_layout()
plt.show()