# Por hacer: 
# Parametros para determinar necesidad de riego
# Determinar modelo que mejor se adapte al problema (logisticRegression por el momento da valores correctos)

## Librerias

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.metrics import mean_squared_error, r2_score, accuracy_score, roc_curve, roc_auc_score
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.model_selection import learning_curve
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
import matplotlib.pyplot as plt 

## Leer Datos

In [2]:
data = pd.read_csv('TARP.csv') 

In [3]:
data

Unnamed: 0,Soil Moisture,Temperature,Soil Humidity,Time,Air temperature (C),Wind speed (Km/h),Air humidity (%),Wind gust (Km/h),Pressure (KPa),ph,rainfall,N,P,K,Status
0,54,22,70,21,19.52,2.13,55.04,6.30,101.50,6.502985,202.935536,90.0,42.0,43.0,ON
1,12,20,40,104,19.49,2.01,55.17,10.46,101.50,7.038096,226.655537,85.0,58.0,41.0,OFF
2,34,26,35,62,19.47,1.90,55.30,14.63,101.51,7.840207,263.964248,60.0,55.0,44.0,ON
3,7,44,44,93,19.54,2.28,54.20,16.08,101.51,6.980401,242.864034,74.0,35.0,40.0,OFF
4,50,38,23,92,19.61,2.66,53.09,17.52,101.51,7.628473,262.717340,78.0,42.0,42.0,OFF
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99995,74,3,29,83,,,,,,,,,,,OFF
99996,58,10,46,82,,,,,,,,,,,OFF
99997,4,35,39,17,,,,,,,,,,,ON
99998,83,36,56,15,,,,,,,,,,,OFF


## Limpieza de Datos

In [4]:
data.columns

Index(['Soil Moisture', 'Temperature', ' Soil Humidity', 'Time',
       'Air temperature (C)', 'Wind speed (Km/h)', 'Air humidity (%)',
       'Wind gust (Km/h)', 'Pressure (KPa)', 'ph', 'rainfall', 'N', 'P', 'K',
       'Status'],
      dtype='object')

In [5]:
data.rename(columns={' Soil Humidity': 'Soil Humidity'}, inplace=True)

In [6]:
data.columns

Index(['Soil Moisture', 'Temperature', 'Soil Humidity', 'Time',
       'Air temperature (C)', 'Wind speed (Km/h)', 'Air humidity (%)',
       'Wind gust (Km/h)', 'Pressure (KPa)', 'ph', 'rainfall', 'N', 'P', 'K',
       'Status'],
      dtype='object')

In [7]:
columnas_eliminar = ['Wind speed (Km/h)', 'Wind gust (Km/h)', 'rainfall', 'N', 'P', 'K', 'Status']
data = data.drop(columns=columnas_eliminar)

In [8]:
data

Unnamed: 0,Soil Moisture,Temperature,Soil Humidity,Time,Air temperature (C),Air humidity (%),Pressure (KPa),ph
0,54,22,70,21,19.52,55.04,101.50,6.502985
1,12,20,40,104,19.49,55.17,101.50,7.038096
2,34,26,35,62,19.47,55.30,101.51,7.840207
3,7,44,44,93,19.54,54.20,101.51,6.980401
4,50,38,23,92,19.61,53.09,101.51,7.628473
...,...,...,...,...,...,...,...,...
99995,74,3,29,83,,,,
99996,58,10,46,82,,,,
99997,4,35,39,17,,,,
99998,83,36,56,15,,,,


In [9]:
print(data.isnull().sum())

Soil Moisture              0
Temperature                0
Soil Humidity              0
Time                       0
Air temperature (C)    76005
Air humidity (%)       76005
Pressure (KPa)         76005
ph                     97800
dtype: int64


In [10]:
data = data.dropna()
data

Unnamed: 0,Soil Moisture,Temperature,Soil Humidity,Time,Air temperature (C),Air humidity (%),Pressure (KPa),ph
0,54,22,70,21,19.52,55.04,101.50,6.502985
1,12,20,40,104,19.49,55.17,101.50,7.038096
2,34,26,35,62,19.47,55.30,101.51,7.840207
3,7,44,44,93,19.54,54.20,101.51,6.980401
4,50,38,23,92,19.61,53.09,101.51,7.628473
...,...,...,...,...,...,...,...,...
2195,90,32,28,68,31.44,7.10,101.46,6.780064
2196,41,21,48,42,31.47,7.05,101.45,6.086922
2197,24,37,44,78,31.48,6.92,101.44,6.362608
2198,10,15,22,62,31.48,6.79,101.43,6.758793


In [11]:
data.head()

Unnamed: 0,Soil Moisture,Temperature,Soil Humidity,Time,Air temperature (C),Air humidity (%),Pressure (KPa),ph
0,54,22,70,21,19.52,55.04,101.5,6.502985
1,12,20,40,104,19.49,55.17,101.5,7.038096
2,34,26,35,62,19.47,55.3,101.51,7.840207
3,7,44,44,93,19.54,54.2,101.51,6.980401
4,50,38,23,92,19.61,53.09,101.51,7.628473


## Funcion para determinar si se necesita regar o no

In [25]:
def determinar_necesidad_riego(row):
    if row['Soil Moisture'] < 40 and row['Temperature'] > 25:
        return 1  # Se necesita riego
    else:
        return 0  # No se necesita riego

data.loc[:, 'necesidad_riego'] = data.apply(lambda row: determinar_necesidad_riego(row), axis=1)


# Seleccionar características relevantes y la variable objetivo
X = data[['Soil Moisture', 'Temperature', 'Soil Humidity', 'Time', 'Air humidity (%)', 'Pressure (KPa)', 'ph']]
y = data['necesidad_riego']

In [13]:
data['necesidad_riego'].value_counts()

necesidad_riego
0    1773
1     427
Name: count, dtype: int64

In [14]:
data

Unnamed: 0,Soil Moisture,Temperature,Soil Humidity,Time,Air temperature (C),Air humidity (%),Pressure (KPa),ph,necesidad_riego
0,54,22,70,21,19.52,55.04,101.50,6.502985,0
1,12,20,40,104,19.49,55.17,101.50,7.038096,0
2,34,26,35,62,19.47,55.30,101.51,7.840207,1
3,7,44,44,93,19.54,54.20,101.51,6.980401,1
4,50,38,23,92,19.61,53.09,101.51,7.628473,0
...,...,...,...,...,...,...,...,...,...
2195,90,32,28,68,31.44,7.10,101.46,6.780064,0
2196,41,21,48,42,31.47,7.05,101.45,6.086922,0
2197,24,37,44,78,31.48,6.92,101.44,6.362608,1
2198,10,15,22,62,31.48,6.79,101.43,6.758793,0


## Division de datos entre datos de entrenamiento y datos de testeo

In [15]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

## Normalizacion de datos

In [16]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

## GridSearchCV se usa para buscar los mejores parametros para un estimador, en este caso usando RandomForestClassifier() y luego usando adaboost

In [17]:
param_grid = {'n_estimators': [50, 100, 200], 'max_depth': [10, 20, 30]}
grid_search = GridSearchCV(RandomForestClassifier(), param_grid, cv=5)

grid_search.fit(X_train_scaled, y_train)  

print("Mejores parámetros:", grid_search.best_params_)
print("Mejor puntuación:", grid_search.best_score_)

Mejores parámetros: {'max_depth': 10, 'n_estimators': 50}
Mejor puntuación: 1.0


In [18]:
adaboost = AdaBoostClassifier(n_estimators=100, random_state=42, algorithm='SAMME')
adaboost.fit(X_train_scaled, y_train)

In [19]:
ada_predictions = adaboost.predict(X_test_scaled)
ada_accuracy = accuracy_score(y_test, ada_predictions)
print(f"Accuracy de AdaBoost: {ada_accuracy:.3f}")

Accuracy de AdaBoost: 1.000


In [20]:
param_grid = {'n_estimators': [50, 100, 200], 'learning_rate': [0.1, 0.5, 1.0]}
grid_search = GridSearchCV(adaboost, param_grid, cv=5)

grid_search.fit(X_train_scaled, y_train) 

print("Mejores parámetros:", grid_search.best_params_)
print("Mejor puntuación:", grid_search.best_score_)

Mejores parámetros: {'learning_rate': 0.1, 'n_estimators': 50}
Mejor puntuación: 1.0


## Pruebas de entrenamiento usando distintos modelos

In [21]:
# Entrenar el modelo
model = RandomForestClassifier(n_estimators=50, random_state=42)
model.fit(X_train_scaled, y_train)

# Predecir en el conjunto de prueba
y_pred = model.predict(X_test_scaled)

# Evaluar el modelo
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")
mse = mean_squared_error(y_test, y_pred)
print(f'MSE: {mse}')

# Para predecir si se necesita regar usando nuevos datos
new_data = [
    [45, 24, 50, 90, 54.5, 101.52, 6.9],
    [12, 22, 35, 110, 55.0, 101.54, 7.2],
    [67, 26, 45, 85, 55.1, 101.56, 7.0],
    [33, 21, 40, 105, 54.6, 101.51, 7.3],
    [48, 27, 55, 95, 55.2, 101.55, 7.4],
    [23, 30, 60, 100, 54.8, 101.52, 7.1],
    [50, 25, 70, 80, 54.7, 101.53, 6.8],
    [35, 33, 45, 120, 55.3, 101.57, 7.9],
    [52, 26, 40, 110, 55.0, 101.50, 7.2],
    [59, 34, 50, 115, 54.9, 101.59, 6.7]
] 
predictions = model.predict(new_data)
for i, pred in enumerate(predictions):
    print(f"Predicción para new_data[{i}] (0: No regar, 1: Regar): {pred}")

Accuracy: 1.0
MSE: 0.0
Predicción para new_data[0] (0: No regar, 1: Regar): 0
Predicción para new_data[1] (0: No regar, 1: Regar): 0
Predicción para new_data[2] (0: No regar, 1: Regar): 0
Predicción para new_data[3] (0: No regar, 1: Regar): 0
Predicción para new_data[4] (0: No regar, 1: Regar): 0
Predicción para new_data[5] (0: No regar, 1: Regar): 0
Predicción para new_data[6] (0: No regar, 1: Regar): 0
Predicción para new_data[7] (0: No regar, 1: Regar): 0
Predicción para new_data[8] (0: No regar, 1: Regar): 0
Predicción para new_data[9] (0: No regar, 1: Regar): 0


In [22]:
# Entrenamiento
model = LogisticRegression(random_state=42)
model.fit(X_train_scaled, y_train)
y_pred = model.predict(X_test_scaled)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")
mse = mean_squared_error(y_test, y_pred)
print(f'MSE: {mse}')
# Para predecir si se necesita regar usando nuevos datos
new_data = [
    [45, 24, 50, 90, 54.5, 101.52, 6.9],
    [12, 22, 35, 110, 55.0, 101.54, 7.2],
    [67, 26, 45, 85, 55.1, 101.56, 7.0],
    [33, 21, 40, 105, 54.6, 101.51, 7.3],
    [48, 27, 55, 95, 55.2, 101.55, 7.4],
    [23, 30, 60, 100, 54.8, 101.52, 7.1],
    [50, 25, 70, 80, 54.7, 101.53, 6.8],
    [35, 33, 45, 120, 55.3, 101.57, 7.9],
    [52, 26, 40, 110, 55.0, 101.50, 7.2],
    [59, 34, 50, 115, 54.9, 101.59, 6.7]
] 
predictions = model.predict(new_data)
for i, pred in enumerate(predictions):
    print(f"Predicción para new_data[{i}] (0: No regar, 1: Regar): {pred}")

Accuracy: 0.925
MSE: 0.075
Predicción para new_data[0] (0: No regar, 1: Regar): 0
Predicción para new_data[1] (0: No regar, 1: Regar): 1
Predicción para new_data[2] (0: No regar, 1: Regar): 0
Predicción para new_data[3] (0: No regar, 1: Regar): 0
Predicción para new_data[4] (0: No regar, 1: Regar): 0
Predicción para new_data[5] (0: No regar, 1: Regar): 1
Predicción para new_data[6] (0: No regar, 1: Regar): 0
Predicción para new_data[7] (0: No regar, 1: Regar): 1
Predicción para new_data[8] (0: No regar, 1: Regar): 0
Predicción para new_data[9] (0: No regar, 1: Regar): 0


In [23]:
# Entrenamiento
model = GradientBoostingClassifier(random_state=42)
model.fit(X_train_scaled, y_train)
y_pred = model.predict(X_test_scaled)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")
mse = mean_squared_error(y_test, y_pred)
print(f'MSE: {mse}')
# Para predecir si se necesita regar usando nuevos datos
new_data = [
    [45, 24, 50, 90, 54.5, 101.52, 6.9],
    [12, 22, 35, 110, 55.0, 101.54, 7.2],
    [67, 26, 45, 85, 55.1, 101.56, 7.0],
    [33, 21, 40, 105, 54.6, 101.51, 7.3],
    [48, 27, 55, 95, 55.2, 101.55, 7.4],
    [23, 30, 60, 100, 54.8, 101.52, 7.1],
    [50, 25, 70, 80, 54.7, 101.53, 6.8],
    [35, 33, 45, 120, 55.3, 101.57, 7.9],
    [52, 26, 40, 110, 55.0, 101.50, 7.2],
    [59, 34, 50, 115, 54.9, 101.59, 6.7]
] 
predictions = model.predict(new_data)
for i, pred in enumerate(predictions):
    print(f"Predicción para new_data[{i}] (0: No regar, 1: Regar): {pred}")

Accuracy: 1.0
MSE: 0.0
Predicción para new_data[0] (0: No regar, 1: Regar): 0
Predicción para new_data[1] (0: No regar, 1: Regar): 0
Predicción para new_data[2] (0: No regar, 1: Regar): 0
Predicción para new_data[3] (0: No regar, 1: Regar): 0
Predicción para new_data[4] (0: No regar, 1: Regar): 0
Predicción para new_data[5] (0: No regar, 1: Regar): 0
Predicción para new_data[6] (0: No regar, 1: Regar): 0
Predicción para new_data[7] (0: No regar, 1: Regar): 0
Predicción para new_data[8] (0: No regar, 1: Regar): 0
Predicción para new_data[9] (0: No regar, 1: Regar): 0


In [24]:
# Entrenamiento
model = DecisionTreeClassifier(random_state=42)
model.fit(X_train_scaled, y_train)
y_pred = model.predict(X_test_scaled)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")
mse = mean_squared_error(y_test, y_pred)
print(f'MSE: {mse}')
# Para predecir si se necesita regar usando nuevos datos
new_data = [
    [45, 24, 50, 90, 54.5, 101.52, 6.9],
    [12, 22, 35, 110, 55.0, 101.54, 7.2],
    [67, 26, 45, 85, 55.1, 101.56, 7.0],
    [33, 21, 40, 105, 54.6, 101.51, 7.3],
    [48, 27, 55, 95, 55.2, 101.55, 7.4],
    [23, 30, 60, 100, 54.8, 101.52, 7.1],
    [50, 25, 70, 80, 54.7, 101.53, 6.8],
    [35, 33, 45, 120, 55.3, 101.57, 7.9],
    [52, 26, 40, 110, 55.0, 101.50, 7.2],
    [59, 34, 50, 115, 54.9, 101.59, 6.7]
] 
predictions = model.predict(new_data)
for i, pred in enumerate(predictions):
    print(f"Predicción para new_data[{i}] (0: No regar, 1: Regar): {pred}")

Accuracy: 1.0
MSE: 0.0
Predicción para new_data[0] (0: No regar, 1: Regar): 0
Predicción para new_data[1] (0: No regar, 1: Regar): 0
Predicción para new_data[2] (0: No regar, 1: Regar): 0
Predicción para new_data[3] (0: No regar, 1: Regar): 0
Predicción para new_data[4] (0: No regar, 1: Regar): 0
Predicción para new_data[5] (0: No regar, 1: Regar): 0
Predicción para new_data[6] (0: No regar, 1: Regar): 0
Predicción para new_data[7] (0: No regar, 1: Regar): 0
Predicción para new_data[8] (0: No regar, 1: Regar): 0
Predicción para new_data[9] (0: No regar, 1: Regar): 0
