In [None]:
import pandas as pd

# Load the ingested training split.
# NOTE(review): the path is relative, uses Windows separators and points at one
# specific timestamped run directory — confirm it exists before running.
df = pd.read_csv(r"tsunami\artifact\data_ingestion\2024-02-20-15-35-51\train\tsunami.csv")

# Columns removed before modelling.
features = [
    "ID", "YEAR", "DAY", "HOUR", "LOCATION_NAME", "MINUTE",
    "LATITUDE", "LONGITUDE",
    "DAMAGE_TOTAL_DESCRIPTION", "HOUSES_TOTAL_DESCRIPTION", "DEATHS_TOTAL_DESCRIPTION",
    "URL", "COMMENTS",
]
df = df.drop(columns=features)

# Keep only rows whose CAUSE does not mention "Unknown". Rows with a missing
# CAUSE are treated as unknown (na=True) and dropped as well, which is what the
# previous `== False` comparison did implicitly.
cause_is_unknown = df["CAUSE"].str.contains("Unknown", na=True)
# .copy() makes the filtered frame independent, so the column assignment below
# does not write into a slice of the unfiltered frame (SettingWithCopyWarning).
df = df.loc[~cause_is_unknown].copy()

# Replace numeric month codes with month names; codes outside 1..12 become NaN.
df["MONTH"] = df["MONTH"].map({
    1.0: "January", 2.0: "February", 3.0: "March", 4.0: "April",
    5.0: "May", 6.0: "June", 7.0: "July", 8.0: "August",
    9.0: "September", 10.0: "October", 11.0: "November", 12.0: "December",
})

In [None]:
from sklearn.preprocessing import StandardScaler,OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer

# Columns routed to each sub-pipeline of the ColumnTransformer below.
numerical_columns=["EQ_MAGNITUDE","EQ_DEPTH","TS_INTENSITY"]
cat_columns=["MONTH","COUNTRY","REGION","CAUSE"]

# Numeric features: fill missing values with the column median, then standardize.
num_pipeline = Pipeline(steps=[
    ('impute', SimpleImputer(strategy="median")),
    ('StandardScalar', StandardScaler()),
])

# Categorical features: fill missing values with the most frequent category,
# one-hot encode, then scale.
cat_pipeline = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy="most_frequent")),
    # handle_unknown="ignore": a category that was not present when the encoder
    # was fitted is encoded as all zeros at transform time instead of raising.
    ('OneHotEncoder', OneHotEncoder(handle_unknown="ignore")),
    # with_mean=False: scale without centering the one-hot output.
    ('scaler', StandardScaler(with_mean=False)),
])

# Combined preprocessor: each pipeline is applied to its own column list.
Preprocessing = ColumnTransformer([
    ('num_pipeline', num_pipeline, numerical_columns),
    ('cat_pipeline', cat_pipeline, cat_columns),
])

In [None]:
# Split the cleaned frame into the label column (y) and the predictors (x),
# then fit the preprocessor on the predictors and keep the transformed result.
y = df["EVENT_VALIDITY"]
x = df.drop(columns=["EVENT_VALIDITY"])
x1 = Preprocessing.fit_transform(x)

In [None]:
import os
import dill

def save_object(file_path:str,obj):
    """Serialize ``obj`` to ``file_path`` with dill.

    Parent directories of ``file_path`` are created when they do not exist.

    Args:
        file_path: Destination file. May be a bare filename, in which case the
            file is written relative to the current working directory.
        obj: Any object dill can serialize.

    Raises:
        OSError: If the directory cannot be created or the file cannot be opened.
    """
    dir_path = os.path.dirname(file_path)
    # dirname() is "" for a bare filename and os.makedirs("") raises
    # FileNotFoundError, so only create directories when there is one to create.
    if dir_path:
        os.makedirs(dir_path, exist_ok=True)
    with open(file_path, "wb") as file_obj:
        dill.dump(obj, file_obj)
   

In [None]:
# Destination path handed to save_object in the next cell.
# NOTE(review): trans_config is only assigned in a later cell
# (config.get_data_transformation_config()), so on a fresh kernel this cell
# raises NameError when the notebook is run top to bottom — confirm cell order.
file_path=trans_config.preprocessed_object_file_path

In [None]:
# Writes x1 (the output of Preprocessing.fit_transform) to file_path.
# NOTE(review): file_path comes from preprocessed_object_file_path, yet the value
# saved here is the transformed data, not the fitted Preprocessing object —
# confirm which of the two was meant to be persisted.
save_object(file_path,x1)

In [1]:
from tsunami.components.data_ingestion import DataIngestion
from tsunami.components.data_validation import DataValidation
from tsunami.components.data_transformation import DataTransformation
from tsunami.components.model_trainer import ModelTrainer
from tsunami.config.configuration import configuration
# Runs the project pipeline stage by stage: ingestion -> validation ->
# transformation -> model training. Each stage object is built from its config
# and the artifact(s) returned by the preceding stage(s).
config=configuration()

# Kept in its own variable because another cell reads
# trans_config.preprocessed_object_file_path.
trans_config=config.get_data_transformation_config()

# Stage 1: data ingestion.
ing_obj=DataIngestion(config.get_data_ingestion_config())
inj_art=ing_obj.initiate_data_ingestion()


# Stage 2: data validation, given the ingestion artifact.
val_obj=DataValidation(config.get_data_validation_config(),
                       inj_art)

val_art=val_obj.initiate_data_validation()


# Stage 3: data transformation, given the ingestion and validation artifacts.
trans_obj=DataTransformation(inj_art,
                             val_art,
                             trans_config)
trans_art=trans_obj.initiate_data_transformation()

# NOTE(review): "get_mode_trainer_config" looks like a typo for
# "get_model_trainer_config" — confirm against the configuration class.
model_config=config.get_mode_trainer_config()

# Stage 4: model training on the transformation artifact.
model_trainer_obj=ModelTrainer(data_transformation_artifact=trans_art,
                               model_trainer_config=model_config)

# Last expression of the cell, so its return value is shown as the cell output.
model_trainer_obj.initiate_model_trainer()




ModelTrainerArtifact(is_trained=True, trained_model_file_path='c:\\Users\\Admin\\OneDrive\\Desktop\\Tsunami_Git\\Tsunami_Prediction_Pipeline\\tsunami\\artifact\\model_trainer\\2024-02-21-13-53-52\\trained_model\\model.pkl', model_accuracy=0.9276534983341266)

In [None]:
import pandas as pd
def load_data(file_path):
    """Read the CSV file at ``file_path`` and return it as a pandas DataFrame."""
    return pd.read_csv(file_path)
    


def load_object(file_path:str):
    """Open ``file_path`` in binary mode and return the object dill reads from it.

    Note: dill, like pickle, can execute arbitrary code while loading, so only
    use this on files from a trusted source.
    """
    with open(file_path, "rb") as source:
        loaded = dill.load(source)
    return loaded
    

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Load what the transformation stage wrote: the fitted/saved preprocessing
# object, the transformed training features and the target column.
preproce=load_object(trans_art.preprocessed_object_file_path)
x = load_object(file_path=trans_art.transformed_train_file_path)
y = load_data(file_path=trans_art.target_feature_file_path)

# load_data returns a DataFrame; the classifier expects a 1-D target, so flatten
# it the same way the later cells do (yf.values.ravel()).
# NOTE(review): assumes the target CSV holds a single column — confirm.
y_flat = y.values.ravel()

# random_state pins the forest so re-running the cell reproduces the same score.
model1 = RandomForestClassifier(n_estimators=100, max_depth=13, random_state=42)
model1 = model1.fit(X=x, y=y_flat)
predict = model1.predict(X=x)

# Accuracy on the same data the model was fitted on (training accuracy), not a
# held-out estimate.
accuracy_score(y_true=y_flat, y_pred=predict)

In [None]:
# Paths to one specific transformation run and to the model configuration file.
# NOTE(review): xtrain and model_config are absolute paths on one user's machine
# while ytrain is relative — confirm these resolve in the current environment.
xtrain=r"C:\Users\Admin\OneDrive\Desktop\Tsunami_Git\Tsunami_Prediction_Pipeline\tsunami\artifact\data_transformation\2024-02-20-15-50-23\transformed_data\train\tsunami.npz"
ytrain=r"tsunami\artifact\data_transformation\2024-02-20-15-50-23\transformed_data\target_feature\target_feature.csv"
# NOTE(review): this rebinds model_config, which the pipeline cell assigns to the
# trainer config object, to a plain path string.
model_config=r"C:\Users\Admin\OneDrive\Desktop\Tsunami_Git\Tsunami_Prediction_Pipeline\config\model.yaml"
# These imports replace the load_data / load_object helpers defined in an earlier
# cell of this notebook with the versions from tsunami.utils.
from tsunami.utils import load_data,load_object
# Training features and target loaded from the paths above.
xf=load_object(file_path=xtrain)
yf=load_data(file_path=ytrain)
from tsunami.entity.model_factory import evaluate_regression_model
from tsunami.entity.model_factory import ModelFactory
# Factory built from the model configuration file.
model_fac=ModelFactory(model_config_path=model_config)


In [None]:
# Calls ModelFactory.get_best_model on the loaded training data; the return value
# is shown as the cell output.
# NOTE(review): base_accuracy=0.4 is presumably a minimum acceptable score, and
# yf is passed as loaded rather than flattened — confirm both in model_factory.
model_fac.get_best_model(x=xf,y=yf,base_accuracy=0.4)

In [None]:
# Flatten the target frame into a 1-D array (needed if using grid search).
yf1 = yf.values.ravel()

In [None]:
from sklearn.ensemble import RandomForestClassifier
# Build a 130-tree forest capped at depth 20 and fit it on the training
# features and the flattened target in one step (fit returns the estimator).
model1 = RandomForestClassifier(max_depth=20, n_estimators=130).fit(xf, yf1)



In [None]:
from sklearn.model_selection import GridSearchCV

In [None]:
# Parameter grid for the search: a single candidate value per hyperparameter.
params = dict(
    n_estimators=[130],
    max_depth=[20],
)

In [None]:
# Grid search over `params` using model1 as the estimator, with 2-fold
# cross-validation and verbose progress output.
grid = GridSearchCV(
    estimator=model1,
    param_grid=params,
    cv=2,
    verbose=2,
)

In [None]:
# Shows the best cross-validated score found by the grid search.
# NOTE(review): the grid.fit(...) cell appears further down the notebook, so on a
# fresh top-to-bottom run this attribute does not exist yet — confirm cell order.
grid.best_score_

In [None]:
# Flattens the target frame into a 1-D array.
# NOTE(review): the same assignment already appears in an earlier cell — this
# cell looks redundant; confirm before removing.
yf1=yf.values.ravel()


In [None]:
# Runs the grid search on the training features and the flattened target.
grid.fit(xf,yf1)

In [None]:
# Wrap the fitted model in a list; a later cell passes it as model_list=m1.
m1=[model1]
# Echoes the container type as the cell output (exploratory check).
type(m1)

In [None]:
# Evaluates the models in m1 on the training data with base_accuracy=0.5.
# NOTE(review): the helper is named evaluate_regression_model, but m1 holds a
# RandomForestClassifier — confirm this is the intended evaluation function.
evaluate_regression_model(model_list=m1, X_train=xf, y_train=yf1, base_accuracy=0.5)