In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from lightgbm import LGBMRegressor
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.preprocessing import LabelEncoder
from scipy.stats import randint, uniform


# Read the training set, the test set and the sample-submission template
# (in that order) from CSV files in the current working directory.
train, test, ss = (
    pd.read_csv(csv_name)
    for csv_name in ("train.csv", "test.csv", "sample_sub.csv")
)


def preprocess_data(df: pd.DataFrame, reference_year: int = 2013) -> pd.DataFrame:
    """Clean and feature-engineer a sales DataFrame in place.

    The following steps are applied to ``df`` itself:

    * ``Item_Weight``: missing values are filled with the column mean.
    * ``Item_Fat_Content``: 'Regular' / 'reg' become 1 and
      'Low Fat' / 'low fat' / 'LF' become 0, then the column is cast to int.
    * ``Item_Visibility``: values equal to 0 are replaced by the column mean.
    * ``Item_Identifier``: truncated to its first two characters.
    * ``Years_of_Operation``: new column,
      ``reference_year - Outlet_Establishment_Year``.

    Args:
        df: DataFrame containing the columns named above.
        reference_year: Year the outlet age is measured against.

    Returns:
        The same DataFrame object that was passed in, after modification.

    Raises:
        KeyError: If one of the required columns is missing.
        ValueError: If ``Item_Fat_Content`` holds a value that is neither a
            listed spelling nor convertible to int (raised by ``astype``).
    """
    # Assign the filled column back rather than calling
    # fillna(inplace=True) on df["Item_Weight"]: that chained in-place call
    # operates on a temporary object under pandas Copy-on-Write and would
    # leave the NaNs in df.
    df["Item_Weight"] = df["Item_Weight"].fillna(df["Item_Weight"].mean())

    # Collapse the inconsistent spellings into a binary flag.
    df['Item_Fat_Content'] = df['Item_Fat_Content'].replace({
        'Regular': 1, 'reg': 1, 'Low Fat': 0, 'low fat': 0, 'LF': 0
    }).astype(int)

    # NOTE(review): the mean used here is taken over all rows, including the
    # zeros being replaced; confirm whether a mean over non-zero rows was
    # intended.
    df['Item_Visibility'] = np.where(
        df['Item_Visibility'] == 0,
        df['Item_Visibility'].mean(),
        df['Item_Visibility'],
    )

    # Keep only the two-character prefix of the identifier.
    df['Item_Identifier'] = df['Item_Identifier'].str.slice(0, 2)

    df['Years_of_Operation'] = reference_year - df['Outlet_Establishment_Year']

    return df

# Run the same cleaning / feature step over both data sets.
train, test = preprocess_data(train), preprocess_data(test)

# Separate the target from the predictors; the raw establishment year is
# removed from the predictors together with the target column.
y_train = train['Item_Outlet_Sales']
X_train = train.drop(columns=['Item_Outlet_Sales', 'Outlet_Establishment_Year'])



# Integer-encode every object-dtype predictor column and apply the same
# label -> code mapping to the matching test column.
le = LabelEncoder()
for col in X_train.select_dtypes(include=['object']):
    # Fit on the labels of both frames. Fitting on the training labels only
    # makes transform() raise ValueError as soon as the test set contains a
    # category the training set lacks. When the test set adds no new
    # categories the resulting codes are the same as with a train-only fit.
    le.fit(X_train[col].astype(str).tolist() + test[col].astype(str).tolist())
    X_train[col] = le.transform(X_train[col].astype(str))
    test[col] = le.transform(test[col].astype(str))


# Fixed (non-searched) LightGBM settings.
# subsample_freq=1 turns row bagging on at every iteration; with LightGBM's
# default of 0 bagging is disabled and the 'subsample' values sampled below
# would have no effect on the model.
model_params = {'subsample_freq': 1}
model = LGBMRegressor(**model_params)


# Search space for RandomizedSearchCV.
# scipy's uniform(loc, scale) samples from [loc, loc + scale];
# randint(low, high) samples integers from low to high - 1.
param_dist = {
    'n_estimators': randint(50, 200),
    'learning_rate': uniform(0.01, 0.3),
    'max_depth': randint(3, 8),
    'min_child_samples': randint(20, 150),
    'reg_lambda': uniform(0.001, 0.1),
    # Range [0.6, 1.0]. The previous uniform(0.6, 1) reached up to 1.6, and
    # LightGBM rejects a subsample fraction greater than 1.
    'subsample': uniform(0.6, 0.4),
}




# Randomised hyper-parameter search: 20 sampled candidates, 5-fold
# cross-validation, all available cores, fixed seed for the sampling.
random_search = RandomizedSearchCV(
    model,
    param_dist,
    n_iter=20,
    cv=5,
    n_jobs=-1,
    random_state=2020,
)
random_search.fit(X_train, y_train)

# Report the winning configuration and keep the corresponding estimator.
print("Best Parameters Found:\n", random_search.best_params_)
best_model = random_search.best_estimator_

# Predict on the test rows, selecting the columns in the same order the
# model was trained on.
test_pred = best_model.predict(test.loc[:, X_train.columns])

# Put the predictions into the submission template and write it out.
ss['Item_Outlet_Sales'] = test_pred
ss.to_csv('bigm3rt_predictions_optimized.csv', index=False)

