In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split


# Read the data; the 'ID' column becomes the index of both frames
X_full = pd.read_csv('/kaggle/input/icds2021-mini-hackathon/train_data.csv', index_col='ID')
X_test = pd.read_csv('/kaggle/input/icds2021-mini-hackathon/test_data.csv', index_col='ID')

# Remove rows with missing target, then separate target from predictors.
# Reassignment is used instead of inplace=True so each step yields a new
# frame and no object is mutated behind the reader's back.
X_full = X_full.dropna(axis=0, subset=['app_status'])
y = X_full['app_status']
X_full = X_full.drop(columns=['app_status'])

# Break off validation set from training data (80% train / 20% validation).
# A fixed seed keeps the split, and every metric computed on it, identical
# across kernel restarts; without it each run scores a different split.
SPLIT_SEED = 0
X_train, X_valid, y_train, y_valid = train_test_split(
    X_full, y, test_size=0.2, random_state=SPLIT_SEED)

In [None]:
from xgboost import XGBClassifier
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score

# One-hot encode features; categories not seen during fit are ignored
# (handle_unknown='ignore') instead of raising at transform time
categorical_transformer = Pipeline([
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
])

# Column-wise preprocessing: every column of X_train is routed through the
# one-hot transformer above.
# NOTE(review): this also one-hot encodes any numeric columns the data may
# contain -- confirm that treating all features as categorical is intended.
preprocessor = ColumnTransformer([
    ('cat', categorical_transformer, X_train.columns),
])

# Gradient-boosted classifier, evaluated with multiclass log loss
model = XGBClassifier(eval_metric='mlogloss', use_label_encoder=False)

# Chain preprocessing and the model so they are fit and applied together
clf = Pipeline([
    ('preprocessor', preprocessor),
    ('model', model),
])

# Fit the whole pipeline on the training split
clf.fit(X_train, y_train)

# Predict the validation split and report its accuracy
y_pred = clf.predict(X_valid)
valid_accuracy = accuracy_score(y_valid, y_pred)
print(valid_accuracy)

In [None]:
# Confusion matrix of validation labels vs. predictions
# (rows: true classes, columns: predicted classes)
valid_confusion = confusion_matrix(y_valid, y_pred)
print(valid_confusion)

In [None]:
# Text report of per-class metrics on the validation split
valid_report = classification_report(y_valid, y_pred)
print(valid_report)

In [None]:
# Run the already-fitted pipeline on the test data: the same preprocessing
# is applied and class predictions are returned (nothing is re-fit here)
out_put = clf.predict(X_test)

In [None]:
# Assemble the submission table -- the test index as 'ID' next to the
# predicted 'app_status' -- and write it to CSV without the row index
submission_columns = {'ID': X_test.index, 'app_status': out_put}
output = pd.DataFrame(submission_columns)
output.to_csv('submission.csv', index=False)