<a href="https://colab.research.google.com/github/tsuchidarikuto/chatgpt-followup-extension/blob/main/%E3%82%B3%E3%83%B3%E3%83%9A2_0_74131.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# ライブラリの読み込み
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

#機械学習用ライブラリ
from sklearn.svm import LinearSVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import make_pipeline

#データ前処理系
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

#スコア
from sklearn.metrics import f1_score
from sklearn.metrics import roc_auc_score


import warnings
warnings.filterwarnings('ignore')

In [None]:
# データの読み込み
# INPUT_DIRにtrain.csvなどのデータを置いているディレクトリを指定してください。
INPUT_DIR = "/content/drive/MyDrive/GCI/compe2/"

train = pd.read_csv(INPUT_DIR + "train.csv")
test = pd.read_csv(INPUT_DIR + "test.csv")
sample_sub = pd.read_csv(INPUT_DIR + "sample_submission.csv")
train.columns

Index(['SK_ID_CURR', 'TARGET', 'NAME_CONTRACT_TYPE', 'CODE_GENDER',
       'FLAG_OWN_CAR', 'FLAG_OWN_REALTY', 'CNT_CHILDREN', 'AMT_INCOME_TOTAL',
       'AMT_CREDIT', 'AMT_ANNUITY', 'AMT_GOODS_PRICE', 'NAME_TYPE_SUITE',
       'NAME_INCOME_TYPE', 'NAME_EDUCATION_TYPE', 'NAME_FAMILY_STATUS',
       'NAME_HOUSING_TYPE', 'REGION_POPULATION_RELATIVE', 'DAYS_BIRTH',
       'DAYS_EMPLOYED', 'DAYS_REGISTRATION', 'DAYS_ID_PUBLISH', 'OWN_CAR_AGE',
       'FLAG_MOBIL', 'FLAG_EMP_PHONE', 'FLAG_WORK_PHONE', 'FLAG_CONT_MOBILE',
       'FLAG_PHONE', 'FLAG_EMAIL', 'OCCUPATION_TYPE', 'CNT_FAM_MEMBERS',
       'REGION_RATING_CLIENT', 'REGION_RATING_CLIENT_W_CITY',
       'REG_REGION_NOT_LIVE_REGION', 'REG_REGION_NOT_WORK_REGION',
       'LIVE_REGION_NOT_WORK_REGION', 'REG_CITY_NOT_LIVE_CITY',
       'REG_CITY_NOT_WORK_CITY', 'LIVE_CITY_NOT_WORK_CITY',
       'ORGANIZATION_TYPE', 'EXT_SOURCE_1', 'EXT_SOURCE_2', 'EXT_SOURCE_3',
       'OBS_30_CNT_SOCIAL_CIRCLE', 'DEF_30_CNT_SOCIAL_CIRCLE',
       'OBS_60

In [None]:
# EXT_SOURCE_2の欠損値を平均値で補完
train["EXT_SOURCE_2"].fillna(train["EXT_SOURCE_2"].mean(), inplace=True)
test["EXT_SOURCE_2"].fillna(train["EXT_SOURCE_2"].mean(), inplace=True)

# NAME_CONTRACT_TYPEの数値化（Label Encoding）
train["NAME_CONTRACT_TYPE"].replace({'Cash loans': 0, 'Revolving loans': 1}, inplace=True)
test["NAME_CONTRACT_TYPE"].replace({'Cash loans': 0, 'Revolving loans': 1}, inplace=True)

# ORGANIZATION_TYPEの数値化（Count Encoding）
organization_ce = train["ORGANIZATION_TYPE"].value_counts()
train["ORGANIZATION_TYPE"] = train["ORGANIZATION_TYPE"].map(organization_ce)
test["ORGANIZATION_TYPE"] = test["ORGANIZATION_TYPE"].map(organization_ce)

# OWN_CAR_AGEの60以上の値（外れ値）を欠損値扱いする
train.loc[train["OWN_CAR_AGE"] >= 60, "OWN_CAR_AGE"] = np.nan
test.loc[test["OWN_CAR_AGE"] >= 60, "OWN_CAR_AGE"] = np.nan
train["OWN_CAR_AGE"] = train["OWN_CAR_AGE"] // 10
test["OWN_CAR_AGE"] = test["OWN_CAR_AGE"] // 10
train_car_age_ohe = pd.get_dummies(train["OWN_CAR_AGE"]).add_prefix("OWN_CAR_AGE_")
test_car_age_ohe = pd.get_dummies(test["OWN_CAR_AGE"]).add_prefix("OWN_CAR_AGE_")
train = pd.concat([train, train_car_age_ohe], axis=1)
test = pd.concat([test, test_car_age_ohe], axis=1)
train.drop('OWN_CAR_AGE', axis=1, inplace=True)
test.drop('OWN_CAR_AGE', axis=1, inplace=True)

#絶対に要らないのでIDは削除
train.drop('SK_ID_CURR',axis=1, inplace=True)
test.drop('SK_ID_CURR',axis=1, inplace=True)

#性別がXNAの行は削除
train=train[train["CODE_GENDER"]!="XNA"]
test=test[test["CODE_GENDER"]!="XNA"]

#性別でohe
train_sex_ohe = pd.get_dummies(train["CODE_GENDER"])
test_sex_ohe = pd.get_dummies(test["CODE_GENDER"])
train = pd.concat([train,train_sex_ohe],axis=1)
test = pd.concat([test,test_sex_ohe],axis=1)
train.drop("CODE_GENDER",axis=1,inplace=True)
test.drop("CODE_GENDER",axis=1,inplace=True)

#車の所有を所有年数の有無で判定
train.loc[train["FLAG_OWN_CAR"].isnull() & ~train["OWN_CAR_AGE_0.0"], "FLAG_OWN_CAR",] = 1
train.loc[train["FLAG_OWN_CAR"] == "Y", "FLAG_OWN_CAR"] = 1
train.loc[train["FLAG_OWN_CAR"]!=1,"FLAG_OWN_CAR"]=0
test.loc[test["FLAG_OWN_CAR"].isnull() & ~test["OWN_CAR_AGE_0.0"], "FLAG_OWN_CAR",] = 1
test.loc[test["FLAG_OWN_CAR"] == "Y", "FLAG_OWN_CAR"] = 1
test.loc[test["FLAG_OWN_CAR"]!=1,"FLAG_OWN_CAR"]=0

#不動産の所有を車の所有の有無、ローンの額で判定
train.loc[train["FLAG_OWN_REALTY"].isnull() & train["FLAG_OWN_CAR"] ==1,"FLAG_OWN_REALTY"] = 1
train.loc[train["FLAG_OWN_REALTY"]=="Y","FLAG_OWN_REALTY"] = 1
train.loc[train["FLAG_OWN_REALTY"]=="N","FLAG_OWN_REALTY"] = 0
train.loc[train["FLAG_OWN_REALTY"].isnull() & train["AMT_CREDIT"] >=604478,"FLAG_OWN_REALTY"] = 1
train.loc[train["FLAG_OWN_REALTY"].isnull() & train["AMT_CREDIT"] <604478,"FLAG_OWN_REALTY"] = 0
test.loc[test["FLAG_OWN_REALTY"].isnull() & test["FLAG_OWN_CAR"] ==1,"FLAG_OWN_REALTY"] = 1
test.loc[test["FLAG_OWN_REALTY"]=="Y","FLAG_OWN_REALTY"] = 1
test.loc[test["FLAG_OWN_REALTY"]=="N","FLAG_OWN_REALTY"] = 0
test.loc[test["FLAG_OWN_REALTY"].isnull() & test["AMT_CREDIT"] >=604478,"FLAG_OWN_REALTY"] = 1
test.loc[test["FLAG_OWN_REALTY"].isnull() & test["AMT_CREDIT"] <604478,"FLAG_OWN_REALTY"] = 0

#年間の返済額のnanを平均で埋める
train.loc[train["AMT_ANNUITY"].isna(),"AMT_ANNUITY"]= train["AMT_ANNUITY"].mean()
test.loc[test["AMT_ANNUITY"].isna(),"AMT_ANNUITY"]= test["AMT_ANNUITY"].mean()

#購入金額のnanを平均で埋める
train.loc[train["AMT_GOODS_PRICE"].isna(),"AMT_GOODS_PRICE"]= train["AMT_GOODS_PRICE"].mean()
test.loc[test["AMT_GOODS_PRICE"].isna(),"AMT_GOODS_PRICE"]= test["AMT_GOODS_PRICE"].mean()

#カウントエンコーディング
nametype_ce = train["NAME_TYPE_SUITE"].value_counts()
nametype_na = train["NAME_TYPE_SUITE"].isnull().sum()
train["NAME_TYPE_SUITE"] = train["NAME_TYPE_SUITE"].map(nametype_ce)
train.loc[train["NAME_TYPE_SUITE"].isna(),"NAME_TYPE_SUITE"]= nametype_na
test["NAME_TYPE_SUITE"] = test["NAME_TYPE_SUITE"].map(nametype_ce)
test.loc[test["NAME_TYPE_SUITE"].isna(),"NAME_TYPE_SUITE"]= nametype_na

#カウントエンコーディング
incometype_ce = train["NAME_INCOME_TYPE"].value_counts()
train["NAME_INCOME_TYPE"] = train["NAME_INCOME_TYPE"].map(incometype_ce)
test["NAME_INCOME_TYPE"] = test["NAME_INCOME_TYPE"].map(incometype_ce)

#カウントエンコーディング
educationtype_ce = train["NAME_EDUCATION_TYPE"].value_counts()
train["NAME_EDUCATION_TYPE"] = train["NAME_EDUCATION_TYPE"].map(educationtype_ce)
test["NAME_EDUCATION_TYPE"] = test["NAME_EDUCATION_TYPE"].map(educationtype_ce)

#カウントエンコーディング
familytype_ce = train["NAME_FAMILY_STATUS"].value_counts()
train["NAME_FAMILY_STATUS"] = train["NAME_FAMILY_STATUS"].map(familytype_ce)
test["NAME_FAMILY_STATUS"] = test["NAME_FAMILY_STATUS"].map(familytype_ce)

#カウントエンコーディング
housetype_ce = train["NAME_HOUSING_TYPE"].value_counts()
train["NAME_HOUSING_TYPE"] = train["NAME_HOUSING_TYPE"].map(housetype_ce)
test["NAME_HOUSING_TYPE"] = test["NAME_HOUSING_TYPE"].map(housetype_ce)

#マイナスはプラスに変換
train["DAYS_BIRTH"]=abs(train["DAYS_BIRTH"])
test["DAYS_BIRTH"]=abs(test["DAYS_BIRTH"])

#変な値がおおいので削除
train.drop('DAYS_EMPLOYED',axis=1, inplace=True)
test.drop('DAYS_EMPLOYED',axis=1, inplace=True)

#マイナスはプラスに変換
train["DAYS_REGISTRATION"]=abs(train["DAYS_REGISTRATION"])
test["DAYS_REGISTRATION"]=abs(test["DAYS_REGISTRATION"])

#マイナスはプラスに変換
train["DAYS_ID_PUBLISH"]=abs(train["DAYS_ID_PUBLISH"])
test["DAYS_ID_PUBLISH"]=abs(test["DAYS_ID_PUBLISH"])

#みんな持ってるので削除
train.drop('FLAG_MOBIL',axis=1, inplace=True)
test.drop('FLAG_MOBIL',axis=1, inplace=True)

#カウントエンコーディング
ocupationtype_ce = train["OCCUPATION_TYPE"].value_counts()
ocupationtype_na = train["OCCUPATION_TYPE"].isnull().sum()
train["OCCUPATION_TYPE"] = train["OCCUPATION_TYPE"].map(ocupationtype_ce)
train.loc[train["OCCUPATION_TYPE"].isna(),"OCCUPATION_TYPE"]= ocupationtype_na
test["OCCUPATION_TYPE"] = test["OCCUPATION_TYPE"].map(ocupationtype_ce)
test.loc[test["OCCUPATION_TYPE"].isna(),"OCCUPATION_TYPE"]= ocupationtype_na

#nullは削除
train=train[~train["CNT_FAM_MEMBERS"].isnull()]
test=test[~test["CNT_FAM_MEMBERS"].isnull()]

#カウントエンコーディング
orgtype_ce = train["ORGANIZATION_TYPE"].value_counts()
train["ORGANIZATION_TYPE"] = train["ORGANIZATION_TYPE"].map(orgtype_ce)
test["ORGANIZATION_TYPE"] = test["ORGANIZATION_TYPE"].map(orgtype_ce)


#欠損値が多いので削除
train["IS_SOURCE1_NAN"]=train["EXT_SOURCE_1"].isnull()
test["IS_SOURCE1_NAN"]=test["EXT_SOURCE_1"].isnull()
train.drop('EXT_SOURCE_1',axis=1, inplace=True)
test.drop('EXT_SOURCE_1',axis=1, inplace=True)


train["IS_SOURCE2_NAN"]=train["EXT_SOURCE_2"].isnull()
test["IS_SOURCE2_NAN"]=test["EXT_SOURCE_2"].isnull()

#source3の欠損値はsource2でうめる
mean_score_ratio=train["EXT_SOURCE_3"].mean()/train["EXT_SOURCE_2"].mean()
train.loc[train["EXT_SOURCE_3"].isnull(),"EXT_SOURCE_3"] = train[train["EXT_SOURCE_3"].isnull()]["EXT_SOURCE_2"]*mean_score_ratio
test.loc[test["EXT_SOURCE_3"].isnull(),"EXT_SOURCE_3"] = test[test["EXT_SOURCE_3"].isnull()]["EXT_SOURCE_2"]*mean_score_ratio

#欠損値は0で埋める
train.loc[train["OBS_30_CNT_SOCIAL_CIRCLE"].isnull(),["OBS_30_CNT_SOCIAL_CIRCLE","DEF_30_CNT_SOCIAL_CIRCLE","OBS_60_CNT_SOCIAL_CIRCLE","DEF_60_CNT_SOCIAL_CIRCLE"]]=0
test.loc[test["OBS_30_CNT_SOCIAL_CIRCLE"].isnull(),["OBS_30_CNT_SOCIAL_CIRCLE","DEF_30_CNT_SOCIAL_CIRCLE","OBS_60_CNT_SOCIAL_CIRCLE","DEF_60_CNT_SOCIAL_CIRCLE"]]=0

#0でうめ、絶対値を取る
train.loc[train["DAYS_LAST_PHONE_CHANGE"].isnull(),"DAYS_LAST_PHONE_CHANGE"]=0
train["DAYS_LAST_PHONE_CHANGE"]=abs(train["DAYS_LAST_PHONE_CHANGE"])
test.loc[test["DAYS_LAST_PHONE_CHANGE"].isnull(),"DAYS_LAST_PHONE_CHANGE"]=0
test["DAYS_LAST_PHONE_CHANGE"]=abs(test["DAYS_LAST_PHONE_CHANGE"])

train["IS_AMT_REQ_NAN"]=train["AMT_REQ_CREDIT_BUREAU_QRT"].isnull()
test["IS_AMT_REQ_NAN"]=test["AMT_REQ_CREDIT_BUREAU_QRT"].isnull()

train.loc[train["AMT_REQ_CREDIT_BUREAU_QRT"].isnull(),[ "AMT_REQ_CREDIT_BUREAU_HOUR", "AMT_REQ_CREDIT_BUREAU_MON", "AMT_REQ_CREDIT_BUREAU_QRT", "AMT_REQ_CREDIT_BUREAU_YEAR"]]=0
test.loc[test["AMT_REQ_CREDIT_BUREAU_QRT"].isnull(),[ "AMT_REQ_CREDIT_BUREAU_HOUR", "AMT_REQ_CREDIT_BUREAU_MON", "AMT_REQ_CREDIT_BUREAU_QRT", "AMT_REQ_CREDIT_BUREAU_YEAR"]]=0

train.loc[train["AMT_REQ_CREDIT_BUREAU_QRT"]>=9,"AMT_REQ_CREDIT_BUREAU_QRT"]=0
test.loc[test["AMT_REQ_CREDIT_BUREAU_QRT"]>=9,"AMT_REQ_CREDIT_BUREAU_QRT"]=0

train.columns

Index(['TARGET', 'NAME_CONTRACT_TYPE', 'FLAG_OWN_CAR', 'FLAG_OWN_REALTY',
       'CNT_CHILDREN', 'AMT_INCOME_TOTAL', 'AMT_CREDIT', 'AMT_ANNUITY',
       'AMT_GOODS_PRICE', 'NAME_TYPE_SUITE', 'NAME_INCOME_TYPE',
       'NAME_EDUCATION_TYPE', 'NAME_FAMILY_STATUS', 'NAME_HOUSING_TYPE',
       'REGION_POPULATION_RELATIVE', 'DAYS_BIRTH', 'DAYS_REGISTRATION',
       'DAYS_ID_PUBLISH', 'FLAG_EMP_PHONE', 'FLAG_WORK_PHONE',
       'FLAG_CONT_MOBILE', 'FLAG_PHONE', 'FLAG_EMAIL', 'OCCUPATION_TYPE',
       'CNT_FAM_MEMBERS', 'REGION_RATING_CLIENT',
       'REGION_RATING_CLIENT_W_CITY', 'REG_REGION_NOT_LIVE_REGION',
       'REG_REGION_NOT_WORK_REGION', 'LIVE_REGION_NOT_WORK_REGION',
       'REG_CITY_NOT_LIVE_CITY', 'REG_CITY_NOT_WORK_CITY',
       'LIVE_CITY_NOT_WORK_CITY', 'ORGANIZATION_TYPE', 'EXT_SOURCE_2',
       'EXT_SOURCE_3', 'OBS_30_CNT_SOCIAL_CIRCLE', 'DEF_30_CNT_SOCIAL_CIRCLE',
       'OBS_60_CNT_SOCIAL_CIRCLE', 'DEF_60_CNT_SOCIAL_CIRCLE',
       'DAYS_LAST_PHONE_CHANGE', 'AMT_REQ_CREDIT_

train.drop('AMT_ANNUITY',axis=1, inplace=True)
test.drop('AMT_ANNUITY',axis=1, inplace=True)

train.drop('AMT_GOODS_PRICE',axis=1, inplace=True)
test.drop('AMT_GOODS_PRICE',axis=1, inplace=True)

train.drop('CNT_FAM_MEMBERS',axis=1, inplace=True)
test.drop('CNT_FAM_MEMBERS',axis=1, inplace=True)

train.drop('REGION_RATING_CLIENT_W_CITY',axis=1, inplace=True)
test.drop('REGION_RATING_CLIENT_W_CITY',axis=1, inplace=True)

train.drop('REG_REGION_NOT_WORK_REGION',axis=1, inplace=True)
test.drop('REG_REGION_NOT_WORK_REGION',axis=1, inplace=True)

train.drop('REG_CITY_NOT_WORK_CITY',axis=1, inplace=True)
test.drop('REG_CITY_NOT_WORK_CITY',axis=1, inplace=True)

train.drop('OBS_30_CNT_SOCIAL_CIRCLE',axis=1, inplace=True)
test.drop('OBS_30_CNT_SOCIAL_CIRCLE',axis=1, inplace=True)

train.drop('DEF_30_CNT_SOCIAL_CIRCLE',axis=1, inplace=True)
test.drop('DEF_30_CNT_SOCIAL_CIRCLE',axis=1, inplace=True)


In [None]:
X = train.drop("TARGET",axis=1).values
y = train["TARGET"].values
X_test = test.values

In [None]:
from sklearn.preprocessing import PolynomialFeatures
# 交差項作成器の作成
pf = PolynomialFeatures(degree=2, include_bias=False, interaction_only=True)

# 交差項の作成
X_pf = pf.fit_transform(X)
X_test_pf=pf.fit_transform(X_test)


In [None]:
# 標準化
sc = StandardScaler()
sc.fit(X)
X_std = sc.transform(X)
X_test_std = sc.transform(X_test)

In [None]:
# 訓練データと評価データに分割
X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.3, stratify=y, random_state=42)

!pip install optuna

from gc import collect                                # ガーベッジコレクション
from lightgbm import LGBMClassifier as LGBMC
import optuna
from optuna import Trial, trial, create_study
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import StratifiedKFold

from sklearn.model_selection import cross_val_score
from lightgbm import LGBMClassifier
import optuna
import numpy as np

# Optunaの目的関数を定義
def objective(trial):
    params = {
        'objective': 'binary',
        'boosting_type': 'gbdt',
        'num_leaves': trial.suggest_int('num_leaves', 100, 500),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 1.0, log=True),
        'n_estimators': trial.suggest_int('n_estimators', 300, 1000),
        'max_depth': trial.suggest_int('max_depth', 1, 15),
    }
    model = LGBMClassifier(**params)

    # cross_val_scoreを使用してスコアを計算
    scores = cross_val_score(
        model,
        X_train,
        y_train,
        cv=5,  # StratifiedKFoldの代わりに分割数を指定
        scoring='roc_auc'
    )
    return np.mean(scores)

# Optunaのスタディを作成して最適化
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=100)


# 結果を出力
print(f'Best objective value: {study.best_value}')
print(f'Best parameter: {study.best_params}')

In [None]:
# XGBClassifier
from xgboost import XGBClassifier

xgb = XGBClassifier(random_state=42)
xgb.fit(X_train, y_train)

xgb_train_pred = xgb.predict_proba(X_train)[:, 1]
xgb_valid_pred = xgb.predict_proba(X_valid)[:, 1]
print(f"Train Score: {roc_auc_score(y_train, xgb_train_pred)}")
print(f"Valid Score: {roc_auc_score(y_valid, xgb_valid_pred)}")

Train Score: 0.8919904583088274
Valid Score: 0.7235700456723569


In [None]:
# LogisticRegression
from sklearn.linear_model import LogisticRegression

lr = LogisticRegression(random_state=0)
lr.fit(X_train, y_train)

lr_train_pred = lr.predict_proba(X_train)[:, 1]
lr_valid_pred = lr.predict_proba(X_valid)[:, 1]
print(f"Train Score: {roc_auc_score(y_train, lr_train_pred)}")
print(f"Valid Score: {roc_auc_score(y_valid, lr_valid_pred)}")

Train Score: 0.6236966605957391
Valid Score: 0.6207716322990626


In [None]:
# XGBClassifier
from xgboost import XGBClassifier

xgb = XGBClassifier(random_state=0)
xgb.fit(X_train, y_train)

xgb_train_pred = xgb.predict_proba(X_train)[:, 1]
xgb_valid_pred = xgb.predict_proba(X_valid)[:, 1]
print(f"Train Score: {roc_auc_score(y_train, xgb_train_pred)}")
print(f"Valid Score: {roc_auc_score(y_valid, xgb_valid_pred)}")

Train Score: 0.8919904583088274
Valid Score: 0.7235700456723569


In [None]:
# LGBMClassifier
from lightgbm import LGBMClassifier

lgb = LGBMClassifier(random_state=42, max_depth= 5, subsample= 0.9011436382317181, colsample_bytree= 0.2073310201425725,num_leaves= 95,reg_alpha= 0.5082743117069234, reg_lambda=0.45645588322121566)
lgb.fit(X_train, y_train)

lgb_train_pred = lgb.predict_proba(X_train)[:, 1]
lgb_valid_pred = lgb.predict_proba(X_valid)[:, 1]
print(f"Train Score: {roc_auc_score(y_train, lgb_train_pred)}")
print(f"Valid Score: {roc_auc_score(y_valid, lgb_valid_pred)}")

[LightGBM] [Info] Number of positive: 9675, number of negative: 110162
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.042157 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2870
[LightGBM] [Info] Number of data points in the train set: 119837, number of used features: 52
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080735 -> initscore=-2.432407
[LightGBM] [Info] Start training from score -2.432407
Train Score: 0.7810835102280893
Valid Score: 0.7394328362695448


In [None]:
# テストデータに対する予測値の作成
pred = lgb.predict_proba(X_test)[:, 1]
# 予測結果を提出用のフォーマットに格納
sample_sub['TARGET'] = pred
name='submission_202412102049__739.csv'
# 提出用のcsvファイルを作成
sample_sub.to_csv(name,index=False)

from google.colab import files

# ファイルをダウンロード
files.download(name)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
sample_sub

Unnamed: 0,SK_ID_CURR,TARGET
0,171202,0.016838
1,171203,0.129536
2,171204,0.134549
3,171205,0.096082
4,171206,0.142569
...,...,...
61495,232697,0.100266
61496,232698,0.046713
61497,232699,0.042678
61498,232700,0.144745


In [None]:
test

Unnamed: 0,NAME_CONTRACT_TYPE,FLAG_OWN_CAR,FLAG_OWN_REALTY,CNT_CHILDREN,AMT_INCOME_TOTAL,AMT_CREDIT,AMT_ANNUITY,AMT_GOODS_PRICE,NAME_TYPE_SUITE,NAME_INCOME_TYPE,...,OWN_CAR_AGE_1.0,OWN_CAR_AGE_2.0,OWN_CAR_AGE_3.0,OWN_CAR_AGE_4.0,OWN_CAR_AGE_5.0,F,M,IS_SOURCE1_NAN,IS_SOURCE2_NAN,IS_AMT_REQ_NAN
0,0,0,0,1,144000.0,961146.0,28233.0,688500.0,138442.0,88265,...,False,False,False,False,False,True,False,True,False,True
1,0,0,0,0,103500.0,296280.0,16069.5,225000.0,138442.0,88265,...,False,False,False,False,False,True,False,True,False,True
2,0,0,0,1,180000.0,183694.5,11236.5,139500.0,1775.0,40006,...,False,False,False,False,False,True,False,True,False,False
3,1,0,0,2,225000.0,450000.0,22500.0,450000.0,138442.0,88265,...,False,False,False,False,False,True,False,True,False,False
4,0,1,0,2,144000.0,545040.0,26640.0,450000.0,138442.0,88265,...,True,False,False,False,False,True,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
61495,0,1,0,0,315000.0,1288350.0,37800.0,1125000.0,22355.0,40006,...,True,False,False,False,False,True,False,False,False,False
61496,0,1,0,0,90000.0,273636.0,15408.0,247500.0,138442.0,88265,...,False,False,False,False,False,True,False,True,False,False
61497,0,0,0,0,144000.0,291384.0,26725.5,270000.0,22355.0,40006,...,False,False,False,False,False,True,False,False,False,False
61498,0,0,0,1,193500.0,746280.0,59094.0,675000.0,138442.0,12007,...,False,False,False,False,False,True,False,False,False,False
