# Imports

In [70]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import IsolationForest
from sklearn.metrics import average_precision_score, roc_auc_score


In [72]:
# --------------------
# Load data
# --------------------
# Both CSVs are resolved relative to the kernel's working directory.
# The training file is read first, then the validation file.
train_df, valid_df = (
    pd.read_csv(csv_name)
    for csv_name in (r'sample_loans_train.csv', r'sample_loans_valid.csv')
)

In [74]:
 pd.set_option('display.max_columns', None)
train_df.head()

Unnamed: 0,index,target,CreditScore,FirstPaymentDate,FirstTimeHomebuyerFlag,MaturityDate,MSA,MI_Pct,NumberOfUnits,OccupancyStatus,OriginalCLTV,OriginalDTI,OriginalUPB,OriginalLTV,OriginalInterestRate,Channel,PPM_Flag,ProductType,PropertyState,PropertyType,PostalCode,LoanPurpose,OriginalLoanTerm,NumberOfBorrowers,SellerName,ServicerName,SuperConformingFlag,PreHARP_Flag,ProgramIndicator,ReliefRefinanceIndicator,PropertyValMethod,InterestOnlyFlag,BalloonIndicator,0_CurrentActualUPB,0_CurrentInterestRate,0_CurrentNonInterestBearingUPB,0_EstimatedLTV,0_InterestBearingUPB,0_LoanAge,0_MonthlyReportingPeriod,0_RemainingMonthsToLegalMaturity,1_CurrentActualUPB,1_CurrentInterestRate,1_CurrentNonInterestBearingUPB,1_EstimatedLTV,1_InterestBearingUPB,1_LoanAge,1_MonthlyReportingPeriod,1_RemainingMonthsToLegalMaturity,2_CurrentActualUPB,2_CurrentInterestRate,2_CurrentNonInterestBearingUPB,2_EstimatedLTV,2_InterestBearingUPB,2_LoanAge,2_MonthlyReportingPeriod,2_RemainingMonthsToLegalMaturity,3_CurrentActualUPB,3_CurrentInterestRate,3_CurrentNonInterestBearingUPB,3_EstimatedLTV,3_InterestBearingUPB,3_LoanAge,3_MonthlyReportingPeriod,3_RemainingMonthsToLegalMaturity,4_CurrentActualUPB,4_CurrentInterestRate,4_CurrentNonInterestBearingUPB,4_EstimatedLTV,4_InterestBearingUPB,4_LoanAge,4_MonthlyReportingPeriod,4_RemainingMonthsToLegalMaturity,5_CurrentActualUPB,5_CurrentInterestRate,5_CurrentNonInterestBearingUPB,5_EstimatedLTV,5_InterestBearingUPB,5_LoanAge,5_MonthlyReportingPeriod,5_RemainingMonthsToLegalMaturity,6_CurrentActualUPB,6_CurrentInterestRate,6_CurrentNonInterestBearingUPB,6_EstimatedLTV,6_InterestBearingUPB,6_LoanAge,6_MonthlyReportingPeriod,6_RemainingMonthsToLegalMaturity,7_CurrentActualUPB,7_CurrentInterestRate,7_CurrentNonInterestBearingUPB,7_EstimatedLTV,7_InterestBearingUPB,7_LoanAge,7_MonthlyReportingPeriod,7_RemainingMonthsToLegalMaturity,8_CurrentActualUPB,8_CurrentInterestRate,8_CurrentNonInterestBearingUPB,8_EstimatedLTV,8_InterestBearingUPB,8_LoanAge,8_Monthly
ReportingPeriod,8_RemainingMonthsToLegalMaturity,9_CurrentActualUPB,9_CurrentInterestRate,9_CurrentNonInterestBearingUPB,9_EstimatedLTV,9_InterestBearingUPB,9_LoanAge,9_MonthlyReportingPeriod,9_RemainingMonthsToLegalMaturity,10_CurrentActualUPB,10_CurrentInterestRate,10_CurrentNonInterestBearingUPB,10_EstimatedLTV,10_InterestBearingUPB,10_LoanAge,10_MonthlyReportingPeriod,10_RemainingMonthsToLegalMaturity,11_CurrentActualUPB,11_CurrentInterestRate,11_CurrentNonInterestBearingUPB,11_EstimatedLTV,11_InterestBearingUPB,11_LoanAge,11_MonthlyReportingPeriod,11_RemainingMonthsToLegalMaturity,12_CurrentActualUPB,12_CurrentInterestRate,12_CurrentNonInterestBearingUPB,12_EstimatedLTV,12_InterestBearingUPB,12_LoanAge,12_MonthlyReportingPeriod,12_RemainingMonthsToLegalMaturity,13_CurrentActualUPB,13_CurrentInterestRate,13_CurrentNonInterestBearingUPB,13_EstimatedLTV,13_InterestBearingUPB,13_LoanAge,13_MonthlyReportingPeriod,13_RemainingMonthsToLegalMaturity
0,0,0,747,202403,N,205402,,0,1,P,67,26,82000,67,8.0,R,N,FRM,KS,MH,67600,P,360,1,Other sellers,Other servicers,,,9,,2,N,7,82000.0,8.0,0.0,999,82000.0,0,202402,360,82000.0,8.0,0.0,999,82000.0,1,202403,359,82000.0,8.0,0.0,999,82000.0,2,202404,358,82000.0,8.0,0.0,999,82000.0,3,202405,357,82000.0,8.0,0.0,999,82000.0,4,202406,356,82000.0,8.0,0.0,999,82000.0,5,202407,355,82000.0,8.0,0.0,999,82000.0,6,202408,354,81546.88,8.0,0.0,999,81546.88,7,202409,353,81488.53,8.0,0.0,999,81488.53,8,202410,352,81429.79,8.0,0.0,999,81429.79,9,202411,351,81370.66,8.0,0.0,999,81370.66,10,202412,350,81311.13,8.0,0.0,999,81311.13,11,202501,349,81251.2,8.0,0.0,999,81251.2,12,202502,348,81190.87,8.0,0.0,999,81190.87,13,202503,347
1,1,0,659,202403,N,205402,,0,1,P,68,32,305000,68,7.875,R,N,FRM,MI,SF,49000,P,360,2,Other sellers,Other servicers,,,9,,2,N,7,305000.0,7.875,0.0,67,305000.0,0,202402,360,305000.0,7.875,0.0,66,305000.0,1,202403,359,304000.0,7.875,0.0,64,304000.0,2,202404,358,304000.0,7.875,0.0,63,304000.0,3,202405,357,304000.0,7.875,0.0,61,304000.0,4,202406,356,304000.0,7.875,0.0,63,304000.0,5,202407,355,304000.0,7.875,0.0,66,304000.0,6,202408,354,303281.74,7.875,0.0,66,303281.74,7,202409,353,303060.57,7.875,0.0,66,303060.57,8,202410,352,302837.94,7.875,0.0,64,302837.94,9,202411,351,302613.85,7.875,0.0,63,302613.85,10,202412,350,302388.29,7.875,0.0,63,302388.29,11,202501,349,302161.25,7.875,0.0,64,302161.25,12,202502,348,301932.72,7.875,0.0,64,301932.72,13,202503,347
2,3,0,775,202403,N,205402,46540.0,0,2,I,70,15,154000,70,7.625,R,N,FRM,NY,SF,13500,C,360,2,Other sellers,Other servicers,,,9,,2,N,7,154000.0,7.625,0.0,72,154000.0,0,202402,360,154000.0,7.625,0.0,73,154000.0,1,202403,359,154000.0,7.625,0.0,72,154000.0,2,202404,358,154000.0,7.625,0.0,75,154000.0,3,202405,357,153000.0,7.625,0.0,999,153000.0,4,202406,356,153000.0,7.625,0.0,999,153000.0,5,202407,355,153000.0,7.625,0.0,999,153000.0,6,202408,354,152967.02,7.625,0.0,999,152967.02,7,202409,353,152815.68,7.625,0.0,999,152815.68,8,202410,352,152663.38,7.625,0.0,999,152663.38,9,202411,351,152510.11,7.625,0.0,999,152510.11,10,202412,350,152355.86,7.625,0.0,999,152355.86,11,202501,349,152200.63,7.625,0.0,999,152200.63,12,202502,348,152044.42,7.625,0.0,999,152044.42,13,202503,347
3,4,0,815,202403,Y,205402,,25,1,P,88,49,51000,88,7.25,R,N,FRM,IL,SF,61400,P,360,1,Other sellers,Other servicers,,,H,,2,N,N,51000.0,7.25,0.0,87,51000.0,0,202402,360,51000.0,7.25,0.0,87,51000.0,1,202403,359,51000.0,7.25,0.0,84,51000.0,2,202404,358,50000.0,7.25,0.0,79,50000.0,3,202405,357,50000.0,7.25,0.0,78,50000.0,4,202406,356,50000.0,7.25,0.0,78,50000.0,5,202407,355,50000.0,7.25,0.0,84,50000.0,6,202408,354,50309.53,7.25,0.0,85,50309.53,7,202409,353,50266.25,7.25,0.0,84,50266.25,8,202410,352,50222.71,7.25,0.0,78,50222.71,9,202411,351,50222.71,7.25,0.0,76,50222.71,10,202412,350,50134.84,7.25,0.0,78,50134.84,11,202501,349,50090.51,7.25,0.0,72,50090.51,12,202502,348,50045.91,7.25,0.0,70,50045.91,13,202503,347
4,6,0,772,202403,N,205402,10900.0,0,1,P,69,41,155000,69,7.75,R,N,FRM,PA,SF,18000,C,360,1,Other sellers,NATIONSTAR MORTGAGE LLC DBA MR. COOPER,,,9,,2,N,7,155000.0,7.75,0.0,71,155000.0,0,202402,360,155000.0,7.75,0.0,70,155000.0,1,202403,359,155000.0,7.75,0.0,68,155000.0,2,202404,358,155000.0,7.75,0.0,66,155000.0,3,202405,357,154000.0,7.75,0.0,65,154000.0,4,202406,356,154000.0,7.75,0.0,63,154000.0,5,202407,355,154000.0,7.75,0.0,64,154000.0,6,202408,354,154104.77,7.75,0.0,65,154104.77,7,202409,353,153989.59,7.75,0.0,63,153989.59,8,202410,352,153873.67,7.75,0.0,63,153873.67,9,202411,351,153873.67,7.75,0.0,65,153873.67,10,202412,350,153757.0,7.75,0.0,64,153757.0,11,202501,349,153639.57,7.75,0.0,65,153639.57,12,202502,348,153521.39,7.75,0.0,63,153521.39,13,202503,347


In [80]:
# Display the first training row whose `target` equals 0.
# NOTE(review): the train_df.describe() output in this notebook shows target
# with mean/std/max of 0.0, so this mask appears to keep every row - confirm.
train_df.loc[train_df["target"] == 0].head(1)

Unnamed: 0,index,target,CreditScore,FirstPaymentDate,FirstTimeHomebuyerFlag,MaturityDate,MSA,MI_Pct,NumberOfUnits,OccupancyStatus,OriginalCLTV,OriginalDTI,OriginalUPB,OriginalLTV,OriginalInterestRate,Channel,PPM_Flag,ProductType,PropertyState,PropertyType,PostalCode,LoanPurpose,OriginalLoanTerm,NumberOfBorrowers,SellerName,ServicerName,SuperConformingFlag,PreHARP_Flag,ProgramIndicator,ReliefRefinanceIndicator,PropertyValMethod,InterestOnlyFlag,BalloonIndicator,0_CurrentActualUPB,0_CurrentInterestRate,0_CurrentNonInterestBearingUPB,0_EstimatedLTV,0_InterestBearingUPB,0_LoanAge,0_MonthlyReportingPeriod,0_RemainingMonthsToLegalMaturity,1_CurrentActualUPB,1_CurrentInterestRate,1_CurrentNonInterestBearingUPB,1_EstimatedLTV,1_InterestBearingUPB,1_LoanAge,1_MonthlyReportingPeriod,1_RemainingMonthsToLegalMaturity,2_CurrentActualUPB,2_CurrentInterestRate,2_CurrentNonInterestBearingUPB,2_EstimatedLTV,2_InterestBearingUPB,2_LoanAge,2_MonthlyReportingPeriod,2_RemainingMonthsToLegalMaturity,3_CurrentActualUPB,3_CurrentInterestRate,3_CurrentNonInterestBearingUPB,3_EstimatedLTV,3_InterestBearingUPB,3_LoanAge,3_MonthlyReportingPeriod,3_RemainingMonthsToLegalMaturity,4_CurrentActualUPB,4_CurrentInterestRate,4_CurrentNonInterestBearingUPB,4_EstimatedLTV,4_InterestBearingUPB,4_LoanAge,4_MonthlyReportingPeriod,4_RemainingMonthsToLegalMaturity,5_CurrentActualUPB,5_CurrentInterestRate,5_CurrentNonInterestBearingUPB,5_EstimatedLTV,5_InterestBearingUPB,5_LoanAge,5_MonthlyReportingPeriod,5_RemainingMonthsToLegalMaturity,6_CurrentActualUPB,6_CurrentInterestRate,6_CurrentNonInterestBearingUPB,6_EstimatedLTV,6_InterestBearingUPB,6_LoanAge,6_MonthlyReportingPeriod,6_RemainingMonthsToLegalMaturity,7_CurrentActualUPB,7_CurrentInterestRate,7_CurrentNonInterestBearingUPB,7_EstimatedLTV,7_InterestBearingUPB,7_LoanAge,7_MonthlyReportingPeriod,7_RemainingMonthsToLegalMaturity,8_CurrentActualUPB,8_CurrentInterestRate,8_CurrentNonInterestBearingUPB,8_EstimatedLTV,8_InterestBearingUPB,8_LoanAge,8_Monthly
ReportingPeriod,8_RemainingMonthsToLegalMaturity,9_CurrentActualUPB,9_CurrentInterestRate,9_CurrentNonInterestBearingUPB,9_EstimatedLTV,9_InterestBearingUPB,9_LoanAge,9_MonthlyReportingPeriod,9_RemainingMonthsToLegalMaturity,10_CurrentActualUPB,10_CurrentInterestRate,10_CurrentNonInterestBearingUPB,10_EstimatedLTV,10_InterestBearingUPB,10_LoanAge,10_MonthlyReportingPeriod,10_RemainingMonthsToLegalMaturity,11_CurrentActualUPB,11_CurrentInterestRate,11_CurrentNonInterestBearingUPB,11_EstimatedLTV,11_InterestBearingUPB,11_LoanAge,11_MonthlyReportingPeriod,11_RemainingMonthsToLegalMaturity,12_CurrentActualUPB,12_CurrentInterestRate,12_CurrentNonInterestBearingUPB,12_EstimatedLTV,12_InterestBearingUPB,12_LoanAge,12_MonthlyReportingPeriod,12_RemainingMonthsToLegalMaturity,13_CurrentActualUPB,13_CurrentInterestRate,13_CurrentNonInterestBearingUPB,13_EstimatedLTV,13_InterestBearingUPB,13_LoanAge,13_MonthlyReportingPeriod,13_RemainingMonthsToLegalMaturity
0,0,0,747,202403,N,205402,,0,1,P,67,26,82000,67,8.0,R,N,FRM,KS,MH,67600,P,360,1,Other sellers,Other servicers,,,9,,2,N,7,82000.0,8.0,0.0,999,82000.0,0,202402,360,82000.0,8.0,0.0,999,82000.0,1,202403,359,82000.0,8.0,0.0,999,82000.0,2,202404,358,82000.0,8.0,0.0,999,82000.0,3,202405,357,82000.0,8.0,0.0,999,82000.0,4,202406,356,82000.0,8.0,0.0,999,82000.0,5,202407,355,82000.0,8.0,0.0,999,82000.0,6,202408,354,81546.88,8.0,0.0,999,81546.88,7,202409,353,81488.53,8.0,0.0,999,81488.53,8,202410,352,81429.79,8.0,0.0,999,81429.79,9,202411,351,81370.66,8.0,0.0,999,81370.66,10,202412,350,81311.13,8.0,0.0,999,81311.13,11,202501,349,81251.2,8.0,0.0,999,81251.2,12,202502,348,81190.87,8.0,0.0,999,81190.87,13,202503,347


In [82]:
# Display the first validation row whose `target` equals 1.
valid_df.loc[valid_df["target"] == 1].head(1)

Unnamed: 0,index,target,CreditScore,FirstPaymentDate,FirstTimeHomebuyerFlag,MaturityDate,MSA,MI_Pct,NumberOfUnits,OccupancyStatus,OriginalCLTV,OriginalDTI,OriginalUPB,OriginalLTV,OriginalInterestRate,Channel,PPM_Flag,ProductType,PropertyState,PropertyType,PostalCode,LoanPurpose,OriginalLoanTerm,NumberOfBorrowers,SellerName,ServicerName,SuperConformingFlag,PreHARP_Flag,ProgramIndicator,ReliefRefinanceIndicator,PropertyValMethod,InterestOnlyFlag,BalloonIndicator,0_CurrentActualUPB,0_CurrentInterestRate,0_CurrentNonInterestBearingUPB,0_EstimatedLTV,0_InterestBearingUPB,0_LoanAge,0_MonthlyReportingPeriod,0_RemainingMonthsToLegalMaturity,1_CurrentActualUPB,1_CurrentInterestRate,1_CurrentNonInterestBearingUPB,1_EstimatedLTV,1_InterestBearingUPB,1_LoanAge,1_MonthlyReportingPeriod,1_RemainingMonthsToLegalMaturity,2_CurrentActualUPB,2_CurrentInterestRate,2_CurrentNonInterestBearingUPB,2_EstimatedLTV,2_InterestBearingUPB,2_LoanAge,2_MonthlyReportingPeriod,2_RemainingMonthsToLegalMaturity,3_CurrentActualUPB,3_CurrentInterestRate,3_CurrentNonInterestBearingUPB,3_EstimatedLTV,3_InterestBearingUPB,3_LoanAge,3_MonthlyReportingPeriod,3_RemainingMonthsToLegalMaturity,4_CurrentActualUPB,4_CurrentInterestRate,4_CurrentNonInterestBearingUPB,4_EstimatedLTV,4_InterestBearingUPB,4_LoanAge,4_MonthlyReportingPeriod,4_RemainingMonthsToLegalMaturity,5_CurrentActualUPB,5_CurrentInterestRate,5_CurrentNonInterestBearingUPB,5_EstimatedLTV,5_InterestBearingUPB,5_LoanAge,5_MonthlyReportingPeriod,5_RemainingMonthsToLegalMaturity,6_CurrentActualUPB,6_CurrentInterestRate,6_CurrentNonInterestBearingUPB,6_EstimatedLTV,6_InterestBearingUPB,6_LoanAge,6_MonthlyReportingPeriod,6_RemainingMonthsToLegalMaturity,7_CurrentActualUPB,7_CurrentInterestRate,7_CurrentNonInterestBearingUPB,7_EstimatedLTV,7_InterestBearingUPB,7_LoanAge,7_MonthlyReportingPeriod,7_RemainingMonthsToLegalMaturity,8_CurrentActualUPB,8_CurrentInterestRate,8_CurrentNonInterestBearingUPB,8_EstimatedLTV,8_InterestBearingUPB,8_LoanAge,8_Monthly
ReportingPeriod,8_RemainingMonthsToLegalMaturity,9_CurrentActualUPB,9_CurrentInterestRate,9_CurrentNonInterestBearingUPB,9_EstimatedLTV,9_InterestBearingUPB,9_LoanAge,9_MonthlyReportingPeriod,9_RemainingMonthsToLegalMaturity,10_CurrentActualUPB,10_CurrentInterestRate,10_CurrentNonInterestBearingUPB,10_EstimatedLTV,10_InterestBearingUPB,10_LoanAge,10_MonthlyReportingPeriod,10_RemainingMonthsToLegalMaturity,11_CurrentActualUPB,11_CurrentInterestRate,11_CurrentNonInterestBearingUPB,11_EstimatedLTV,11_InterestBearingUPB,11_LoanAge,11_MonthlyReportingPeriod,11_RemainingMonthsToLegalMaturity,12_CurrentActualUPB,12_CurrentInterestRate,12_CurrentNonInterestBearingUPB,12_EstimatedLTV,12_InterestBearingUPB,12_LoanAge,12_MonthlyReportingPeriod,12_RemainingMonthsToLegalMaturity,13_CurrentActualUPB,13_CurrentInterestRate,13_CurrentNonInterestBearingUPB,13_EstimatedLTV,13_InterestBearingUPB,13_LoanAge,13_MonthlyReportingPeriod,13_RemainingMonthsToLegalMaturity
13,124,1,676,202403,Y,205402,35380.0,0,1,P,73,49,197000,73,6.875,R,N,FRM,LA,PU,70000,P,360,2,Other sellers,NATIONSTAR MORTGAGE LLC DBA MR. COOPER,,,9,,2,N,7,197000.0,6.875,0.0,74,197000.0,0,202402,360,197000.0,6.875,0.0,71,197000.0,1,202403,359,197000.0,6.875,0.0,70,197000.0,2,202404,358,197000.0,6.875,0.0,70,197000.0,3,202405,357,197000.0,6.875,0.0,70,197000.0,4,202406,356,196000.0,6.875,0.0,71,196000.0,5,202407,355,196000.0,6.875,0.0,72,196000.0,6,202408,354,196089.77,6.875,0.0,72,196089.77,7,202409,353,195917.28,6.875,0.0,72,195917.28,8,202410,352,195743.8,6.875,0.0,72,195743.8,9,202411,351,195569.33,6.875,0.0,71,195569.33,10,202412,350,195393.86,6.875,0.0,70,195393.86,11,202501,349,195217.38,6.875,0.0,71,195217.38,12,202502,348,195217.38,6.875,0.0,72,195217.38,13,202503,347


In [84]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns of the validation set.
# NOTE(review): in the displayed output PreHARP_Flag and ReliefRefinanceIndicator
# have count 0 (no non-null values), MSA has fewer non-null rows than the other
# columns, and every *_EstimatedLTV column peaks at 999 - presumably a
# missing-value sentinel; confirm before using these columns as features.
valid_df.describe()

Unnamed: 0,index,target,CreditScore,FirstPaymentDate,MaturityDate,MSA,MI_Pct,NumberOfUnits,OriginalCLTV,OriginalDTI,OriginalUPB,OriginalLTV,OriginalInterestRate,PostalCode,OriginalLoanTerm,NumberOfBorrowers,PreHARP_Flag,ReliefRefinanceIndicator,PropertyValMethod,0_CurrentActualUPB,0_CurrentInterestRate,0_CurrentNonInterestBearingUPB,0_EstimatedLTV,0_InterestBearingUPB,0_LoanAge,0_MonthlyReportingPeriod,0_RemainingMonthsToLegalMaturity,1_CurrentActualUPB,1_CurrentInterestRate,1_CurrentNonInterestBearingUPB,1_EstimatedLTV,1_InterestBearingUPB,1_LoanAge,1_MonthlyReportingPeriod,1_RemainingMonthsToLegalMaturity,2_CurrentActualUPB,2_CurrentInterestRate,2_CurrentNonInterestBearingUPB,2_EstimatedLTV,2_InterestBearingUPB,2_LoanAge,2_MonthlyReportingPeriod,2_RemainingMonthsToLegalMaturity,3_CurrentActualUPB,3_CurrentInterestRate,3_CurrentNonInterestBearingUPB,3_EstimatedLTV,3_InterestBearingUPB,3_LoanAge,3_MonthlyReportingPeriod,3_RemainingMonthsToLegalMaturity,4_CurrentActualUPB,4_CurrentInterestRate,4_CurrentNonInterestBearingUPB,4_EstimatedLTV,4_InterestBearingUPB,4_LoanAge,4_MonthlyReportingPeriod,4_RemainingMonthsToLegalMaturity,5_CurrentActualUPB,5_CurrentInterestRate,5_CurrentNonInterestBearingUPB,5_EstimatedLTV,5_InterestBearingUPB,5_LoanAge,5_MonthlyReportingPeriod,5_RemainingMonthsToLegalMaturity,6_CurrentActualUPB,6_CurrentInterestRate,6_CurrentNonInterestBearingUPB,6_EstimatedLTV,6_InterestBearingUPB,6_LoanAge,6_MonthlyReportingPeriod,6_RemainingMonthsToLegalMaturity,7_CurrentActualUPB,7_CurrentInterestRate,7_CurrentNonInterestBearingUPB,7_EstimatedLTV,7_InterestBearingUPB,7_LoanAge,7_MonthlyReportingPeriod,7_RemainingMonthsToLegalMaturity,8_CurrentActualUPB,8_CurrentInterestRate,8_CurrentNonInterestBearingUPB,8_EstimatedLTV,8_InterestBearingUPB,8_LoanAge,8_MonthlyReportingPeriod,8_RemainingMonthsToLegalMaturity,9_CurrentActualUPB,9_CurrentInterestRate,9_CurrentNonInterestBearingUPB,9_EstimatedLTV,9_InterestBearingUPB,9_LoanAge,9_MonthlyReportingPeriod,9_Remainin
gMonthsToLegalMaturity,10_CurrentActualUPB,10_CurrentInterestRate,10_CurrentNonInterestBearingUPB,10_EstimatedLTV,10_InterestBearingUPB,10_LoanAge,10_MonthlyReportingPeriod,10_RemainingMonthsToLegalMaturity,11_CurrentActualUPB,11_CurrentInterestRate,11_CurrentNonInterestBearingUPB,11_EstimatedLTV,11_InterestBearingUPB,11_LoanAge,11_MonthlyReportingPeriod,11_RemainingMonthsToLegalMaturity,12_CurrentActualUPB,12_CurrentInterestRate,12_CurrentNonInterestBearingUPB,12_EstimatedLTV,12_InterestBearingUPB,12_LoanAge,12_MonthlyReportingPeriod,12_RemainingMonthsToLegalMaturity,13_CurrentActualUPB,13_CurrentInterestRate,13_CurrentNonInterestBearingUPB,13_EstimatedLTV,13_InterestBearingUPB,13_LoanAge,13_MonthlyReportingPeriod,13_RemainingMonthsToLegalMaturity
count,1075.0,1075.0,1075.0,1075.0,1075.0,903.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,0.0,0.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0,1075.0
mean,4991.012093,0.126512,746.16093,202402.966512,205328.663256,28960.157254,10.157209,1.031628,75.405581,37.43907,295063.3,75.302326,6.799724,49413.209302,351.202791,1.458605,,,1.91814,294438.1,6.799724,0.0,163.524651,294438.1,0.033488,202402.0,351.169302,292861.4,6.799724,0.0,146.306047,292861.4,1.033488,202403.0,350.169302,290869.8,6.799724,0.0,145.35907,290869.8,2.033488,202404.0,349.169302,288852.1,6.799724,0.0,141.263256,288852.1,3.033488,202405.0,348.169302,287934.0,6.799724,0.0,138.875349,287934.0,4.033488,202406.0,347.169302,287454.0,6.799724,0.0,139.835349,287454.0,5.033488,202407.0,346.169302,286834.3,6.799724,0.0,141.791628,286834.3,6.033488,202408.0,345.169302,286166.3,6.799724,0.0,140.796279,286166.3,7.033488,202409.0,344.169302,285546.1,6.799724,0.0,139.637209,285546.1,8.033488,202410.0,343.169302,284932.7,6.799724,0.0,140.049302,284932.7,9.033488,202411.0,342.169302,284247.4,6.799724,0.0,136.553488,284247.4,10.033488,202412.0,341.169302,283526.6,6.799724,9.631367,135.376744,283517.0,11.033488,202501.0,340.169302,282948.3,6.799724,13.919553,134.222326,282934.3,12.033488,202502.0,339.169302,279034.6,6.799724,13.919553,142.768372,279020.7,13.033488,202503.0,338.169302
std,2915.327332,0.33258,45.641845,0.179992,314.187475,11324.787334,13.548466,0.256978,19.889714,9.429971,180779.5,19.841756,0.540205,27419.356149,37.703511,0.539767,,,0.471533,180273.5,0.540205,0.0,272.799232,180273.5,0.179992,0.0,37.696119,180169.8,0.540205,0.0,250.714908,180169.8,0.179992,0.0,37.696119,179529.7,0.540205,0.0,252.567629,179529.7,0.179992,0.0,37.696119,179011.4,0.540205,0.0,248.963973,179011.4,0.179992,0.0,37.696119,179021.5,0.540205,0.0,246.409515,179021.5,0.179992,0.0,37.696119,178923.2,0.540205,0.0,247.774763,178923.2,0.179992,0.0,37.696119,178820.4,0.540205,0.0,247.32207,178820.4,0.179992,0.0,37.696119,178755.7,0.540205,0.0,245.980133,178755.7,0.179992,0.0,37.696119,178653.3,0.540205,0.0,244.677473,178653.3,0.179992,0.0,37.696119,178398.5,0.540205,0.0,246.189539,178398.5,0.179992,0.0,37.696119,178442.9,0.540205,0.0,242.207357,178442.9,0.179992,0.0,37.696119,178463.1,0.540205,315.7855,242.479584,178457.6,0.179992,0.0,37.696119,178284.2,0.540205,345.550984,241.124372,178282.7,0.179992,0.0,37.696119,179709.4,0.540205,345.550984,256.526387,179707.6,0.179992,0.0,37.696119
min,2.0,0.0,606.0,202402.0,203402.0,10420.0,0.0,1.0,12.0,7.0,25000.0,12.0,3.25,600.0,120.0,1.0,,,1.0,25000.0,3.25,0.0,12.0,25000.0,0.0,202402.0,120.0,25000.0,3.25,0.0,12.0,25000.0,1.0,202403.0,119.0,25000.0,3.25,0.0,8.0,25000.0,2.0,202404.0,118.0,25000.0,3.25,0.0,7.0,25000.0,3.0,202405.0,117.0,25000.0,3.25,0.0,7.0,25000.0,4.0,202406.0,116.0,24000.0,3.25,0.0,6.0,24000.0,5.0,202407.0,115.0,22000.0,3.25,0.0,5.0,22000.0,6.0,202408.0,114.0,20236.48,3.25,0.0,5.0,20236.48,7.0,202409.0,113.0,18946.47,3.25,0.0,4.0,18946.47,8.0,202410.0,112.0,18870.26,3.25,0.0,4.0,18870.26,9.0,202411.0,111.0,15620.62,3.25,0.0,3.0,15620.62,10.0,202412.0,110.0,13556.99,3.25,0.0,2.0,13556.99,11.0,202501.0,109.0,10136.05,3.25,0.0,2.0,10136.05,12.0,202502.0,108.0,0.0,3.25,0.0,1.0,0.0,13.0,202503.0,107.0
25%,2433.5,0.0,715.0,202403.0,205402.0,18000.0,0.0,1.0,66.0,32.0,160000.0,66.0,6.5,29200.0,360.0,1.0,,,2.0,160000.0,6.5,0.0,67.0,160000.0,0.0,202402.0,360.0,159000.0,6.5,0.0,65.0,159000.0,1.0,202403.0,359.0,156500.0,6.5,0.0,64.0,156500.0,2.0,202404.0,358.0,155000.0,6.5,0.0,61.0,155000.0,3.0,202405.0,357.0,153500.0,6.5,0.0,61.0,153500.0,4.0,202406.0,356.0,153500.0,6.5,0.0,61.0,153500.0,5.0,202407.0,355.0,153500.0,6.5,0.0,61.5,153500.0,6.0,202408.0,354.0,153038.6,6.5,0.0,62.0,153038.6,7.0,202409.0,353.0,152584.0,6.5,0.0,61.0,152584.0,8.0,202410.0,352.0,152381.9,6.5,0.0,60.5,152381.9,9.0,202411.0,351.0,151891.0,6.5,0.0,60.0,151891.0,10.0,202412.0,350.0,151495.8,6.5,0.0,59.0,151495.8,11.0,202501.0,349.0,151523.3,6.5,0.0,59.0,151523.3,12.0,202502.0,348.0,149032.1,6.5,0.0,58.0,149032.1,13.0,202503.0,347.0
50%,5001.0,0.0,754.0,202403.0,205402.0,29484.0,0.0,1.0,80.0,39.0,250000.0,80.0,6.75,48000.0,360.0,1.0,,,2.0,250000.0,6.75,0.0,80.0,250000.0,0.0,202402.0,360.0,248000.0,6.75,0.0,78.0,248000.0,1.0,202403.0,359.0,247000.0,6.75,0.0,77.0,247000.0,2.0,202404.0,358.0,245000.0,6.75,0.0,75.0,245000.0,3.0,202405.0,357.0,244000.0,6.75,0.0,75.0,244000.0,4.0,202406.0,356.0,244000.0,6.75,0.0,75.0,244000.0,5.0,202407.0,355.0,244000.0,6.75,0.0,77.0,244000.0,6.0,202408.0,354.0,243486.8,6.75,0.0,77.0,243486.8,7.0,202409.0,353.0,243043.2,6.75,0.0,77.0,243043.2,8.0,202410.0,352.0,242597.2,6.75,0.0,76.0,242597.2,9.0,202411.0,351.0,241890.3,6.75,0.0,75.0,241890.3,10.0,202412.0,350.0,241375.4,6.75,0.0,74.0,241375.4,11.0,202501.0,349.0,241152.2,6.75,0.0,74.0,241152.2,12.0,202502.0,348.0,235959.3,6.75,0.0,73.0,235959.3,13.0,202503.0,347.0
75%,7495.0,0.0,784.0,202403.0,205402.0,38880.0,25.0,1.0,94.0,45.0,384000.0,93.0,7.125,75000.0,360.0,2.0,,,2.0,384000.0,7.125,0.0,95.0,384000.0,0.0,202402.0,360.0,384000.0,7.125,0.0,93.0,384000.0,1.0,202403.0,359.0,379000.0,7.125,0.0,90.0,379000.0,2.0,202404.0,358.0,377500.0,7.125,0.0,88.0,377500.0,3.0,202405.0,357.0,374500.0,7.125,0.0,87.0,374500.0,4.0,202406.0,356.0,374500.0,7.125,0.0,87.0,374500.0,5.0,202407.0,355.0,374000.0,7.125,0.0,91.0,374000.0,6.0,202408.0,354.0,373179.6,7.125,0.0,90.0,373179.6,7.0,202409.0,353.0,372737.8,7.125,0.0,90.0,372737.8,8.0,202410.0,352.0,372396.8,7.125,0.0,90.0,372396.8,9.0,202411.0,351.0,372029.3,7.125,0.0,88.0,372029.3,10.0,202412.0,350.0,371669.6,7.125,0.0,86.0,371669.6,11.0,202501.0,349.0,370989.3,7.125,0.0,86.0,370989.3,12.0,202502.0,348.0,366390.4,7.125,0.0,85.0,366390.4,13.0,202503.0,347.0
max,10153.0,1.0,825.0,202403.0,205402.0,49740.0,35.0,4.0,105.0,50.0,1294000.0,97.0,8.625,99600.0,360.0,4.0,,,4.0,1294000.0,8.625,0.0,999.0,1294000.0,1.0,202402.0,360.0,1293000.0,8.625,0.0,999.0,1293000.0,2.0,202403.0,359.0,1292000.0,8.625,0.0,999.0,1292000.0,3.0,202404.0,358.0,1291000.0,8.625,0.0,999.0,1291000.0,4.0,202405.0,357.0,1290000.0,8.625,0.0,999.0,1290000.0,5.0,202406.0,356.0,1289000.0,8.625,0.0,999.0,1289000.0,6.0,202407.0,355.0,1288000.0,8.625,0.0,999.0,1288000.0,7.0,202408.0,354.0,1286731.0,8.625,0.0,999.0,1286731.0,8.0,202409.0,353.0,1285703.0,8.625,0.0,999.0,1285703.0,9.0,202410.0,352.0,1284669.0,8.625,0.0,999.0,1284669.0,10.0,202411.0,351.0,1283629.0,8.625,0.0,999.0,1283629.0,11.0,202412.0,350.0,1282582.0,8.625,10353.72,999.0,1282582.0,12.0,202501.0,349.0,1281529.0,8.625,10353.72,999.0,1281529.0,13.0,202502.0,348.0,1280470.0,8.625,10353.72,999.0,1280470.0,14.0,202503.0,347.0


In [86]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns of the training set.
# NOTE(review): in the displayed output `target` is constant (mean/std/max all
# 0.0), CreditScore has a max of 9999 against a 75th percentile of 786, and the
# *_EstimatedLTV columns peak at 999 - presumably sentinel codes for missing
# values rather than real measurements; confirm and handle before scaling.
train_df.describe()

Unnamed: 0,index,target,CreditScore,FirstPaymentDate,MaturityDate,MSA,MI_Pct,NumberOfUnits,OriginalCLTV,OriginalDTI,OriginalUPB,OriginalLTV,OriginalInterestRate,PostalCode,OriginalLoanTerm,NumberOfBorrowers,PreHARP_Flag,ReliefRefinanceIndicator,PropertyValMethod,0_CurrentActualUPB,0_CurrentInterestRate,0_CurrentNonInterestBearingUPB,0_EstimatedLTV,0_InterestBearingUPB,0_LoanAge,0_MonthlyReportingPeriod,0_RemainingMonthsToLegalMaturity,1_CurrentActualUPB,1_CurrentInterestRate,1_CurrentNonInterestBearingUPB,1_EstimatedLTV,1_InterestBearingUPB,1_LoanAge,1_MonthlyReportingPeriod,1_RemainingMonthsToLegalMaturity,2_CurrentActualUPB,2_CurrentInterestRate,2_CurrentNonInterestBearingUPB,2_EstimatedLTV,2_InterestBearingUPB,2_LoanAge,2_MonthlyReportingPeriod,2_RemainingMonthsToLegalMaturity,3_CurrentActualUPB,3_CurrentInterestRate,3_CurrentNonInterestBearingUPB,3_EstimatedLTV,3_InterestBearingUPB,3_LoanAge,3_MonthlyReportingPeriod,3_RemainingMonthsToLegalMaturity,4_CurrentActualUPB,4_CurrentInterestRate,4_CurrentNonInterestBearingUPB,4_EstimatedLTV,4_InterestBearingUPB,4_LoanAge,4_MonthlyReportingPeriod,4_RemainingMonthsToLegalMaturity,5_CurrentActualUPB,5_CurrentInterestRate,5_CurrentNonInterestBearingUPB,5_EstimatedLTV,5_InterestBearingUPB,5_LoanAge,5_MonthlyReportingPeriod,5_RemainingMonthsToLegalMaturity,6_CurrentActualUPB,6_CurrentInterestRate,6_CurrentNonInterestBearingUPB,6_EstimatedLTV,6_InterestBearingUPB,6_LoanAge,6_MonthlyReportingPeriod,6_RemainingMonthsToLegalMaturity,7_CurrentActualUPB,7_CurrentInterestRate,7_CurrentNonInterestBearingUPB,7_EstimatedLTV,7_InterestBearingUPB,7_LoanAge,7_MonthlyReportingPeriod,7_RemainingMonthsToLegalMaturity,8_CurrentActualUPB,8_CurrentInterestRate,8_CurrentNonInterestBearingUPB,8_EstimatedLTV,8_InterestBearingUPB,8_LoanAge,8_MonthlyReportingPeriod,8_RemainingMonthsToLegalMaturity,9_CurrentActualUPB,9_CurrentInterestRate,9_CurrentNonInterestBearingUPB,9_EstimatedLTV,9_InterestBearingUPB,9_LoanAge,9_MonthlyReportingPeriod,9_Remainin
gMonthsToLegalMaturity,10_CurrentActualUPB,10_CurrentInterestRate,10_CurrentNonInterestBearingUPB,10_EstimatedLTV,10_InterestBearingUPB,10_LoanAge,10_MonthlyReportingPeriod,10_RemainingMonthsToLegalMaturity,11_CurrentActualUPB,11_CurrentInterestRate,11_CurrentNonInterestBearingUPB,11_EstimatedLTV,11_InterestBearingUPB,11_LoanAge,11_MonthlyReportingPeriod,11_RemainingMonthsToLegalMaturity,12_CurrentActualUPB,12_CurrentInterestRate,12_CurrentNonInterestBearingUPB,12_EstimatedLTV,12_InterestBearingUPB,12_LoanAge,12_MonthlyReportingPeriod,12_RemainingMonthsToLegalMaturity,13_CurrentActualUPB,13_CurrentInterestRate,13_CurrentNonInterestBearingUPB,13_EstimatedLTV,13_InterestBearingUPB,13_LoanAge,13_MonthlyReportingPeriod,13_RemainingMonthsToLegalMaturity
count,6101.0,6101.0,6101.0,6101.0,6101.0,5230.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,0.0,0.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0,6101.0
mean,4928.648582,0.0,754.230782,202402.971972,205327.869857,29755.718547,10.062121,1.03147,75.743321,37.584003,300859.2,75.611539,6.812081,49630.011474,351.108179,1.445665,,,1.937715,300233.7,6.812081,0.0,167.641206,300233.7,0.027864,202402.0,351.080315,299197.2,6.812081,0.0,145.808556,299197.2,1.027864,202403.0,350.080315,297587.0,6.812081,0.0,144.062777,297587.0,2.027864,202404.0,349.080315,296572.9,6.812081,0.0,138.718243,296572.9,3.027864,202405.0,348.080315,295636.9,6.812081,0.0,138.654319,295636.9,4.027864,202406.0,347.080315,294630.9,6.812081,0.0,139.45091,294630.9,5.027864,202407.0,346.080315,293668.0,6.812081,0.0,141.278151,293668.0,6.027864,202408.0,345.080315,293127.4,6.812081,0.0,140.27979,293127.4,7.027864,202409.0,344.080315,292557.1,6.812081,0.0,139.080806,292557.1,8.027864,202410.0,343.080315,291896.0,6.812081,0.0,138.267661,291896.0,9.027864,202411.0,342.080315,291263.9,6.812081,0.0,136.990657,291263.9,10.027864,202412.0,341.080315,290533.6,6.812081,0.0,136.907064,290533.6,11.027864,202501.0,340.080315,289827.5,6.812081,0.0,135.799213,289827.5,12.027864,202502.0,339.080315,286537.9,6.812081,0.0,142.242255,286537.9,13.027864,202503.0,338.080315
std,2850.6751,0.0,173.089661,0.165067,320.554665,11306.468675,13.42581,0.218023,19.325571,9.470854,182295.9,19.260362,0.521142,27852.209363,38.466009,0.536723,,,0.524847,182196.4,0.521142,0.0,277.237516,182196.4,0.164597,0.0,38.46222,181862.2,0.521142,0.0,248.382843,181862.2,0.164597,0.0,38.46222,180987.5,0.521142,0.0,249.407275,180987.5,0.164597,0.0,38.46222,180706.1,0.521142,0.0,243.485633,180706.1,0.164597,0.0,38.46222,180371.9,0.521142,0.0,244.661436,180371.9,0.164597,0.0,38.46222,179994.5,0.521142,0.0,246.181489,179994.5,0.164597,0.0,38.46222,179618.1,0.521142,0.0,245.750085,179618.1,0.164597,0.0,38.46222,179366.7,0.521142,0.0,244.308033,179366.7,0.164597,0.0,38.46222,179214.9,0.521142,0.0,242.902182,179214.9,0.164597,0.0,38.46222,179014.7,0.521142,0.0,242.25859,179014.7,0.164597,0.0,38.46222,178720.9,0.521142,0.0,242.001567,178720.9,0.164597,0.0,38.46222,178462.4,0.521142,0.0,244.011984,178462.4,0.164597,0.0,38.46222,178231.1,0.521142,0.0,242.866829,178231.1,0.164597,0.0,38.46222,179378.1,0.521142,0.0,254.71313,179378.1,0.164597,0.0,38.46222
min,0.0,0.0,600.0,202402.0,203103.0,10180.0,0.0,1.0,8.0,2.0,25000.0,8.0,4.75,800.0,85.0,1.0,,,1.0,25000.0,4.75,0.0,7.0,25000.0,0.0,202402.0,85.0,25000.0,4.75,0.0,7.0,25000.0,1.0,202403.0,84.0,25000.0,4.75,0.0,6.0,25000.0,2.0,202404.0,83.0,25000.0,4.75,0.0,6.0,25000.0,3.0,202405.0,82.0,25000.0,4.75,0.0,5.0,25000.0,4.0,202406.0,81.0,25000.0,4.75,0.0,5.0,25000.0,5.0,202407.0,80.0,24000.0,4.75,0.0,5.0,24000.0,6.0,202408.0,79.0,24414.0,4.75,0.0,5.0,24414.0,7.0,202409.0,78.0,12410.57,4.75,0.0,5.0,12410.57,8.0,202410.0,77.0,11301.14,4.75,0.0,4.0,11301.14,9.0,202411.0,76.0,11301.14,4.75,0.0,3.0,11301.14,10.0,202412.0,75.0,6210.86,4.75,0.0,2.0,6210.86,11.0,202501.0,74.0,5000.0,4.75,0.0,2.0,5000.0,12.0,202502.0,73.0,0.0,4.75,0.0,1.0,0.0,13.0,202503.0,72.0
25%,2464.0,0.0,723.0,202403.0,205402.0,19124.0,0.0,1.0,67.0,31.0,161000.0,67.0,6.5,28200.0,360.0,1.0,,,2.0,160000.0,6.5,0.0,69.0,160000.0,0.0,202402.0,360.0,160000.0,6.5,0.0,67.0,160000.0,1.0,202403.0,359.0,160000.0,6.5,0.0,64.0,160000.0,2.0,202404.0,358.0,159000.0,6.5,0.0,62.0,159000.0,3.0,202405.0,357.0,159000.0,6.5,0.0,61.0,159000.0,4.0,202406.0,356.0,159000.0,6.5,0.0,61.0,159000.0,5.0,202407.0,355.0,158000.0,6.5,0.0,62.0,158000.0,6.0,202408.0,354.0,157312.1,6.5,0.0,62.0,157312.1,7.0,202409.0,353.0,156989.0,6.5,0.0,61.0,156989.0,8.0,202410.0,352.0,156831.6,6.5,0.0,61.0,156831.6,9.0,202411.0,351.0,156130.5,6.5,0.0,60.0,156130.5,10.0,202412.0,350.0,155357.2,6.5,0.0,60.0,155357.2,11.0,202501.0,349.0,154873.0,6.5,0.0,60.0,154873.0,12.0,202502.0,348.0,152711.8,6.5,0.0,59.0,152711.8,13.0,202503.0,347.0
50%,4887.0,0.0,760.0,202403.0,205402.0,31052.0,0.0,1.0,80.0,39.0,259000.0,80.0,6.775,48100.0,360.0,1.0,,,2.0,258000.0,6.775,0.0,80.0,258000.0,0.0,202402.0,360.0,257000.0,6.775,0.0,79.0,257000.0,1.0,202403.0,359.0,256000.0,6.775,0.0,77.0,256000.0,2.0,202404.0,358.0,255000.0,6.775,0.0,76.0,255000.0,3.0,202405.0,357.0,254000.0,6.775,0.0,75.0,254000.0,4.0,202406.0,356.0,253000.0,6.775,0.0,75.0,253000.0,5.0,202407.0,355.0,253000.0,6.775,0.0,77.0,253000.0,6.0,202408.0,354.0,251991.6,6.775,0.0,77.0,251991.6,7.0,202409.0,353.0,251275.1,6.775,0.0,77.0,251275.1,8.0,202410.0,352.0,250320.6,6.775,0.0,76.0,250320.6,9.0,202411.0,351.0,249878.9,6.775,0.0,75.0,249878.9,10.0,202412.0,350.0,249501.2,6.775,0.0,74.0,249501.2,11.0,202501.0,349.0,249174.6,6.775,0.0,74.0,249174.6,12.0,202502.0,348.0,246956.9,6.775,0.0,73.0,246956.9,13.0,202503.0,347.0
75%,7407.0,0.0,786.0,202403.0,205402.0,38940.0,25.0,1.0,92.0,45.0,399000.0,91.0,7.125,75000.0,360.0,2.0,,,2.0,399000.0,7.125,0.0,95.0,399000.0,0.0,202402.0,360.0,397000.0,7.125,0.0,92.0,397000.0,1.0,202403.0,359.0,394000.0,7.125,0.0,90.0,394000.0,2.0,202404.0,358.0,393000.0,7.125,0.0,88.0,393000.0,3.0,202405.0,357.0,391000.0,7.125,0.0,87.0,391000.0,4.0,202406.0,356.0,390000.0,7.125,0.0,87.0,390000.0,5.0,202407.0,355.0,389000.0,7.125,0.0,90.0,389000.0,6.0,202408.0,354.0,388897.0,7.125,0.0,90.0,388897.0,7.0,202409.0,353.0,388172.7,7.125,0.0,90.0,388172.7,8.0,202410.0,352.0,387028.5,7.125,0.0,89.0,387028.5,9.0,202411.0,351.0,386380.3,7.125,0.0,88.0,386380.3,10.0,202412.0,350.0,385613.8,7.125,0.0,86.0,385613.8,11.0,202501.0,349.0,385185.9,7.125,0.0,85.0,385185.9,12.0,202502.0,348.0,382935.1,7.125,0.0,84.0,382935.1,13.0,202503.0,347.0
max,9851.0,0.0,9999.0,202403.0,205402.0,49740.0,35.0,4.0,105.0,50.0,1369000.0,97.0,9.125,99700.0,360.0,4.0,,,9.0,1368000.0,9.125,0.0,999.0,1368000.0,1.0,202402.0,360.0,1368000.0,9.125,0.0,999.0,1368000.0,2.0,202403.0,359.0,1367000.0,9.125,0.0,999.0,1367000.0,3.0,202404.0,358.0,1366000.0,9.125,0.0,999.0,1366000.0,4.0,202405.0,357.0,1365000.0,9.125,0.0,999.0,1365000.0,5.0,202406.0,356.0,1363000.0,9.125,0.0,999.0,1363000.0,6.0,202407.0,355.0,1361000.0,9.125,0.0,999.0,1361000.0,7.0,202408.0,354.0,1361324.0,9.125,0.0,999.0,1361324.0,8.0,202409.0,353.0,1360236.0,9.125,0.0,999.0,1360236.0,9.0,202410.0,352.0,1359143.0,9.125,0.0,999.0,1359143.0,10.0,202411.0,351.0,1358042.0,9.125,0.0,999.0,1358042.0,11.0,202412.0,350.0,1356935.0,9.125,0.0,999.0,1356935.0,12.0,202501.0,349.0,1355821.0,9.125,0.0,999.0,1355821.0,13.0,202502.0,348.0,1354700.0,9.125,0.0,999.0,1354700.0,14.0,202503.0,347.0


In [88]:
# Monthly snapshot ("panel") columns carry a "<month>_" prefix for months 0..13,
# e.g. "0_LoanAge"; everything matching one of those prefixes is collected here.
month_prefixes = tuple(f"{month}_" for month in range(14))
panel_cols = [col for col in train_df.columns if col.startswith(month_prefixes)]

In [90]:
# Show the column names that were classified as monthly panel columns.
panel_cols

['0_CurrentActualUPB',
 '0_CurrentInterestRate',
 '0_CurrentNonInterestBearingUPB',
 '0_EstimatedLTV',
 '0_InterestBearingUPB',
 '0_LoanAge',
 '0_MonthlyReportingPeriod',
 '0_RemainingMonthsToLegalMaturity',
 '1_CurrentActualUPB',
 '1_CurrentInterestRate',
 '1_CurrentNonInterestBearingUPB',
 '1_EstimatedLTV',
 '1_InterestBearingUPB',
 '1_LoanAge',
 '1_MonthlyReportingPeriod',
 '1_RemainingMonthsToLegalMaturity',
 '2_CurrentActualUPB',
 '2_CurrentInterestRate',
 '2_CurrentNonInterestBearingUPB',
 '2_EstimatedLTV',
 '2_InterestBearingUPB',
 '2_LoanAge',
 '2_MonthlyReportingPeriod',
 '2_RemainingMonthsToLegalMaturity',
 '3_CurrentActualUPB',
 '3_CurrentInterestRate',
 '3_CurrentNonInterestBearingUPB',
 '3_EstimatedLTV',
 '3_InterestBearingUPB',
 '3_LoanAge',
 '3_MonthlyReportingPeriod',
 '3_RemainingMonthsToLegalMaturity',
 '4_CurrentActualUPB',
 '4_CurrentInterestRate',
 '4_CurrentNonInterestBearingUPB',
 '4_EstimatedLTV',
 '4_InterestBearingUPB',
 '4_LoanAge',
 '4_MonthlyReportingPeriod

In [98]:
# Full list of column names in the training frame, as a plain Python list.
train_df.columns.tolist()

['index',
 'target',
 'CreditScore',
 'FirstPaymentDate',
 'FirstTimeHomebuyerFlag',
 'MaturityDate',
 'MSA',
 'MI_Pct',
 'NumberOfUnits',
 'OccupancyStatus',
 'OriginalCLTV',
 'OriginalDTI',
 'OriginalUPB',
 'OriginalLTV',
 'OriginalInterestRate',
 'Channel',
 'PPM_Flag',
 'ProductType',
 'PropertyState',
 'PropertyType',
 'PostalCode',
 'LoanPurpose',
 'OriginalLoanTerm',
 'NumberOfBorrowers',
 'SellerName',
 'ServicerName',
 'SuperConformingFlag',
 'PreHARP_Flag',
 'ProgramIndicator',
 'ReliefRefinanceIndicator',
 'PropertyValMethod',
 'InterestOnlyFlag',
 'BalloonIndicator',
 '0_CurrentActualUPB',
 '0_CurrentInterestRate',
 '0_CurrentNonInterestBearingUPB',
 '0_EstimatedLTV',
 '0_InterestBearingUPB',
 '0_LoanAge',
 '0_MonthlyReportingPeriod',
 '0_RemainingMonthsToLegalMaturity',
 '1_CurrentActualUPB',
 '1_CurrentInterestRate',
 '1_CurrentNonInterestBearingUPB',
 '1_EstimatedLTV',
 '1_InterestBearingUPB',
 '1_LoanAge',
 '1_MonthlyReportingPeriod',
 '1_RemainingMonthsToLegalMaturity'

In [92]:
# Every column that is not a monthly panel column is treated as loan-level
# ("static"); original column order is preserved.
panel_col_set = set(panel_cols)
static_cols = [col for col in train_df.columns if col not in panel_col_set]

In [94]:
# Show the loan-level (non-panel) column names.
static_cols

['index',
 'target',
 'CreditScore',
 'FirstPaymentDate',
 'FirstTimeHomebuyerFlag',
 'MaturityDate',
 'MSA',
 'MI_Pct',
 'NumberOfUnits',
 'OccupancyStatus',
 'OriginalCLTV',
 'OriginalDTI',
 'OriginalUPB',
 'OriginalLTV',
 'OriginalInterestRate',
 'Channel',
 'PPM_Flag',
 'ProductType',
 'PropertyState',
 'PropertyType',
 'PostalCode',
 'LoanPurpose',
 'OriginalLoanTerm',
 'NumberOfBorrowers',
 'SellerName',
 'ServicerName',
 'SuperConformingFlag',
 'PreHARP_Flag',
 'ProgramIndicator',
 'ReliefRefinanceIndicator',
 'PropertyValMethod',
 'InterestOnlyFlag',
 'BalloonIndicator']

In [108]:
# Names of the reporting-period column for each of the 14 monthly snapshots;
# these are later excluded from the numeric feature list.
lst = [f"{month}_MonthlyReportingPeriod" for month in range(14)]

In [112]:
# Also exclude the two loan-level date columns and the row identifier, so they
# are not picked up as numeric features.
lst += ["FirstPaymentDate", "MaturityDate", "index"]


In [114]:
# Loan-level attributes that are one-hot encoded by the preprocessing step.
categorical_cols = [
    "FirstTimeHomebuyerFlag","MSA", "OccupancyStatus", "Channel",
    "PPM_Flag", "ProductType", "PropertyState", "PropertyType", "PostalCode",
    "LoanPurpose","SellerName", "ServicerName",
    "SuperConformingFlag", "PreHARP_Flag", "ProgramIndicator",
    "ReliefRefinanceIndicator", "PropertyValMethod",
    "InterestOnlyFlag", "BalloonIndicator"
]

# Every remaining feature column is treated as numeric: not categorical, and
# not in `lst` (reporting periods, date columns, row identifier).
# Derived from train_df minus the label instead of X_train, because X_train is
# only created in a later cell and would raise NameError on a fresh kernel.
numeric_cols = [
    c for c in train_df.columns
    if c != "target" and c not in categorical_cols and c not in lst
]

In [116]:
# Show the columns that will be scaled as numeric features.
numeric_cols

['index',
 'CreditScore',
 'MI_Pct',
 'NumberOfUnits',
 'OriginalCLTV',
 'OriginalDTI',
 'OriginalUPB',
 'OriginalLTV',
 'OriginalInterestRate',
 'OriginalLoanTerm',
 'NumberOfBorrowers',
 '0_CurrentActualUPB',
 '0_CurrentInterestRate',
 '0_CurrentNonInterestBearingUPB',
 '0_EstimatedLTV',
 '0_InterestBearingUPB',
 '0_LoanAge',
 '0_RemainingMonthsToLegalMaturity',
 '1_CurrentActualUPB',
 '1_CurrentInterestRate',
 '1_CurrentNonInterestBearingUPB',
 '1_EstimatedLTV',
 '1_InterestBearingUPB',
 '1_LoanAge',
 '1_RemainingMonthsToLegalMaturity',
 '2_CurrentActualUPB',
 '2_CurrentInterestRate',
 '2_CurrentNonInterestBearingUPB',
 '2_EstimatedLTV',
 '2_InterestBearingUPB',
 '2_LoanAge',
 '2_RemainingMonthsToLegalMaturity',
 '3_CurrentActualUPB',
 '3_CurrentInterestRate',
 '3_CurrentNonInterestBearingUPB',
 '3_EstimatedLTV',
 '3_InterestBearingUPB',
 '3_LoanAge',
 '3_RemainingMonthsToLegalMaturity',
 '4_CurrentActualUPB',
 '4_CurrentInterestRate',
 '4_CurrentNonInterestBearingUPB',
 '4_Estimate

In [130]:
# Row identifiers of the validation set; written next to the scores in the
# prediction CSV.
process_ids_valid = valid_df["index"].values

# Select the model inputs straight from the loaded frames. The previous version
# indexed X_train / X_valid, which are only created in the following cell, so
# this cell failed with NameError on a fresh kernel. The feature list never
# contains "target", so selecting from train_df / valid_df yields the same data.
feature_cols = categorical_cols + numeric_cols
X_train_feat = train_df[feature_cols]
X_valid_feat = valid_df[feature_cols]

In [132]:
# Separate the label from both frames; validation labels are used only to
# score the detector, never to fit it.
X_train = train_df.drop(columns=["target"])
X_valid = valid_df.drop(columns=["target"])
y_valid = valid_df["target"].values

# --------------------
# Preprocessing
# --------------------
# One-hot encode the categorical columns (unseen categories are ignored at
# transform time) and scale the numeric columns without centering, so the
# combined matrix can stay sparse (sparse_threshold=1.0).
cat_step = ("cat", OneHotEncoder(handle_unknown="ignore", sparse_output=True), categorical_cols)
num_step = ("num", StandardScaler(with_mean=False), numeric_cols)
preprocess = ColumnTransformer(transformers=[cat_step, num_step], sparse_threshold=1.0)

# --------------------
# Baseline Model: Isolation Forest
# --------------------
model = IsolationForest(
    n_estimators=200,
    max_samples="auto",
    contamination="auto",
    random_state=42,  # fixed seed for reproducible trees
    n_jobs=-1,
)

pipe = Pipeline(steps=[("prep", preprocess), ("clf", model)])

# Fit on the whole training frame.
# NOTE(review): no filtering on `target` happens here; "normal only" training
# relies on the training file itself containing only normal loans - confirm.
pipe.fit(X_train_feat)

# Pipeline.score_samples applies the fitted preprocessing and then the forest's
# score_samples, where a higher value means "more normal".
scores_normal = pipe.score_samples(X_valid_feat)
raw_anom = -scores_normal  # flip the sign: higher = more anomalous

# Min-max rescale to [0, 1]; the epsilon guards against a zero range.
min_v, max_v = raw_anom.min(), raw_anom.max()
anom_score = (raw_anom - min_v) / (max_v - min_v + 1e-12)

# --------------------
# Evaluation
# --------------------
ap = average_precision_score(y_valid, anom_score)
roc_auc = roc_auc_score(y_valid, anom_score)

print(f"AP: {ap:.4f}")
print(f"AUC-ROC : {roc_auc:.4f}")

# Persist one anomaly score per validation row.
out_df = pd.DataFrame({"index": process_ids_valid, "anomaly_score": anom_score})
out_df.to_csv("valid_pred.csv", index=False)
print("Wrote valid_pred.csv")

AP: 0.1424
AUC-ROC : 0.5159
Wrote valid_pred.csv


In [134]:
from sklearn.svm import OneClassSVM

# --------------------
# Alternative Model: One-Class SVM
# --------------------
model = OneClassSVM(
    kernel="rbf",     # non-linear decision boundary
    nu=0.05,          # upper bound on the fraction of training errors
    gamma="scale"     # kernel coefficient (candidate for tuning)
)

# Reuses the `preprocess` transformer built for the Isolation Forest cell;
# fitting the pipeline refits it on the same training features.
pipe = Pipeline(steps=[("prep", preprocess), ("clf", model)])

# Fit on the whole training frame.
# NOTE(review): "normal only" training relies on the training file containing
# only normal loans; nothing is filtered here - confirm.
pipe.fit(X_train_feat)

# Pipeline.score_samples applies the fitted preprocessing and then the SVM's
# score_samples, where a higher value means "more normal".
scores_normal = pipe.score_samples(X_valid_feat)
raw_anom = -scores_normal  # flip the sign: higher = more anomalous

# Min-max rescale to [0, 1]; the epsilon guards against a zero range.
min_v, max_v = raw_anom.min(), raw_anom.max()
anom_score = (raw_anom - min_v) / (max_v - min_v + 1e-12)

# --------------------
# Evaluation
# --------------------
ap = average_precision_score(y_valid, anom_score)
roc_auc = roc_auc_score(y_valid, anom_score)

print(f"AP: {ap:.4f}")
print(f"AUC-ROC : {roc_auc:.4f}")

# Persist one anomaly score per validation row.
# NOTE(review): this is the same file the Isolation Forest cell writes, so the
# earlier predictions are overwritten.
out_df = pd.DataFrame({"index": process_ids_valid, "anomaly_score": anom_score})
out_df.to_csv("valid_pred.csv", index=False)
print("Wrote valid_pred.csv")

AP: 0.1638
AUC-ROC : 0.5590


PermissionError: [Errno 13] Permission denied: 'valid_pred.csv'

In [58]:
# Peek at the first 50 column labels of the training frame.
train_df.columns[:50]

Index(['index', 'target', 'CreditScore', 'FirstPaymentDate',
       'FirstTimeHomebuyerFlag', 'MaturityDate', 'MSA', 'MI_Pct',
       'NumberOfUnits', 'OccupancyStatus', 'OriginalCLTV', 'OriginalDTI',
       'OriginalUPB', 'OriginalLTV', 'OriginalInterestRate', 'Channel',
       'PPM_Flag', 'ProductType', 'PropertyState', 'PropertyType',
       'PostalCode', 'LoanPurpose', 'OriginalLoanTerm', 'NumberOfBorrowers',
       'SellerName', 'ServicerName', 'SuperConformingFlag', 'PreHARP_Flag',
       'ProgramIndicator', 'ReliefRefinanceIndicator', 'PropertyValMethod',
       'InterestOnlyFlag', 'BalloonIndicator', '0_CurrentActualUPB',
       '0_CurrentInterestRate', '0_CurrentNonInterestBearingUPB',
       '0_EstimatedLTV', '0_InterestBearingUPB', '0_LoanAge',
       '0_MonthlyReportingPeriod', '0_RemainingMonthsToLegalMaturity',
       '1_CurrentActualUPB', '1_CurrentInterestRate',
       '1_CurrentNonInterestBearingUPB', '1_EstimatedLTV',
       '1_InterestBearingUPB', '1_LoanAge', '1_Mon

In [None]:
# Hand-picked origination (loan-level) features.
# Fix: a comma was missing between "MI_Pct" and "OriginalUPB"; Python joined
# the adjacent literals into the non-existent column "MI_PctOriginalUPB".
static_cols = [
    "CreditScore",
    "FirstPaymentDate",
    "MaturityDate",
    "MSA",
    "MI_Pct",
    "OriginalUPB",
    "OriginalLTV",
    "OriginalDTI",
    "OriginalInterestRate",
    "OriginalLoanTerm",
    "NumberOfBorrowers",
    "NumberOfUnits",
    # add other origination features you want
]

In [None]:
# Row identifiers of the validation set; written next to the scores below.
process_ids_valid = valid_df["index"].values
X_train = train_df.drop(columns=["target"])
X_valid = valid_df.drop(columns=["target"])

y_valid = valid_df["target"].values

# --------------------
# Preprocessing & Feature engineering
# --------------------
# Fix: this cell used to overwrite categorical_cols / numeric_cols with
# process-event columns ("processName", "argsNum", ...) that are not in the
# loan data. Selecting them raised KeyError and clobbered the real column
# lists. It now reuses the loan column lists defined earlier in the notebook.
feature_cols = categorical_cols + numeric_cols
X_train_feat = X_train[feature_cols]
X_valid_feat = X_valid[feature_cols]

# One-hot encode categoricals (unseen categories ignored) and scale numerics
# without centering so the combined matrix can stay sparse.
preprocess = ColumnTransformer(
    transformers=[
        ("cat", OneHotEncoder(handle_unknown="ignore", sparse_output=True), categorical_cols),
        ("num", StandardScaler(with_mean=False), numeric_cols),
    ],
    sparse_threshold=1.0,
)

# --------------------
# Baseline Model: Isolation Forest
# --------------------
model = IsolationForest(
    n_estimators=200,
    max_samples="auto",
    contamination="auto",
    random_state=42,
    n_jobs=-1,
)

pipe = Pipeline([("prep", preprocess), ("clf", model)])

# Fit on the whole training frame.
# NOTE(review): "normal only" training relies on the training file containing
# only normal loans; nothing is filtered here - confirm.
pipe.fit(X_train_feat)

# Score validation
# IsolationForest: higher score_samples = more normal
scores_normal = pipe["clf"].score_samples(pipe["prep"].transform(X_valid_feat))
raw_anom = -scores_normal  # invert: higher = more anomalous

# Normalize to [0,1]; the epsilon guards against a zero range.
min_v, max_v = np.min(raw_anom), np.max(raw_anom)
anom_score = (raw_anom - min_v) / (max_v - min_v + 1e-12)

# --------------------
# Evaluation
# --------------------
ap = average_precision_score(y_valid, anom_score)
roc_auc = roc_auc_score(y_valid, anom_score)

print(f"AP: {ap:.4f}")
print(f"AUC-ROC : {roc_auc:.4f}")

# Sample Output CSV
out_df = pd.DataFrame({"index": process_ids_valid, "anomaly_score": anom_score})
out_df.to_csv("valid_pred.csv", index=False)
print("Wrote valid_pred.csv")
