In [79]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.losses import BinaryCrossentropy
from sklearn.metrics import roc_auc_score
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import AUC
from tensorflow.keras.callbacks import ModelCheckpoint, Callback
from tqdm import tqdm
from tensorflow.keras.preprocessing.text import Tokenizer
import os
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Conv1D, Input, ZeroPadding1D, Add, Flatten, Reshape, concatenate, GlobalMaxPooling1D, Dense, Dropout, MaxPooling1D, Concatenate
from tensorflow.keras.models import Model

from utils import preprocess_chunk, preprocess_text
from utils_app import process_app, preprocess_chunk
import multiprocessing

from sklearn.preprocessing import StandardScaler
import joblib

In [80]:
num_cores = multiprocessing.cpu_count()

# 数据加载

In [23]:
data_app = pd.read_csv("./feature_original_data.csv", sep = '/001XG')
data_app.shape

  data_app = pd.read_csv("./feature_original_data.csv", sep = '/001XG')


(89648, 9)

In [24]:
data_app = data_app[['f_public_uid', 'f_app_data', 'f_public_last_submit_time']]
data_app = data_app.rename(columns={'f_public_last_submit_time':'submit_time'})

In [26]:
data_final = pd.read_csv("./data_final_sms_number.csv")

In [27]:
data_final = pd.merge(data_app, data_final, on=['f_public_uid'])
print(data_final.shape)
data_final.head()

data_final = data_final.drop_duplicates(subset=['f_public_uid']
                                        , keep='first', inplace=False)
print(data_final.shape)

(75604, 921)
(75604, 921)


In [28]:
data_final['preprocessed_sms'] = data_final['preprocessed_sms'].fillna('')

In [29]:
data_final.to_csv('data_final_sms_number_app.csv', index = False)

## 先加工applist数据

In [31]:
def pocess_data(df, save_file):
    # 将数据分成多个部分，每个部分处理一次
    chunks = np.array_split(df, num_cores)
    
    # 使用进度条tqdm展示进度
    with multiprocessing.Pool(processes=num_cores) as pool:
        processed_chunks = list(tqdm(pool.imap(preprocess_chunk, chunks), total=num_cores))

    # 合并处理后的结果
    df = pd.concat(processed_chunks, ignore_index=True)
    df.to_csv(save_file, index = False)
    return df

In [32]:
data_final = pocess_data(data_final, 'data_final_preprocessed_sms_num_app.csv')

100%|██████████| 12/12 [01:56<00:00,  9.68s/it]


33

## 数据集划分

In [38]:
data_final['repayment_time'] = pd.to_datetime(data_final['repayment_time'])

In [39]:
data_final.repayment_time.min(), data_final.repayment_time.max()

(Timestamp('2023-08-01 00:00:00'), Timestamp('2023-10-25 00:00:00'))

In [40]:
train_data = data_final[data_final.repayment_time < pd.Timestamp(2023, 10, 15)]
print(train_data.shape)

(66467, 922)


In [41]:
IDT_test = data_final[data_final.repayment_time >= pd.Timestamp(2023, 10, 15)]
print(IDT_test.shape)


(9137, 922)


In [42]:
train_data, test_data, _, _ = train_test_split(train_data, range(train_data.shape[0]), 
                                                test_size=0.1, random_state=1314)


In [43]:
train_data.shape, test_data.shape, IDT_test.shape

((59820, 922), (6647, 922), (9137, 922))

In [44]:
train_data.d7.mean(), test_data.d7.mean(), IDT_test.d7.mean()

(0.3834002006018054, 0.378065292613209, 0.3329320345846558)

In [45]:
train_data.to_csv('train_data_num_sms_app.csv', index = False)
test_data.to_csv('test_data_num_sms_app.csv', index = False)
IDT_test.to_csv('IDT_test_num_sms_app.csv', index = False)

In [None]:
train_data = pd.read_csv("./train_data_num_sms_app.csv")
test_data = pd.read_csv("./test_data_num_sms_app.csv")
IDT_test = pd.read_csv("./IDT_test_num_sms_app.csv")

In [None]:
train_data['preprocessed_sms'] = train_data['preprocessed_sms'].fillna('')
test_data['preprocessed_sms'] = test_data['preprocessed_sms'].fillna('')
IDT_test['preprocessed_sms'] = IDT_test['preprocessed_sms'].fillna('')

# 模型数据处理

## 处理短信文本

In [47]:
# 从 JSON 文件加载 Tokenizer 配置
with open('tokenizer_config.json', 'r', encoding='utf-8') as json_file:
    loaded_tokenizer_json = json_file.read()
    tokenizer = tf.keras.preprocessing.text.tokenizer_from_json(loaded_tokenizer_json)
tokenizer.num_words = 15000

In [48]:
# 对文本数据进行标记和填充
max_words = 15000  # 选择词汇表的大小
max_len_sms = 7000      # 选择文本的最大长度

In [49]:
X_train_sms = tokenizer.texts_to_sequences(train_data['preprocessed_sms'])
X_test_sms = tokenizer.texts_to_sequences(test_data['preprocessed_sms'])
X_idt_sms = tokenizer.texts_to_sequences(IDT_test['preprocessed_sms'])

In [50]:
X_train_sms = pad_sequences(X_train_sms, padding='post', maxlen=max_len_sms)
X_test_sms = pad_sequences(X_test_sms, padding='post', maxlen=max_len_sms)
X_idt_sms = pad_sequences(X_idt_sms, padding='post', maxlen=max_len_sms)

## 数值特征处理

In [83]:
feature_num = [
'f_sms_loan_v2_action_overdue_distinct_amount_avg_3m','f_current_app_nearly_2_months_app_percentage','f_sms_loan_v2_action_overdue_distinct_amount_avg_3w','f_current_app_last_app_install_days','f_app_list_retentive_time_220622_com_transsnetfinancial_palmcredit','f_sms_loan_v2_action_approval_3m','f_sms_basic_v2_square_count_1m','f_sms_loan_v2_action_approval_3w','f_app_crc_classification_ins_balance_overdue_rate','f_app_list_retentive_time_220622_com_whatsapp_w4b','f_sms_loan_v2_action_amount_1m','f_sms_basic_v2_square_count_2w','f_app_crc_classification_ins_accounts_sum','f_app_borrowing_avg_prepayment_hours','f_sms_loan_v2_action_raise_amount_count_2m','f_scrapy_apps_video_editors_count','f_scrapy_apps_music_count','f_sms_loan_v2_action_approval_1m','f_sms_basic_v2_square_count_3m','f_sms_loan_v2_action_raise_amount_count_2w','f_sms_loan_v2_action_amount_1w','f_sms_loan_v2_action_approval_1w','f_sms_loan_v2_action_amount_2m','f_scrapy_apps_travel_count','f_sms_basic_v2_square_count_1w','f_sms_loan_v2_action_raise_amount_count_3m','f_app_borrowing_curr_vip005_max_overdue','f_intermediate_feature_sd_can_use_size','f_app_user_identity_id_card_handheld_path_is_null','f_scrapy_apps_education_count','f_sms_loan_v2_action_raise_amount_count_3w','f_sms_basic_v2_square_count_2m','f_sms_loan_v2_action_approval_2m','f_app_apply_account_name','f_sms_loan_v2_action_amount_2w','f_sms_loan_v2_action_approval_2w','f_sms_loan_v2_action_approval_distinct_amount_count_1m','f_sms_loan_v2_action_approval_amount_max_2w','f_app_borrowing_curr_not_vip005_max_loan_amount','f_app_crc_classification_prod_type_sanctioned_amount_sum','f_sms_loan_v2_action_approval_amount_max_3m','f_sms_loan_v2_action_approval_amount_max_1w','f_app_user_identity_word_card_path_is_null','f_sms_loan_v2_action_raise_amount_count_1m','f_user_device_time_country_2','f_sms_loan_v2_action_raise_amount_count_1w','f_user_device_time_country_1','f_sms_loan_v2_action_approval_amount_max_2m','f_user_device_time_country_4','f_user_device_time_country_3','f_intermediate_feature_cpu_num','f_user_device_time_country_9','f_user_device_time_country_6','f_user_device_time_country_5','f_user_device_time_country_8','f_app_apply_before_apply_count','f_user_device_time_country_7','f_app_crc_classification_ins_credit_overdue_minus_single','f_app_list_retentive_time_220622_com_ng_easecash','f_sms_loan_v2_action_overdue_distinct_amount_avg_1m','f_app_crc_classification_prod_type_total_outstanding_balance_sum','f_app_user_identity_driving_licence_path_is_null','f_sms_loan_v2_action_overdue_distinct_amount_avg_1w','f_sms_loan_v2_action_coupon_distinct_amount_media_1m','f_app_list_retentive_time_220622_com_credit_nicenaira','f_scrapy_apps_food_count','f_app_crc_classification_ins_oustanding_balance_single_avg','f_app_list_retentive_time_220622_com_fair_credit_faircreditobrej','f_sms_loan_v2_action_approval_amount_max_3w','f_sms_loan_v2_action_overdue_distinct_amount_avg_2m','f_sms_basic_v2_phone_num_count_1w','f_sms_loan_v2_action_overdue_distinct_amount_avg_2w','f_scrapy_apps_events_count','f_sms_basic_v2_phone_num_count_1m','f_app_list_retentive_time_220622_com_loan_cash_credit_easemoni_nigeria','f_sms_basic_v2_phone_num_count_2w','f_scrapy_apps_family_count','f_app_crc_classification_ins_overdue_balance_minus_single','f_sms_loan_v2_action_remind_distinct_amount_avg_2m','f_sms_loan_v2_action_overdue_distinct_amount_count_1w','f_sms_basic_v2_phone_num_count_2m','f_current_app_first_app_install_days','f_sms_loan_v2_action_raise_distinct_amount_media_2m','f_sms_loan_v2_action_overdue_distinct_amount_count_1m','f_sms_loan_v2_action_amount_distinct_amount_avg_1m','f_sms_loan_v2_action_raise_distinct_amount_media_2w','f_sms_loan_v2_action_remind_distinct_amount_avg_2w','f_app_list_retentive_time_220622_com_facebook_lite','f_current_app_nearly_1_months_app_percentage','f_app_borrowing_avg_loan_and_risk_interval_hours','f_scrapy_apps_entertainment_count','f_intermediate_feature_gender_male_marital_married','f_sms_basic_v2_phone_num_count_3w','f_sms_loan_v2_action_amount_distinct_amount_avg_1w','f_sms_loan_v2_action_remind_distinct_amount_avg_1m','f_sms_loan_v2_action_raise_distinct_amount_media_3m','f_app_borrowing_curr_not_vip005_overdue_count','f_app_crc_classification_ins_amount_overdue_avg','f_sms_loan_v2_action_raise_distinct_amount_media_3w','f_sms_loan_v2_action_remind_distinct_amount_avg_1w','f_sms_loan_v2_action_amount_distinct_amount_avg_2m','f_app_borrowing_curr_vip005_overdue_count','f_sms_basic_v2_phone_num_count_3m','f_scrapy_apps_communication_count','f_sms_loan_v2_action_amount_distinct_amount_avg_2w','f_app_borrowing_curr_all_overdue_amount','f_sms_loan_v2_action_approval_distinct_amount_count_3m','f_sms_loan_v2_action_coupon_3w','f_sms_loan_v2_action_approval_amount_min_1w','f_sms_loan_v2_action_approval_distinct_amount_count_3w','f_sms_loan_v2_action_overdue_distinct_amount_count_3w','f_sms_loan_v2_action_approval_amount_min_1m','f_sms_loan_v2_action_amount_distinct_amount_avg_3m','f_sms_loan_v2_action_overdue_distinct_amount_count_3m','f_sms_loan_v2_action_amount_distinct_amount_avg_3w','f_app_crc_classification_ins_credit_sanctioned_is_zero_sum','f_sms_loan_v2_action_approval_amount_min_2w','f_sms_loan_v2_action_approval_distinct_amount_count_2m','f_sms_loan_v2_action_approval_distinct_amount_count_2w','f_app_crc_classification_ins_legal_flag_sum','f_scrapy_apps_comics_count','f_sms_loan_v2_action_overdue_distinct_amount_count_2w','f_app_crc_classification_prod_type_no_of_accounts_sum','f_sms_loan_v2_action_overdue_1w','f_app_user_personal_residential_type','f_sms_loan_v2_action_approval_distinct_amount_count_1w','f_app_user_personal_marital_status','f_sms_loan_v2_action_overdue_distinct_amount_count_2m','f_app_crc_classification_ins_balance_credit_rate','f_sms_loan_v2_action_approval_amount_min_2m','f_sms_loan_v2_action_overdue_1m','f_sms_loan_v2_action_approval_amount_min_3w','f_sms_loan_v2_action_settle_amount_count_1w','f_sms_loan_v2_action_coupon_1w','f_sms_loan_v2_all_total_3m','f_sms_loan_v2_action_settle_amount_count_1m','f_sms_loan_v2_action_overdue_2w','f_app_list_retentive_time_220622_com_pbank_newcredit','f_sms_loan_v2_action_approval_amount_min_3m','f_sms_loan_v2_all_total_3w','f_sms_loan_v2_action_overdue_2m','f_sms_loan_v2_action_coupon_2m','f_sms_loan_v2_action_overdue_3m','f_app_borrowing_curr_borrowing_num','f_scrapy_apps_books_reference_count','f_sms_loan_v2_action_coupon_2w','f_current_app_nearly_3_months_app_count','f_user_device_sd_total_size','f_intermediate_feature_gender_female_marital_married','f_sms_loan_v2_action_overdue_3w','f_app_user_is_blacklist','f_sms_loan_v2_action_coupon_3m','f_app_user_personal_highest_school_record','f_sms_loan_v2_action_settle_amount_count_3w','f_sms_loan_v2_all_total_1m','f_sms_loan_v2_action_settle_amount_count_3m','f_app_user_permission_sms','f_app_borrowing_min_overdue_days','f_sms_loan_v2_all_total_1w','f_app_crc_classification_ins_oustanding_balance_sum','f_sms_loan_v2_action_settle_amount_count_2w','f_sms_basic_v2_same_opposite_2m_6','f_sms_loan_v2_all_total_2m','f_user_device_resolution_height','f_sms_loan_v2_action_remind_distinct_amount_media_1m','f_app_apply_bvn_status','f_sms_basic_v2_same_opposite_2m_9','f_sms_basic_v2_same_opposite_2m_2','f_sms_loan_v2_action_remind_distinct_amount_avg_3m','f_sms_loan_v2_action_settle_amount_count_2m','f_sms_loan_v2_action_raise_distinct_amount_media_1m','f_sms_loan_v2_action_remind_distinct_amount_avg_3w','f_sms_loan_v2_action_raise_distinct_amount_media_1w','f_app_list_retentive_time_220622_com_tingo_credit_money_nigeria','f_sms_loan_v2_action_coupon_1m','f_sms_loan_v2_all_total_2w','f_sms_basic_v2_same_opposite_2m_20','f_app_user_identity_other_licence_path_is_null','f_sms_basic_v2_same_opposite_2m_25','f_sms_loan_v2_action_raise_2m','f_sms_loan_v2_action_coupon_amount_min_1m','f_sms_loan_v2_action_raise_2w','f_sms_loan_v2_action_coupon_amount_count_1m','f_sms_loan_v2_action_approval_distinct_amount_avg_3w','f_sms_basic_v2_same_opposite_3m_60','f_sms_loan_v2_action_coupon_amount_min_1w','f_sms_loan_v2_action_approval_distinct_amount_avg_3m','f_sms_loan_v2_action_coupon_amount_count_1w','f_app_borrowing_curr_max_overdue','f_sms_loan_v2_action_raise_3m','f_sms_loan_v2_invite_3w','f_sms_basic_v2_same_opposite_2m_12','f_sms_basic_v2_same_opposite_2m_15','f_sms_loan_v2_action_raise_3w','f_sms_loan_v2_action_coupon_amount_count_2m','f_app_user_identity_birthday_age','f_intermediate_feature_gender_male_marital_married_lr','f_sms_loan_v2_action_coupon_amount_count_2w','f_app_borrowing_curr_num','f_have_crc','f_sms_loan_v2_action_raise_distinct_amount_count_1m','f_app_list_retentive_time_220622_com_facebook_orca','f_user_device_can_use_storage_size','f_sms_loan_v2_action_coupon_amount_count_3m','f_app_install_detail_same_app_count','f_sms_loan_v2_action_coupon_amount_count_3w','f_sms_loan_v2_action_raise_distinct_amount_count_2m','f_app_install_detail_same_curr_device_id','f_sms_loan_v2_action_raise_1m','f_sms_loan_v2_action_settle_distinct_amount_avg_3m','f_sms_loan_v2_action_raise_1w','f_scrapy_apps_finance_top_10','f_sms_basic_v2_same_opposite_3m_30','f_sms_loan_v2_action_settle_distinct_amount_avg_3w','f_sms_loan_v2_action_raise_distinct_amount_count_1w','f_app_borrowing_curr_vip005_all_overdue_amount','f_sms_loan_v2_action_distinct_overdue_1w','f_sms_basic_v2_same_opposite_3m_25','f_sms_loan_v2_action_settle_distinct_amount_avg_2m','f_sms_loan_v2_action_raise_distinct_amount_count_3m','f_sms_basic_v2_same_opposite_3m_20','f_sms_loan_v2_action_distinct_overdue_1m','f_sms_loan_v2_action_settle_distinct_amount_avg_2w','f_sms_loan_v2_action_raise_distinct_amount_count_2w','f_app_user_identity_id_card_positive_path_is_null','f_sms_basic_v2_same_opposite_3m_15','f_sms_loan_v2_action_settle_distinct_amount_avg_1m','f_sms_basic_v2_same_opposite_3m_12','f_app_crc_classification_ins_amount_overdue_zero_avg','f_scrapy_apps_art_design_count','f_sms_loan_v2_action_settle_distinct_amount_avg_1w','f_sms_loan_v2_action_raise_distinct_amount_count_3w','f_scrapy_apps_personalization_count','f_sms_basic_v2_same_opposite_1w_6','f_sms_basic_v2_same_opposite_1w_2','f_sms_loan_v2_action_distinct_overdue_3w','f_scrapy_apps_sports_count','f_sms_loan_v2_action_coupon_amount_min_3m','f_app_install_detail_same_uid','f_sms_loan_v2_action_coupon_amount_min_3w','f_user_device_total_storage_size','f_sms_loan_v2_action_distinct_overdue_3m','f_sms_basic_v2_same_opposite_1w_9','f_sms_loan_v2_action_coupon_amount_min_2m','f_scrapy_apps_finance_top_50','f_sms_loan_v2_action_distinct_overdue_2w','f_sms_loan_v2_action_coupon_amount_min_2w','f_app_borrowing_min_prepayment_hours','f_sms_loan_v2_action_distinct_overdue_2m','f_sms_loan_v2_action_distinct_approval_1w','f_sms_basic_v2_same_opposite_1m_60','f_sms_loan_v2_action_remind_amount_count_1w','f_app_borrowing_curr_vip005_max_overdue_amount','f_app_crc_classification_ins_approved_credit_sanctioned_single_avg','f_sms_loan_v2_action_distinct_approval_1m','f_app_list_retentive_time_220622_com_microsoft_office_word','f_app_borrowing_max_overdue_days','f_scrapy_apps_productivity_count','f_scrapy_apps_photography_count','f_sms_loan_v2_action_remind_amount_count_1m','f_app_crc_classification_prod_type_recent_overdue_date_diff_days_avg','f_app_borrowing_curr_overdue_count','f_scrapy_apps_dating_count','f_sms_loan_v2_action_distinct_approval_3w','f_sms_loan_v2_action_distinct_approval_3m','f_app_install_detail_same_device_id','f_intermediate_feature_total_storage_size','f_app_borrowing_curr_not_vip005_max_overdue_amount','f_app_borrowing_max_prepayment_hours','f_sms_loan_v2_action_distinct_approval_2w','f_sms_basic_v2_same_opposite_1m_30','f_app_crc_classification_prod_type_sanctioned_amount_avg','f_sms_loan_v2_action_distinct_approval_2m','f_app_crc_classification_prod_type_amount_overdue_sum','f_sms_basic_v2_same_opposite_3w_6','f_app_user_device_brand_8','f_app_user_device_brand_9','f_sms_loan_v2_action_overdue_distinct_amount_media_1w','f_app_user_device_brand_6','f_sms_basic_v2_same_opposite_3w_2','f_app_user_device_brand_7','f_app_user_device_brand_4','f_app_user_device_brand_5','f_user_device_time_country_15','f_user_device_time_country_14','f_sms_loan_v2_action_overdue_distinct_amount_media_1m','f_sms_basic_v2_same_opposite_1m_20','f_user_device_time_country_11','f_user_device_time_country_10','f_user_device_time_country_13','f_user_device_time_country_12','f_sms_basic_v2_same_opposite_1m_25','f_app_user_device_brand_2','f_app_user_device_brand_3','f_app_user_device_brand_1','f_sms_basic_v2_same_opposite_3w_9','f_sms_basic_v2_same_opposite_2m_60','f_app_borrowing_curr_not_vip005_all_overdue_amount','f_sms_basic_v2_square_count_3w','f_sms_basic_v2_same_opposite_1m_12','f_app_user_identity_gender','f_sms_basic_v2_same_opposite_1m_15','f_app_apply_bvn_mobile','f_sms_loan_v2_action_overdue_distinct_amount_media_3w','f_sms_loan_v2_action_remind_amount_count_3w','f_intermediate_feature_gender_male_marital_single','f_sms_loan_v2_action_approval_distinct_amount_avg_1w','f_sms_loan_v2_action_overdue_distinct_amount_media_3m','f_sms_loan_v2_action_approval_distinct_amount_avg_1m','f_app_crc_classification_ins_credit_sanctioned_is_zero_avg_single','f_sms_loan_v2_action_remind_amount_count_3m','f_app_borrowing_curr_vip005_max_loan_amount','f_sms_loan_v2_action_overdue_distinct_amount_media_2w','f_scrapy_apps_maps_count','f_sms_loan_v2_action_remind_amount_count_2w','f_user_device_resolution_width','f_scrapy_apps_weather_count','f_current_app_nearly_3_months_app_percentage','f_sms_loan_v2_action_approval_distinct_amount_avg_2w','f_sms_loan_v2_action_overdue_distinct_amount_media_2m','f_app_crc_classification_prod_type_amount_overdue_zero_avg','f_sms_loan_v2_action_approval_distinct_amount_avg_2m','f_app_user_location_type','f_sms_loan_v2_action_remind_amount_count_2m','f_sms_basic_v2_same_opposite_2m_30','f_app_list_retentive_time_220622_com_loan_cash_credit_easycredit_nigeria','f_app_user_device_brand_48','f_app_user_device_brand_49','f_app_apply_apply_submit_diff_time','f_app_user_device_brand_44','f_app_user_device_brand_45','f_app_user_device_brand_46','f_app_user_device_brand_47','f_app_user_device_brand_40','f_sms_loan_v2_action_amount_amount_min_2w','f_app_user_device_brand_41','f_app_user_device_brand_42','f_sms_loan_v2_action_overdue_amount_count_3m','f_app_user_device_brand_43','f_sms_loan_v2_action_amount_amount_max_1w','f_app_user_device_brand_50','f_sms_loan_v2_action_overdue_amount_count_2w','f_sms_loan_v2_action_amount_amount_max_1m','f_app_crc_classification_ins_count','f_sms_loan_v2_action_amount_amount_min_3m','f_app_borrowing_avg_overdue_days','f_sms_loan_v2_action_amount_amount_min_3w','f_app_user_device_brand_37','f_app_user_device_brand_38','f_app_user_device_brand_39','f_app_user_device_brand_33','f_app_user_device_brand_34','f_app_user_device_brand_35','f_app_user_device_brand_36','f_app_user_device_brand_30','f_app_user_device_brand_31','f_sms_loan_v2_action_amount_amount_max_2w','f_app_user_device_brand_32','f_sms_loan_v2_action_overdue_amount_count_3w','f_sms_loan_v2_action_amount_amount_max_2m','f_app_borrowing_curr_vip005_outstanding_loan_age','f_sms_loan_v2_action_coupon_distinct_amount_count_1w','f_sms_basic_v2_same_opposite_1m_9','f_sms_loan_v2_action_distinct_remind_2w','f_app_crc_classification_prod_type_total_outstanding_balance_avg','f_sms_basic_v2_send_count_2w','f_sms_basic_v2_same_opposite_1m_6','f_scrapy_apps_business_count','f_sms_loan_v2_action_coupon_distinct_amount_count_1m','f_app_user_device_brand_62','f_sms_loan_v2_action_overdue_amount_count_1m','f_app_user_device_brand_63','f_sms_basic_v2_same_opposite_1m_2','f_app_user_device_brand_64','f_sms_loan_v2_action_settle_amount_min_1m','f_sms_basic_v2_send_count_2m','f_sms_loan_v2_action_amount_amount_min_1m','f_sms_loan_v2_action_settle_amount_min_1w','f_sms_loan_v2_action_distinct_remind_2m','f_app_user_device_brand_59','f_sms_loan_v2_action_distinct_remind_3w','f_sms_basic_v2_send_count_3w','f_app_user_device_brand_55','f_app_user_device_brand_56','f_app_user_device_brand_57','f_app_user_device_brand_58','f_app_user_device_brand_51','f_app_user_device_brand_52','f_sms_loan_v2_action_amount_amount_min_1w','f_app_user_device_brand_53','f_app_user_device_brand_54','f_sms_loan_v2_action_overdue_amount_count_2m','f_sms_loan_v2_action_distinct_remind_3m','f_sms_basic_v2_send_count_3m','f_app_user_device_brand_60','f_sms_loan_v2_action_distinct_settle_1m','f_app_crc_classification_prod_type_amount_overdue_zero_sum','f_app_user_device_brand_61','f_sms_loan_v2_action_settle_amount_min_2m','f_sms_loan_v2_action_settle_amount_min_2w','f_sms_loan_v2_action_overdue_amount_count_1w','f_sms_loan_v2_action_amount_amount_min_2m','f_sms_loan_v2_action_distinct_settle_1w','f_sms_loan_v2_action_approval_amount_count_2w','f_sms_loan_v2_action_settle_amount_max_2m','f_sms_loan_v2_action_remind_amount_max_3m','f_app_crc_classification_ins_overdue_credit_rate','f_app_crc_classification_prod_type_recent_overdue_date_diff_days_sum','f_sms_loan_v2_action_remind_amount_max_2w','f_sms_loan_v2_action_distinct_settle_2m','f_sms_loan_v2_action_settle_amount_max_1m','f_app_borrowing_curr_user_count','f_intermediate_feature_sd_total_size','f_sms_loan_v2_action_settle_amount_max_1w','f_sms_loan_v2_action_distinct_settle_2w','f_sms_loan_v2_action_approval_amount_count_2m','f_sms_basic_v2_phone_rate_2w','f_sms_loan_v2_action_distinct_remind_1w','f_sms_loan_v2_action_approval_amount_count_3w','f_sms_loan_v2_action_settle_amount_max_3m','f_sms_loan_v2_action_remind_amount_max_2m','f_user_device_cpu_num','f_app_borrowing_curr_not_vip005_outstanding_loan_age','f_sms_loan_v2_action_distinct_settle_3m','f_sms_loan_v2_action_remind_amount_max_1w','f_sms_basic_v2_phone_rate_3m','f_sms_loan_v2_action_approval_amount_count_3m','f_app_crc_classification_ins_credit_overdue_minus','f_sms_loan_v2_action_distinct_settle_3w','f_sms_basic_v2_receive_count_3w','f_sms_basic_v2_phone_rate_3w','f_sms_loan_v2_action_distinct_remind_1m','f_sms_loan_v2_action_settle_amount_max_2w','f_app_user_device_brand_26','f_app_user_device_brand_27','f_intermediate_feature_gender_female_marital_single','f_app_user_device_brand_28','f_app_user_device_brand_29','f_app_user_device_brand_22','f_app_user_device_brand_23','f_sms_loan_v2_action_remind_amount_max_1m','f_app_user_device_brand_24','f_app_user_device_brand_25','f_app_user_device_brand_20','f_app_user_device_brand_21','f_sms_loan_v2_action_settle_amount_max_3w','f_app_user_device_brand_19','f_app_user_device_brand_15','f_sms_loan_v2_action_approval_amount_count_1w','f_app_user_device_brand_16','f_app_user_device_brand_17','f_app_user_device_brand_18','f_app_user_device_brand_11','f_app_user_device_brand_12','f_app_user_device_brand_13','f_app_user_device_brand_14','f_app_user_device_brand_10','f_sms_loan_v2_action_approval_amount_count_1m','f_sms_loan_v2_action_distinct_coupon_2w','f_sms_loan_v2_action_distinct_raise_3m','f_sms_basic_v2_same_opposite_3m_9','f_sms_basic_v2_same_opposite_3m_6','f_sms_loan_v2_action_distinct_coupon_2m','f_app_borrowing_last_rank_select_amount','f_sms_loan_v2_action_distinct_raise_2w','f_app_list_retentive_time_220622_team_opay_pay','f_scrapy_apps_social_count','f_sms_loan_v2_action_distinct_coupon_3w','f_current_app_app_count','f_sms_loan_v2_action_distinct_coupon_3m','f_sms_basic_v2_same_opposite_3m_2','f_sms_loan_v2_action_distinct_raise_3w','f_sms_loan_v2_action_amount_amount_count_3m','f_sms_loan_v2_action_amount_amount_count_3w','f_sms_basic_v2_phone_rate_1w','f_sms_loan_v2_action_distinct_raise_1m','f_sms_basic_v2_phone_rate_2m','f_app_crc_classification_ins_oustanding_balance_avg','f_user_device_mac_address_is_blank','f_sms_loan_v2_action_distinct_coupon_1w','f_sms_loan_v2_action_distinct_raise_2m','f_sms_loan_v2_action_remind_amount_max_3w','f_sms_basic_v2_phone_rate_1m','f_sms_loan_v2_action_distinct_raise_1w','f_app_crc_classification_prod_type_no_acc_last_six_mon_avg','f_intermediate_feature_can_use_storage_size','f_app_list_retentive_time_220622_ng_com_fairmoney_fairmoney','f_sms_loan_v2_action_distinct_coupon_1m','f_app_borrowing_last_select_amount','f_sms_loan_v2_action_coupon_distinct_amount_count_3w','f_sms_loan_v2_action_coupon_distinct_amount_avg_1m','f_sms_loan_v2_action_coupon_distinct_amount_count_3m','f_sms_loan_v2_action_remind_amount_min_3w','f_app_apply_bank_match_percentage','f_sms_loan_v2_action_coupon_distinct_amount_avg_1w','f_sms_loan_v2_action_settle_amount_min_3m','f_app_list_retentive_time_220622_com_loan_cash_credit_okash_nigeria','f_sms_loan_v2_action_coupon_amount_max_3w','f_sms_loan_v2_action_remind_amount_min_3m','f_sms_loan_v2_action_settle_amount_min_3w','f_sms_loan_v2_invite_1m','f_app_crc_classification_ins_amount_overdue_sum','f_sms_loan_v2_action_raise_amount_min_3w','f_sms_basic_v2_send_count_1w','f_sms_loan_v2_action_coupon_distinct_amount_count_2w','f_sms_loan_v2_action_raise_amount_min_3m','f_sms_loan_v2_action_coupon_distinct_amount_count_2m','f_app_crc_classification_ins_approved_credit_sanctioned_sum','f_sms_loan_v2_action_coupon_distinct_amount_avg_2w','f_sms_loan_v2_action_coupon_amount_max_3m','f_sms_basic_v2_send_count_1m','f_sms_loan_v2_action_coupon_amount_max_2w','f_sms_loan_v2_action_coupon_distinct_amount_avg_2m','f_sms_loan_v2_action_settle_3w','f_sms_loan_v2_action_settle_3m','f_sms_loan_v2_action_raise_distinct_amount_avg_1w','f_sms_loan_v2_action_remind_amount_min_1w','f_app_borrowing_curr_max_overdue_amount','f_sms_loan_v2_action_amount_amount_max_3w','f_sms_loan_v2_invite_2w','f_sms_loan_v2_action_coupon_amount_max_2m','f_intermediate_feature_total_memory_size','f_sms_loan_v2_action_coupon_distinct_amount_avg_3w','f_scrapy_apps_android_wear_count','f_sms_loan_v2_action_coupon_amount_max_1w','f_sms_loan_v2_action_amount_amount_max_3m','f_sms_loan_v2_action_remind_amount_min_1m','f_sms_loan_v2_action_raise_distinct_amount_avg_2w','f_sms_loan_v2_action_settle_2w','f_sms_loan_v2_invite_3m','f_sms_loan_v2_action_coupon_distinct_amount_avg_3m','f_app_crc_classification_ins_credit_sanctioned_is_zero_avg','f_sms_loan_v2_action_raise_distinct_amount_avg_2m','f_app_user_street_is_null','f_app_list_retentive_time_220622_cn_xender','f_sms_loan_v2_action_settle_2m','f_sms_loan_v2_action_remind_amount_min_2w','f_sms_loan_v2_invite_1w','f_sms_loan_v2_action_coupon_amount_max_1m','f_app_list_retentive_time_220622_us_zoom_videomeetings','f_sms_loan_v2_action_remind_amount_min_2m','f_sms_loan_v2_action_raise_distinct_amount_avg_3w','f_app_crc_classification_ins_legal_flag_avg','f_sms_loan_v2_invite_2m','f_sms_loan_v2_action_raise_distinct_amount_avg_3m','f_sms_loan_v2_action_settle_1w','f_sms_loan_v2_action_settle_1m','f_scrapy_apps_house_count','f_app_crc_classification_prod_count','f_sms_loan_v2_action_overdue_amount_min_3w','f_sms_loan_v2_action_remind_distinct_amount_media_2m','f_sms_loan_v2_action_amount_distinct_amount_media_1m','f_sms_loan_v2_platform_distinct_1m','f_sms_loan_v2_action_overdue_amount_max_2w','f_sms_loan_v2_action_amount_distinct_amount_media_1w','f_sms_loan_v2_action_overdue_amount_max_2m','f_sms_loan_v2_action_settle_distinct_amount_media_1m','f_sms_loan_v2_platform_distinct_1w','f_sms_loan_v2_action_remind_distinct_amount_media_1w','f_scrapy_apps_health_count','f_app_crc_classification_ins_amount_overdue_zero_sum','f_sms_loan_v2_action_remind_distinct_amount_media_3m','f_app_crc_classification_ins_approved_credit_sanctioned_avg','f_app_crc_classification_ins_credit_sanctioned_is_zero_sum_single','f_sms_loan_v2_action_overdue_amount_max_3w','f_sms_loan_v2_action_amount_distinct_amount_media_2m','f_scrapy_apps_tools_count','f_sms_loan_v2_action_amount_distinct_amount_media_2w','f_sms_basic_v2_same_opposite_2w_30','f_sms_loan_v2_action_raise_amount_max_1m','f_sms_loan_v2_action_overdue_amount_max_3m','f_sms_loan_v2_action_remind_distinct_amount_media_2w','f_sms_loan_v2_action_raise_distinct_amount_avg_1m','f_sms_loan_v2_action_raise_amount_max_1w','f_intermediate_feature_mac_address_is_blank','f_sms_loan_v2_platform_distinct_3w','f_app_user_user_type_4','f_sms_loan_v2_action_settle_distinct_amount_count_3m','f_app_user_user_type_5','f_app_user_user_type_2','f_sms_loan_v2_action_raise_amount_min_2m','f_app_user_user_type_3','f_app_user_user_type_1','f_sms_loan_v2_action_amount_distinct_amount_media_3m','f_sms_loan_v2_platform_distinct_3m','f_sms_loan_v2_action_overdue_amount_min_1w','f_sms_loan_v2_action_overdue_amount_min_2m','f_sms_basic_v2_same_opposite_2w_20','f_sms_loan_v2_action_settle_distinct_amount_count_3w','f_sms_loan_v2_action_remind_distinct_amount_media_3w','f_app_user_identity_loan_use','f_sms_loan_v2_action_amount_distinct_amount_media_3w','f_sms_loan_v2_action_raise_amount_min_2w','f_scrapy_apps_games_count','f_sms_basic_v2_same_opposite_2w_25','f_sms_basic_v2_same_opposite_2w_6','f_sms_loan_v2_platform_distinct_2w','f_app_user_identity_salary_sheet_recently_path_is_null','f_sms_basic_v2_same_opposite_2w_2','f_app_crc_classification_ins_credit_balance_minus','f_sms_loan_v2_action_settle_distinct_amount_count_2m','f_app_crc_classification_ins_no_overdue_amount_single_sum','f_sms_loan_v2_action_raise_amount_min_1m','f_sms_loan_v2_action_overdue_amount_min_2w','f_sms_loan_v2_action_overdue_amount_max_1w','f_app_borrowing_curr_not_vip005_num','f_sms_loan_v2_platform_distinct_2m','f_sms_loan_v2_action_settle_distinct_amount_count_2w','f_sms_loan_v2_action_overdue_amount_min_3m','f_sms_loan_v2_action_overdue_amount_max_1m','f_app_list_retentive_time_220622_com_google_android_talk','f_sms_basic_v2_same_opposite_2w_15','f_sms_basic_v2_same_opposite_2w_9','f_app_borrowing_curr_max_loan_amount','f_sms_loan_v2_action_raise_amount_min_1w','f_app_borrowing_curr_outstanding_loan_age','f_sms_basic_v2_same_opposite_2w_12','f_sms_loan_v2_action_amount_distinct_amount_count_1w','f_app_crc_classification_prod_type_no_acc_last_six_mon_sum','f_sms_loan_v2_action_remind_distinct_amount_count_2w','f_user_device_time_zone_11','f_user_device_time_zone_10','f_sms_loan_v2_action_remind_3w','f_sms_loan_v2_action_remind_distinct_amount_count_2m','f_sms_loan_v2_action_amount_distinct_amount_count_1m','f_sms_loan_v2_action_settle_distinct_amount_count_1m','f_scrapy_apps_shopping_count','f_user_device_is_emulator','f_sms_loan_v2_action_settle_distinct_amount_count_1w','f_sms_loan_v2_action_settle_distinct_amount_media_3w','f_sms_loan_v2_action_amount_distinct_amount_count_2w','f_intermediate_feature_gender_female_age','f_sms_loan_v2_action_remind_distinct_amount_count_3w','f_sms_loan_v2_action_remind_distinct_amount_count_3m','f_sms_loan_v2_action_amount_distinct_amount_count_2m','f_user_device_time_zone_8','f_sms_loan_v2_action_amount_distinct_amount_count_3m','f_user_device_time_zone_7','f_user_device_time_zone_9','f_app_crc_classification_ins_credit_balance_minus_single','f_user_device_time_zone_4','f_user_device_time_zone_3','f_app_apply_bvn_birthday','f_user_device_time_zone_6','f_sms_loan_v2_action_approval_distinct_amount_media_3w','f_user_device_time_zone_5','f_user_device_resolution_total','f_user_device_time_zone_2','f_user_device_time_zone_1','f_sms_loan_v2_action_remind_1m','f_sms_loan_v2_action_amount_distinct_amount_count_3w','f_sms_loan_v2_action_settle_distinct_amount_media_2w','f_app_list_retentive_time_220622_ni_lcredit_money_cash','f_intermediate_feature_gender_male_age_lr','f_app_user_personal_residential_detail_address_len','f_sms_loan_v2_action_remind_1w','f_sms_loan_v2_action_approval_distinct_amount_media_3m','f_sms_loan_v2_action_approval_distinct_amount_media_2m','f_sms_loan_v2_action_raise_amount_max_2m','f_sms_loan_v2_action_amount_amount_count_2m','f_sms_loan_v2_action_settle_distinct_amount_media_3m','f_scrapy_apps_libraries_count','f_sms_loan_v2_action_approval_distinct_amount_media_2w','f_sms_loan_v2_action_raise_amount_max_2w','f_sms_loan_v2_action_amount_amount_count_2w','f_sms_loan_v2_action_remind_2m','f_sms_loan_v2_action_settle_distinct_amount_media_1w','f_app_borrowing_last_loan_and_risk_interval_hours','f_sms_loan_v2_action_remind_2w','f_app_list_retentive_time_220622_com_whatsapp','f_user_device_total_memory_size','f_current_app_nearly_1_months_app_count','f_app_crc_classification_prod_type_no_of_accounts_avg','f_sms_loan_v2_action_approval_distinct_amount_media_1m','f_sms_loan_v2_action_raise_amount_max_3m','f_app_borrowing_curr_not_vip005_max_overdue','f_sms_loan_v2_action_amount_amount_count_1m','f_sms_loan_v2_action_approval_distinct_amount_media_1w','f_sms_loan_v2_action_settle_distinct_amount_media_2m','f_sms_loan_v2_action_amount_amount_count_1w','f_scrapy_apps_medical_count','f_sms_loan_v2_action_raise_amount_max_3w','f_scrapy_apps_lifestyle_count','f_sms_loan_v2_action_remind_3m','f_sms_basic_v2_receive_count_1m','f_scrapy_apps_parenting_count','f_sms_loan_v2_action_coupon_distinct_amount_media_3m','f_sms_basic_v2_same_opposite_3w_15','f_sms_basic_v2_same_opposite_3w_12','f_sms_loan_v2_action_coupon_distinct_amount_media_2w','f_scrapy_apps_auto_vehicles_count','f_sms_basic_v2_same_opposite_3w_20','f_app_list_retentive_time_220622_com_beatha_mkopokaka','f_intermediate_feature_gender_female_age_lr','f_sms_loan_v2_action_coupon_distinct_amount_media_2m','f_sms_basic_v2_same_opposite_3w_25','f_user_device_sd_can_use_size','f_app_crc_classification_ins_accounts_avg','f_sms_loan_v2_action_coupon_distinct_amount_media_1w','f_sms_basic_v2_receive_count_3m','f_sms_basic_v2_same_opposite_3w_30','f_sms_basic_v2_same_opposite_1w_60','f_app_crc_classification_ins_amount_overdue_single_avg','f_sms_basic_v2_receive_count_2w','f_sms_loan_v2_action_approval_amount_max_1m','f_current_app_nearly_2_months_app_count','f_sms_basic_v2_receive_count_2m','f_app_install_detail_same_last_submit_time','f_user_device_network_type_6','f_user_device_network_type_7','f_user_device_network_type_8','f_user_device_network_type_9','f_sms_loan_v2_action_remind_distinct_amount_count_1w','f_sms_loan_v2_action_remind_distinct_amount_count_1m','f_app_list_retentive_time_220622_com_lagos_mintloan_much_money','f_sms_loan_v2_action_coupon_distinct_amount_media_3w','f_user_device_network_type_2','f_user_device_network_type_3','f_user_device_network_type_4','f_user_device_network_type_5','f_sms_basic_v2_receive_count_1w','f_user_device_network_type_1','f_sms_loan_v2_action_amount_3m','f_sms_loan_v2_action_distinct_amount_1m','f_sms_loan_v2_action_amount_3w','f_scrapy_apps_finance_top_100','f_scrapy_apps_news_count','f_sms_basic_v2_same_opposite_3w_60','f_app_user_identity_middle_name_is_null','f_sms_basic_v2_same_opposite_1w_30','f_scrapy_apps_finance_count','f_sms_loan_v2_action_overdue_amount_min_1m','f_app_apply_bvn_name','f_app_crc_classification_ins_overdue_balance_minus','f_sms_basic_v2_same_opposite_1w_25','f_app_crc_classification_prod_type_amount_overdue_avg','f_intermediate_feature_gender_male_age','f_sms_basic_v2_same_opposite_1w_20','f_app_list_retentive_time_220622_com_loancash_android','f_sms_basic_v2_same_opposite_2w_60','f_sms_loan_v2_action_distinct_amount_3m','f_sms_loan_v2_action_distinct_amount_3w','f_sms_basic_v2_same_opposite_1w_12','f_sms_loan_v2_action_distinct_amount_1w','f_sms_basic_v2_same_opposite_1w_15','f_app_list_retentive_time_220622_com_microsoft_office_excel','f_sms_loan_v2_action_distinct_amount_2m','f_scrapy_apps_beauty_count','f_sms_loan_v2_action_distinct_amount_2w'
]

len(feature_num)

749

In [52]:
# 加载StandardScaler
loaded_scaler = joblib.load('scaler.joblib')

In [53]:
# 在需要标准化新数据时使用加载的scaler
X_train_num = loaded_scaler.transform(train_data[feature_num])
X_test_num = loaded_scaler.transform(test_data[feature_num])
X_idt_num = loaded_scaler.transform(IDT_test[feature_num])

## APP 数据处理

In [54]:
# 从 JSON 文件加载 Tokenizer 配置
with open('tokenizer_config_app.json', 'r', encoding='utf-8') as json_file:
    loaded_tokenizer_json = json_file.read()
    tokenizer_app = tf.keras.preprocessing.text.tokenizer_from_json(loaded_tokenizer_json)

In [55]:
# 文本向量化
max_len_app = 150  # 设定应用程序列表的最大长度
vocab_size = len(tokenizer_app.word_index) + 1

11925

In [57]:
X_train_app_seq = tokenizer_app.texts_to_sequences(train_data.preprocessed_app.apply(lambda x: list(x.keys())))
X_test_app_seq = tokenizer_app.texts_to_sequences(test_data.preprocessed_app.apply(lambda x: list(x.keys())))
X_idt_app_seq = tokenizer_app.texts_to_sequences(IDT_test.preprocessed_app.apply(lambda x: list(x.keys())))

In [60]:
X_train_app = pad_sequences(X_train_app_seq, maxlen=max_len_app, padding='post')
X_test_app = pad_sequences(X_test_app_seq, maxlen=max_len_app, padding='post')
X_idt_app = pad_sequences(X_idt_app_seq, maxlen=max_len_app, padding='post')

## 数据合并

In [63]:
train_dataset = tf.data.Dataset.from_tensor_slices(({"text_input": X_train_sms
                                                     , "numeric_input": X_train_num
                                                     , "app_input": X_train_app
                                                    }
                                                    , train_data['d7']))

test_dataset = tf.data.Dataset.from_tensor_slices(({"text_input": X_test_sms
                                                    , "numeric_input": X_test_num
                                                    , "app_input": X_test_app
                                                   }
                                                   , test_data['d7']))

idt_dataset = tf.data.Dataset.from_tensor_slices(({"text_input": X_idt_sms
                                                   , "numeric_input": X_idt_num
                                                   , "app_input": X_idt_app
                                                  }
                                                  , IDT_test['d7']))

In [64]:
# 设置批量大小和缓冲区大小
batch_size = 128
buffer_size = 512

In [65]:
# 打乱并批量处理数据集
train_dataset = train_dataset.shuffle(buffer_size).batch(batch_size)
test_dataset = test_dataset.batch(batch_size)
idt_dataset = idt_dataset.batch(batch_size)

# 模型定义

In [70]:
# 构建TextCNN模型
embedding_dim = 50  # 选择嵌入层的维度
num_filters = 128   # 选择卷积核的数量
filter_sizes = [3, 4, 5]  # 多核卷积的核尺寸

# 定义文本输入
text_input = Input(shape=(max_len_sms,), name="text_input")
embedding_layer = Embedding(input_dim=max_words, output_dim=embedding_dim, input_length=max_len_sms)(text_input)

# 多核卷积
conv_layers = []
for filter_size in filter_sizes:
    conv_layer = Conv1D(filters=num_filters, kernel_size=filter_size, activation='relu')(embedding_layer)
    conv_layers.append(GlobalMaxPooling1D()(conv_layer))

# 合并多核卷积的结果
if len(filter_sizes) > 1:
    merged_conv = concatenate(conv_layers, axis=1)
else:
    merged_conv = conv_layers[0]

# Dropout层
merged_conv = Dropout(0.5)(merged_conv)

# 定义数值特征输入
numeric_input = Input(shape=(len(feature_num),), name="numeric_input")
# 数值特征处理
numeric_input_reshaped = Reshape((len(feature_num), 1))(numeric_input)
# 计算填充长度
padding_length = max_len_sms - len(feature_num)
# 添加填充层
padded_numeric_input = ZeroPadding1D(padding=(0, padding_length))(numeric_input_reshaped)
# 添加卷积层或全连接层
conv1d_numeric = Conv1D(filters=num_filters, kernel_size=3, activation='relu')(padded_numeric_input)
# 池化
global_max_pooling_numeric = GlobalMaxPooling1D()(conv1d_numeric)

# Dropout层
global_max_pooling_numeric = Dropout(0.5)(global_max_pooling_numeric)

# 调整全连接层的形状使其与 merged_conv 的形状相匹配
dense_numeric = Dense(num_filters * len(filter_sizes))(global_max_pooling_numeric)

# 残差连接
residual_layer = Add()([merged_conv, dense_numeric])

# 定义应用程序输入
app_input = Input(shape=(max_len_app,), name="app_input")
embedding_layer_app = Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=max_len_app)(app_input)
conv_layer_app = Conv1D(filters=num_filters, kernel_size=3, activation='relu')(embedding_layer_app)
global_max_pooling_app = GlobalMaxPooling1D()(conv_layer_app)

# Dropout层
global_max_pooling_app = Dropout(0.5)(global_max_pooling_app)

# 合并文本、数值特征和应用程序特征
merged_layer = concatenate([residual_layer, global_max_pooling_app, numeric_input])

# 全连接层
dense_layer = Dense(128, activation='relu')(merged_layer)
output_layer = Dense(1, activation='sigmoid')(dense_layer)

# 构建模型
model = Model(inputs=[text_input, app_input, numeric_input], outputs=output_layer)

In [71]:
# 定义损失函数、优化器和评估指标
loss_fn = BinaryCrossentropy()
optimizer = Adam(learning_rate=0.001)
auc_metric = AUC()



In [72]:
# 编译模型
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=[AUC()])

In [None]:
model.build(input_shape=(batch_size, max_len_sms, len(feature_num), max_len_app))
# 打印模型概要
model.summary()

In [75]:
# 设置保存模型的回调
checkpoint_filepath = "./checkpoint_num_sms_app"  # 替换为实际路径
model_checkpoint_callback = ModelCheckpoint(
    filepath=os.path.join(checkpoint_filepath, "model_{epoch:02d}"),
    monitor='val_auc',  # 监控 AUC 在验证集上的表现
    mode='auto',
    save_best_only=False,
    save_weights_only=False,
    save_format="tf",
    save_freq=1
    
)

In [76]:
early_stopping = tf.keras.callbacks.EarlyStopping(patience=5, restore_best_weights=True)
tensorboard = tf.keras.callbacks.TensorBoard(log_dir="logs")

In [77]:
# 自定义 Callback 来计算验证集 AUC 并直接输出
class CalculateAUCCallback(Callback):
    def __init__(self, validation_data):
        super(CalculateAUCCallback, self).__init__()
        self.validation_data = validation_data

    def on_epoch_end(self, epoch, logs=None):
        val_predictions = self.model.predict(self.validation_data.take(len(list(self.validation_data))))
        val_labels = []
        for batch in self.validation_data:
            val_labels.append(batch[1])
        val_labels = np.concatenate(val_labels, axis=0)
        val_auc = roc_auc_score(val_labels, val_predictions)
        print(f'Epoch {epoch + 1}, IDT AUC: {val_auc:.4f}')

In [None]:
# 使用 model.fit() 进行训练
num_epochs = 20
model.fit(train_dataset,
           epochs=num_epochs,
           validation_data=test_dataset,
           callbacks=[
               model_checkpoint_callback,
               early_stopping,
               tensorboard,
               CalculateAUCCallback(validation_data=idt_dataset)
           ]
)