In [2]:
import os

# 使用os.getcwd()获取当前工作目录
current_working_directory = os.getcwd()

print(current_working_directory)

/Users/jiebaibai/Downloads/eco_fin/news


In [1]:
import os
import django
from django.conf import settings

os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'eco_fin.settings')
if not settings.configured:
    django.setup()


In [29]:
from r12 import create_news_representation, create_category_feature_extractor,create_user_click_prediction_model,generate_user_representation_async
# generate_user_representation
from news.models import UserBehavior, News, Tag
from gensim.corpora import Dictionary
from gensim.models import LdaModel


In [18]:
from asgiref.sync import sync_to_async

@sync_to_async
def get_news_items():
    return list(News.objects.all().prefetch_related('news_tag'))
@sync_to_async
def get_user_behaviors():
    return list(UserBehavior.objects.select_related('user', 'news').all())
@sync_to_async
def get_negative_samples(exclude_id, R):
    return list(News.objects.exclude(id=exclude_id).order_by('?')[:R])
@sync_to_async
def get_tag_items():
    return list(Tag.objects.all().order_by('id'))

async def load_data_async():
    news_items = await get_news_items()
    user_behaviors = await get_user_behaviors()
    tag_items = await get_tag_items()
    tags = [tag.tag_name for tag in tag_items]
    return news_items, user_behaviors, tag_items, tags

# 运行异步加载数据
news_data, user_behaviors, tag_items, tags = await load_data_async()


In [4]:
from gensim.corpora import Dictionary
from gensim.models import LdaModel
import os

model_dir = './models_data'
if not os.path.exists(model_dir):
    os.makedirs(model_dir)
    # 保存LDA模型和字典
model_path = os.path.join(model_dir, 'lda_model.model')
dictionary_path = os.path.join(model_dir, 'lda_dictionary.dict')
loaded_model = LdaModel.load(model_path)
loaded_dictionary = Dictionary.load(dictionary_path)
# %%

In [5]:
tag_model, tag_encoder = create_category_feature_extractor(tags)
# %%
max_sequence_length = 100  # 最大序列长度
news_vector_dimension = 794  # 新闻向量维度
# %%

In [19]:
import numpy as np


async def prepare_data(user_behaviors, create_news_representation, generate_user_representation, tag_model, tag_encoder,
                 loaded_model, loaded_dictionary, R=5):
    X_user, X_news, y = [], [], []

    for behavior in user_behaviors:
        if behavior.behavior_type in ['Like', 'Read', 'Collect']:
            user_vector = await generate_user_representation(
                behavior.user.userbehavior_set.all(),
                create_news_representation,
                tag_model,
                tag_encoder,
                loaded_model,
                loaded_dictionary,
                max_sequence_length,
                news_vector_dimension
            )
            news_vector = create_news_representation(
                behavior.news,
                tag_model,
                tag_encoder,
                loaded_model,
                loaded_dictionary
            )

            # 添加正样本
            X_user.append(user_vector)
            X_news.append(news_vector)
            y.append(1)  # 正样本标记

            # 随机选择R个负样本
            negative_samples = await get_negative_samples(behavior.news.id, R)
            for negative_news in negative_samples:
                negative_news_vector = create_news_representation(
                    negative_news,
                    tag_model,
                    tag_encoder,
                    loaded_model,
                    loaded_dictionary
                )
                X_user.append(user_vector)
                X_news.append(negative_news_vector)
                y.append(0)  # 负样本标记

    return np.array(X_user), np.array(X_news), np.array(y)



In [21]:
# 假设 prepare_data 已适当调整为直接使用上面加载的数据
X_user, X_news, y = await prepare_data(
    user_behaviors,
    create_news_representation,
    generate_user_representation_async,
    tag_model,
    tag_encoder,
    loaded_model,
    loaded_dictionary,
    R=5
)

(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)
(100, 794)

In [23]:
from sklearn.model_selection import train_test_split
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau


In [30]:
X_user_train, X_user_test, X_news_train, X_news_test, y_train, y_test = train_test_split(
    X_user, X_news, y, test_size=0.2, random_state=42
)

In [31]:
X_user_train

array([[-0.17223816,  0.1725724 , -0.02786588, ...,  0.19489168,
         0.02933551,  0.03868154],
       [-0.09364134, -0.07559231, -0.26513922, ...,  0.20574391,
         0.10839677, -0.00795333],
       [-0.29684007,  0.2562917 , -0.0477336 , ..., -0.06323379,
         0.3316292 , -0.20746158],
       ...,
       [ 0.03662137,  0.07238084,  0.04675443, ..., -0.12684356,
        -0.01503241, -0.14917055],
       [-0.1221838 , -0.03882807,  0.06110515, ...,  0.19476336,
         0.2256023 , -0.16716121],
       [ 0.1690186 ,  0.2801559 ,  0.02794617, ...,  0.38236198,
        -0.19674996,  0.27985093]], dtype=float32)

In [58]:
import importlib
import r12
importlib.reload(r12)

Path exists.


<module 'r12' from '/Users/jiebaibai/Downloads/eco_fin/news/r12.py'>

In [59]:
import contextlib

model = r12.create_user_click_prediction_model(
    news_vector_dim=X_news_train.shape[2],
    user_vector_dim=X_user_train.shape[1]
)



In [60]:
X_news_train.shape[0]

10123

In [61]:
X_user_train.shape[0]

10123

In [62]:
model.summary()

Model: "model_2498"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 news_input (InputLayer)     [(None, 1, 794)]             0         []                            
                                                                                                  
 flatten_2 (Flatten)         (None, 794)                  0         ['news_input[0][0]']          
                                                                                                  
 user_input (InputLayer)     [(None, 128)]                0         []                            
                                                                                                  
 dense_5 (Dense)             (None, 128)                  101760    ['flatten_2[0][0]']           
                                                                                         

In [63]:
early_stopping = EarlyStopping(monitor='val_loss', patience=5, verbose=1)
model_checkpoint = ModelCheckpoint('best_model.h5', save_best_only=True, verbose=1)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, verbose=1)

In [1]:
# 训练模型并重定向输出到文件
with open('training_output.txt', 'w') as f, contextlib.redirect_stdout(f):
    history = model.fit(
        [X_user_train, X_news_train],
        y_train,
        validation_split=0.1,
        epochs=30,
        batch_size=16,
        callbacks=[early_stopping, model_checkpoint, reduce_lr],
        verbose=1
    )
    model.save('my_model.keras')
    test_loss, test_accuracy = model.evaluate([X_user_test, X_news_test], y_test, verbose=1)
    print(f'Test Loss: {test_loss}, Test Accuracy: {test_accuracy}')


NameError: name 'contextlib' is not defined