In [1]:
import numpy as np
import pandas as pd
import pickle
import os
from gensim.models import Word2Vec
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.metrics.pairwise import cosine_similarity
from datetime import datetime, timedelta

In [3]:
# Resolve every path against the kernel's current working directory
current_dir = os.getcwd()
models_dir = os.path.join(current_dir, 'models')

# Load the Word2Vec models (machinery, assembly, supplier)
word2vec_machinery = Word2Vec.load(os.path.join(models_dir, 'word2vec_machinery.model'))
word2vec_assembly = Word2Vec.load(os.path.join(models_dir, 'word2vec_assembly.model'))
word2vec_supplier = Word2Vec.load(os.path.join(models_dir, 'word2vec_supplier.model'))

# Load the fixed feature data
features_path = os.path.join(current_dir, '1001_features.csv')
df_fixed = pd.read_csv(features_path)

In [4]:
# Sentence vectorization helper
def sentence_vector(sentence, model):
    """Average the embeddings of the words in ``sentence`` known to ``model``.

    Args:
        sentence: Iterable of word tokens.
        model: Object exposing ``wv`` (supports ``word in model.wv`` and
            ``model.wv[word]``) and ``vector_size``.

    Returns:
        The element-wise mean of the vectors of all tokens found in
        ``model.wv``, or a zero vector of length ``model.vector_size`` when
        none of the tokens is found.
    """
    known = []
    for token in sentence:
        # Tokens missing from the vocabulary are skipped, not treated as errors.
        if token in model.wv:
            known.append(model.wv[token])

    if not known:
        return np.zeros(model.vector_size)
    return np.mean(known, axis=0)


In [7]:
# Precompute the per-row embedding vectors
def _split_tokens(text):
    """Split ``text`` on whitespace; a non-string value (e.g. NaN) yields no tokens."""
    return text.split() if isinstance(text, str) else []


def precompute_combined_vectors(df, word2vec_machinery, word2vec_assembly, word2vec_supplier):
    """Add sentence-vector columns for the three cleaned text columns to ``df``.

    For each of ``cleaned_machinery``, ``cleaned_assembly`` and
    ``cleaned_supplier`` the cell text is split on whitespace and passed to
    ``sentence_vector`` with the matching model. The results are stored in
    ``machinery_vector``, ``assembly_vector`` and ``supplier_vector``, and
    their concatenation (in that order) in ``combined_vector``.

    Cells that are not strings (such as the NaN that ``pd.read_csv`` produces
    for an empty cell) are treated as having no tokens, so they receive the
    same zero vector ``sentence_vector`` returns for text with no known words.

    Args:
        df: DataFrame containing the three ``cleaned_*`` columns. It is
            modified in place.
        word2vec_machinery: Model used for ``cleaned_machinery``.
        word2vec_assembly: Model used for ``cleaned_assembly``.
        word2vec_supplier: Model used for ``cleaned_supplier``.

    Returns:
        The same ``df`` object, with the four vector columns added.

    Raises:
        KeyError: If one of the ``cleaned_*`` columns is missing.
    """
    sources = (
        ('cleaned_machinery', 'machinery_vector', word2vec_machinery),
        ('cleaned_assembly', 'assembly_vector', word2vec_assembly),
        ('cleaned_supplier', 'supplier_vector', word2vec_supplier),
    )
    for text_col, vector_col, model in sources:
        # dtype=object keeps one array per cell instead of letting pandas
        # try to interpret the list of arrays as 2-D data.
        df[vector_col] = pd.Series(
            [sentence_vector(_split_tokens(text), model) for text in df[text_col]],
            index=df.index,
            dtype=object,
        )

    # Concatenate the three vectors row by row without a row-wise DataFrame.apply.
    vector_cols = [vector_col for _, vector_col, _ in sources]
    df['combined_vector'] = pd.Series(
        [np.hstack(parts) for parts in zip(*(df[col] for col in vector_cols))],
        index=df.index,
        dtype=object,
    )
    return df

In [8]:
# Precompute the vectors: adds the per-field and combined vector columns to df_fixed
df_fixed = precompute_combined_vectors(
    df=df_fixed,
    word2vec_machinery=word2vec_machinery,
    word2vec_assembly=word2vec_assembly,
    word2vec_supplier=word2vec_supplier,
)


In [9]:
# Stack the per-row combined_vector arrays and save them to a separate .npy file
vector_rows = df_fixed['combined_vector'].to_list()
combined_vectors = np.vstack(vector_rows)
npy_path = os.path.join(current_dir, 'combined_vectors.npy')
np.save(npy_path, combined_vectors)


In [11]:
# Write the feature table to CSV without the array-valued vector columns
vector_columns = ['machinery_vector', 'assembly_vector', 'supplier_vector', 'combined_vector']
csv_path = os.path.join(current_dir, 'features_fixed.csv')
df_fixed.drop(columns=vector_columns).to_csv(csv_path, index=False)

print("벡터 사전 계산 및 저장 완료!")

벡터 사전 계산 및 저장 완료!
