In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive so files under /content/drive/MyDrive
# (such as the parts CSV read below) are reachable from this Colab runtime.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import pandas as pd
import re

# Load the parts catalogue CSV from the mounted Drive folder into a DataFrame.
file_path = '/content/drive/MyDrive/computer_parts.csv'
computer_parts = pd.read_csv(file_path)
# Data-cleaning helper
def clean_text(text):
    """Strip every character that is not an ASCII letter, digit or whitespace.

    Note that this also removes non-ASCII letters (for example Hangul) and
    punctuation such as '.', ',' and '-'.

    Args:
        text: The cell value to clean. Missing values (None / NaN / pd.NA) are
            accepted and treated as empty text.

    Returns:
        The cleaned string with leading/trailing whitespace removed, or ''
        for a missing value.

    Raises:
        TypeError: If ``text`` is a non-missing, non-string value (raised by
            ``re.sub``). Such values are deliberately not stringified, because
            removing '.' from ``str(12.5)`` would silently turn it into '125'.
    """
    # Empty CSV cells arrive as float NaN, which re.sub cannot process.
    if not isinstance(text, str) and pd.isna(text):
        return ''
    cleaned_text = re.sub(r'[^a-zA-Z0-9\s]', '', text)
    return cleaned_text.strip()

# Run the cleaner over each column of the dataset, one column at a time.
# NOTE(review): clean_text also strips '.', so a decimal price such as
# '1234.50' would become '123450' — confirm the CSV prices carry no decimals.
for column in ('name', 'brand', 'category', 'price'):
    computer_parts[column] = computer_parts[column].apply(clean_text)
# Inspect the cleaned data
print(computer_parts)

                         name   price     brand category
0        Intel Core i712700KF  284953     INTEL      CPU
1           AMD Ryzen 7 5800X  246712       AMD      CPU
2            AMD Ryzen 5 5500  115363       AMD      CPU
3           AMD Ryzen 9 5900X  346131       AMD      CPU
4           AMD Ryzen 7 5700X  223734       AMD      CPU
..                        ...     ...       ...      ...
217                      M170   16700  Logitech    Mouse
218  G304 LIGHTSPEED WIRELESS   31250  Logitech    Mouse
219                  SMM1300Q   39900   Samsung    Mouse
220                SPAJMALPUB    9000   Samsung    Mouse
221                 SMB1500QG   19800   Samsung    Mouse

[222 rows x 4 columns]


In [None]:
# Convert the cleaned price strings to numbers; anything unparseable becomes NaN.
computer_parts['price'] = pd.to_numeric(computer_parts['price'], errors='coerce')
# Drop rows without a usable price. Re-bind instead of mutating in place, and
# rebuild a 0..n-1 index so that row labels keep matching the row positions of
# the TF-IDF / cosine-similarity matrices that are later built from this frame.
computer_parts = computer_parts.dropna(subset=['price']).reset_index(drop=True)

In [None]:
# Keep only the columns that are needed
features = ['name', 'category', 'brand', 'price']
# .copy() yields an independent frame, so the assignment below never writes
# into a view of the previous DataFrame.
computer_parts = computer_parts[features].copy()

# Fill missing values in the text columns only. 'price' is left alone so it
# stays numeric: an empty string there would break the `price <= budget`
# comparison used by the recommender.
text_columns = ['name', 'category', 'brand']
computer_parts[text_columns] = computer_parts[text_columns].fillna('')

# Show basic DataFrame info. info() prints its report itself and returns None,
# so wrapping it in print() only added a stray "None" line to the output.
computer_parts.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 222 entries, 0 to 221
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   name      222 non-null    object
 1   category  222 non-null    object
 2   brand     222 non-null    object
 3   price     222 non-null    int64 
dtypes: int64(1), object(3)
memory usage: 7.1+ KB
None


In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def combine_features(row):
    """Build one space-separated text string from a part's name, category and brand.

    Args:
        row: A mapping-like row (e.g. a DataFrame row) exposing string values
            under the keys 'name', 'category' and 'brand'.

    Returns:
        The three values joined in that order with single spaces.
    """
    fields = (row['name'], row['category'], row['brand'])
    return ' '.join(fields)

# Add one text document per part (name + category + brand) to vectorize.
computer_parts['combined_features'] = computer_parts.apply(combine_features, axis=1)

# TF-IDF vectorization of the combined text
tfidf_vectorizer = TfidfVectorizer()
tfidf_matrix = tfidf_vectorizer.fit_transform(computer_parts['combined_features'])
print(tfidf_matrix)

# Cosine similarity between the TF-IDF rows of every pair of parts.
# NOTE(review): recommend_computer_parts accepts `cosine_sim` as a parameter
# but its body never reads it — confirm whether similarity ranking is still planned.
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)
print(cosine_sim)

# Budget split per usage profile: category name -> share of the total budget.
# The keys are compared against the 'category' column of the parts data.
# NOTE(review): clean_text removes every non-ASCII character from 'category',
# so the '방열판' key can never match a cleaned row — confirm the intended
# category label for that part type.
# NOTE(review): 'GPU' and 'GRAPHIC CARD' are budgeted separately, so a build
# receives one part from each — confirm they are meant to be distinct categories.
budget_distribution = {
    '게임용': {
        'CPU': 0.2,
        'GPU': 0.3,
        'RAM': 0.15,
        'PC CASE': 0.05,
        'MOTHERBOARD': 0.1,
        'GRAPHIC CARD': 0.2,
    },
    '가정용': {
        'CPU': 0.25,
        'RAM': 0.2,
        'PC CASE': 0.1,
        'MOTHERBOARD': 0.15,
        'PSU': 0.15,
        'CABLE': 0.05,
        '방열판': 0.05,
        'SSD': 0.05,
    },
    '디자인용': {
        'CPU': 0.2,
        'GPU': 0.2,
        'RAM': 0.2,
        'PC CASE': 0.05,
        'MOTHERBOARD': 0.1,
        'GRAPHIC CARD': 0.15,
        'PSU': 0.05,
        'CABLE': 0.025,
        '방열판': 0.025,
    },
    '연구용': {
        'CPU': 0.3,
        'RAM': 0.25,
        'MOTHERBOARD': 0.15,
        'PSU': 0.15,
        'SSD': 0.1,
        'KEYBOARD': 0.025,
        'MONITOR': 0.025,
    },
}

  (0, 150)	0.27814830845644645
  (0, 211)	0.48573259860177986
  (0, 147)	0.3852096971328584
  (0, 218)	0.7336967019097137
  (1, 63)	0.5995038974915855
  (1, 319)	0.363302917571964
  (1, 97)	0.6378752649075504
  (1, 150)	0.3189376324537752
  (2, 57)	0.5995038974915855
  (2, 319)	0.363302917571964
  (2, 97)	0.6378752649075504
  (2, 150)	0.3189376324537752
  (3, 64)	0.5995038974915855
  (3, 319)	0.363302917571964
  (3, 97)	0.6378752649075504
  (3, 150)	0.3189376324537752
  (4, 60)	0.5995038974915855
  (4, 319)	0.363302917571964
  (4, 97)	0.6378752649075504
  (4, 150)	0.3189376324537752
  (5, 59)	0.5995038974915855
  (5, 319)	0.363302917571964
  (5, 97)	0.6378752649075504
  (5, 150)	0.3189376324537752
  (6, 80)	0.5995038974915855
  :	:
  (214, 127)	0.5562330112151859
  (214, 355)	0.5017423825852572
  (215, 128)	0.6624646547191454
  (215, 127)	0.5562330112151859
  (215, 355)	0.5017423825852572
  (216, 270)	0.5481623777954931
  (216, 242)	0.7020913749598405
  (216, 237)	0.4545170060367284
  

In [None]:
# Recommendation function
def recommend_computer_parts(usage, budget, preferred_brand='', cosine_sim=cosine_sim, data=computer_parts):
    """Pick one part per category for a usage profile, within per-category budgets.

    The total budget is split across categories using
    ``budget_distribution[usage]``. For each category the cheapest part priced
    at or below that category's share is chosen. When ``preferred_brand`` is
    given and that brand has an affordable part in the category, the cheapest
    part of that brand is chosen instead.

    Category and brand comparisons ignore letter case and surrounding
    whitespace, so a typed brand such as 'intel' matches rows stored as 'INTEL'.

    Args:
        usage: Key of ``budget_distribution`` naming the usage profile.
        budget: Total budget, in the same unit as ``data['price']``.
        preferred_brand: Optional brand to favour; '' (or None) means no preference.
        cosine_sim: Kept for interface compatibility; this function does not read it.
        data: DataFrame with 'name', 'category', 'brand' and 'price' columns.
            The default is the ``computer_parts`` object that existed when
            this ``def`` was executed.

    Returns:
        A list of ``pandas.Series``, one per category of the profile and in the
        profile's order. A category with no affordable part is represented by a
        placeholder whose name is 'Not Found' and whose price is ``inf``.
        If ``usage`` is unknown, a message is printed and an empty
        ``pandas.DataFrame`` is returned (iterating it yields nothing).
    """
    # Look up the budget split for this usage profile.
    if usage not in budget_distribution:
        print(f"Usage '{usage}' is not defined in budget distribution.")
        return pd.DataFrame()
    parts_distribution = budget_distribution[usage]

    # Normalised keys, computed once: the data mixes casing (e.g. 'INTEL' vs
    # 'Logitech'), so exact string equality would silently miss valid matches.
    category_keys = data['category'].astype(str).str.strip().str.casefold()
    brand_keys = data['brand'].astype(str).str.strip().str.casefold()
    wanted_brand = preferred_brand.strip().casefold() if preferred_brand else ''

    selected_parts = []

    # Work out each category's budget and choose a part for it.
    for part, ratio in parts_distribution.items():
        category_budget = budget * ratio

        # Rows of this category whose price fits the category budget.
        affordable_mask = (category_keys == part.casefold()) & (data['price'] <= category_budget)
        candidates = data[affordable_mask]

        if candidates.empty:
            print(f"No parts within budget for category '{part}'.")
            selected_parts.append(
                pd.Series({'name': 'Not Found', 'category': part, 'brand': '', 'price': float('inf')})
            )
            continue

        if wanted_brand:
            # Narrow to the preferred brand only when it has an affordable part;
            # otherwise fall back to every affordable part of the category.
            preferred_parts = data[affordable_mask & (brand_keys == wanted_brand)]
            if not preferred_parts.empty:
                candidates = preferred_parts

        # Cheapest remaining candidate.
        selected_parts.append(candidates.sort_values(by='price').iloc[0])

    return selected_parts

In [None]:
def get_user_input():
    """Prompt for the usage profile, the total budget and an optional brand.

    The usage and brand answers are stripped of surrounding whitespace, so a
    stray space does not make the later profile lookup fail. The budget answer
    may contain thousands separators (',') and a trailing '원'.

    Returns:
        A ``(usage, budget, preferred_brand)`` tuple where ``budget`` is a
        float and ``preferred_brand`` is '' when the user just pressed Enter.

    Raises:
        ValueError: If the budget answer is not a number after normalisation.
    """
    usage = input("사용 용도를 입력하세요 (게임용, 가정용, 디자인용, 연구용): ").strip()
    budget_text = input("예산을 입력하세요 (원 단위): ")
    # Accept inputs such as '4,000,000' or '4000000원'; float() tolerates the
    # whitespace that may remain around the digits.
    budget = float(budget_text.replace(',', '').strip().rstrip('원'))
    preferred_brand = input("선호하는 브랜드가 있으면 입력하세요 (없으면 엔터): ").strip()
    return usage, budget, preferred_brand

In [None]:
# Collect the user's request and build the recommendation.
usage, budget, preferred_brand = get_user_input()
recommended_parts = recommend_computer_parts(usage, budget, preferred_brand)

print("\n추천 부품 리스트:")
# Named `total_price` rather than `sum`: rebinding `sum` would shadow the
# builtin for every cell executed afterwards in this kernel.
total_price = 0
for part in recommended_parts:
    print(f"부품명: {part['name']}, 카테고리: {part['category']}, 브랜드: {part['brand']}, 가격: {part['price']} 원")
    # 'Not Found' placeholders carry an infinite price; leave them out so one
    # missing category does not turn the whole total into inf.
    if part['price'] != float('inf'):
        total_price += part['price']
print("총 가격 : ", total_price)

사용 용도를 입력하세요 (게임용, 가정용, 디자인용, 연구용): 게임용
예산을 입력하세요 (원 단위): 4000000
선호하는 브랜드가 있으면 입력하세요 (없으면 엔터): 

추천 부품 리스트:
부품명: AMD Ryzen 5 4500, 카테고리: CPU, 브랜드: AMD, 가격: 101267 원
부품명: KAER AMD Radeon RX 580, 카테고리: GPU, 브랜드: KAER, 가격: 131989 원
부품명: Crucial RAM 8GB DDR4, 카테고리: RAM, 브랜드: CRUCIAL, 가격: 25597 원
부품명: Cooler Master MasterBox Q300L MicroATX Tower, 카테고리: PC CASE, 브랜드: COOLER MASTER, 가격: 53349 원
부품명: ASUS Prime B450MA II AMD AM4 Ryzen 5000 3rd2nd1st Gen Ryzen Micro ATX Motherboard, 카테고리: MOTHERBOARD, 브랜드: ASUS, 가격: 80280 원
부품명: RTX 3080 12GB, 카테고리: GRAPHIC CARD, 브랜드: NVIDIA, 가격: 639000 원
총 가격 :  1031482
