In [1]:
#!pip install openai
#!pip install langchain
#!pip install langchain_openai
#!pip install chromadb

In [2]:
import pandas as pd
import os
import openai


from langchain.chains import RetrievalQA
from langchain.document_loaders import TextLoader
from langchain_openai import OpenAIEmbeddings
from langchain_openai import OpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.document_loaders.csv_loader import CSVLoader

from langchain.vectorstores import Chroma # 向量資料庫

In [3]:
import os
api_key = '' # your openai api key
os.environ['OPENAI_API_KEY'] = api_key

## 資料預處理

In [4]:
anime = pd.read_csv('anime_with_synopsis.csv')
anime.head()

Unnamed: 0,MAL_ID,Name,Score,Genres,sypnopsis
0,1,Cowboy Bebop,8.78,"Action, Adventure, Comedy, Drama, Sci-Fi, Space","In the year 2071, humanity has colonized sever..."
1,5,Cowboy Bebop: Tengoku no Tobira,8.39,"Action, Drama, Mystery, Sci-Fi, Space","other day, another bounty—such is the life of ..."
2,6,Trigun,8.24,"Action, Sci-Fi, Adventure, Comedy, Drama, Shounen","Vash the Stampede is the man with a $$60,000,0..."
3,7,Witch Hunter Robin,7.27,"Action, Mystery, Police, Supernatural, Drama, ...",ches are individuals with special powers like ...
4,8,Bouken Ou Beet,6.98,"Adventure, Fantasy, Shounen, Supernatural",It is the dark century and the people are suff...


In [5]:
# 移除缺失值
anime = anime.dropna()

In [6]:
# 結合標題、概要和類型
anime['combined_info'] = anime.apply(lambda row: f"Title: {row['Name']}. Overview: {row['sypnopsis']} Genres: {row['Genres']}", axis=1)
anime['combined_info'][0]

'Title: Cowboy Bebop. Overview: In the year 2071, humanity has colonized several of the planets and moons of the solar system leaving the now uninhabitable surface of planet Earth behind. The Inter Solar System Police attempts to keep peace in the galaxy, aided in part by outlaw bounty hunters, referred to as "Cowboys." The ragtag team aboard the spaceship Bebop are two such individuals. Mellow and carefree Spike Spiegel is balanced by his boisterous, pragmatic partner Jet Black as the pair makes a living chasing bounties and collecting rewards. Thrown off course by the addition of new members that they meet in their travels—Ein, a genetically engineered, highly intelligent Welsh Corgi; femme fatale Faye Valentine, an enigmatic trickster with memory loss; and the strange computer whiz kid Edward Wong—the crew embarks on thrilling adventures that unravel each member\'s dark and mysterious past little by little. Well-balanced with high density action and light-hearted comedy, Cowboy Bebo

In [7]:
anime[['combined_info']].to_csv('anime_updated.csv', index=False)

In [8]:
df = pd.read_csv('anime_updated.csv')
df

Unnamed: 0,combined_info
0,Title: Cowboy Bebop. Overview: In the year 207...
1,Title: Cowboy Bebop: Tengoku no Tobira. Overvi...
2,Title: Trigun. Overview: Vash the Stampede is ...
3,Title: Witch Hunter Robin. Overview: ches are ...
4,Title: Bouken Ou Beet. Overview: It is the dar...
...,...
16201,Title: Daomu Biji Zhi Qinling Shen Shu. Overvi...
16202,Title: Mieruko-chan. Overview: ko is a typical...
16203,Title: Higurashi no Naku Koro ni Sotsu. Overvi...
16204,Title: Yama no Susume: Next Summit. Overview: ...


## 使用 Langchain 的Loader和Vector DB

In [9]:
# data loader
loader = CSVLoader(file_path="anime_updated.csv", encoding="utf-8")
data = loader.load()

# TextSplitter
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
texts = text_splitter.split_documents(data)

# embeddings model
embeddings = OpenAIEmbeddings(openai_api_key=api_key)

#Vector DB
docsearch = Chroma.from_documents(texts, embeddings)

**Querying Vector DB Store for movie recommendation**

In [10]:
query = "我正在尋找一部動作片。 你能給我什麼建議？"
docs = docsearch.similarity_search(query, k=1)
docs

[Document(page_content='combined_info: Title: Wanmei Shijie. Overview: No synopsis information has been added to this title. Help improve our database by adding a synopsis here . Genres: Action, Adventure, Fantasy', metadata={'row': 16152, 'source': 'anime_updated.csv'})]

## 使用 QA 檢索進行動漫推薦

In [11]:
from langchain.chat_models import ChatOpenAI

llm = ChatOpenAI(model_name='gpt-3.5-turbo', temperature=0)

  warn_deprecated(


In [12]:
qa = RetrievalQA.from_chain_type(llm,
                  chain_type="stuff",
                  retriever=docsearch.as_retriever(),
                  return_source_documents=True)

In [13]:
query = "我正在尋找一部動作片。 你能給我什麼建議？"
result = qa({"query": query})
result['result']

  warn_deprecated(


'根據您提供的信息，這裡有一些動作片的建議：\n\n1. 《萬妹世界》：動作、冒險、奇幻類型的影片。\n2. 《擊敗獸神》：動作、冒險類型的影片。\n3. 《西遊奇燈》：一部關於經典《蓮花燈》故事的木偶動畫影片，同樣是動作、冒險、奇幻類型的作品。\n4. 《西遊奇燈 (2006)》：這部影片是1984年的續集，同樣是動作、冒險、奇幻類型的作品。\n\n希望這些建議能幫助您找到合適的動作片！'

In [14]:
result['source_documents'][0]

Document(page_content='combined_info: Title: Wanmei Shijie. Overview: No synopsis information has been added to this title. Help improve our database by adding a synopsis here . Genres: Action, Adventure, Fantasy', metadata={'row': 16152, 'source': 'anime_updated.csv'})

## Prompt Engineering

## 範例1

In [15]:
from langchain.prompts import PromptTemplate

template = """你是推薦系統，可以幫助使用者找到符合他們喜好的動漫。使用以下上下文來回答最後的問題。
        對於每個問題，建議三部動漫，並簡要描述情節以及用戶可能喜歡它的原因。
        如果你不知道答案，就說你不知道。

{context}

Question: {question}
Your response:"""


PROMPT = PromptTemplate(
    template=template, input_variables=["context", "question"])

chain_type_kwargs = {"prompt": PROMPT}

llm=ChatOpenAI(model_name='gpt-3.5-turbo', temperature=0)

qa = RetrievalQA.from_chain_type(llm=llm,
    chain_type="stuff",
    retriever=docsearch.as_retriever(),
    return_source_documents=True,
    chain_type_kwargs=chain_type_kwargs)

query = "我正在尋找一部動作片。 你能給我什麼建議？"
result = qa({'query':query})
print(result['result'])

根據您對動作片的喜好，我建議您觀看以下三部動漫：「Wanmei Shijie」是一部動作、冒險、奇幻類型的動漫，雖然沒有提供劇情概要，但可能會吸引您的注意。另外，「Ji Jia Shou Shen」和「Xiyue Qi Tong」也是動作、冒險類型的動漫，可能會符合您的口味。希望您會喜歡這些推薦！


## 範例2 - 附加user_info

In [19]:
from langchain.prompts import PromptTemplate

template_prefix = """你是推薦系統，可以幫助使用者找到符合他們喜好的動漫。使用以下上下文來回答最後的問題。
            對於每個問題，建議三部動漫，請考慮上下文和使用者提供的個人資訊。
            如果你不知道答案，就說你不知道。

{context}"""

user_info = """下面是我們對用戶的了解，您可以使用下面這些資訊來更好地調整您的推薦：
Age: {age}
Gender: {gender}"""

template_suffix= """Question: {question}
Your response:"""

user_info = user_info.format(age = 30, gender = 'male')

COMBINED_PROMPT = template_prefix +'\n'+ user_info +'\n'+ template_suffix
print(COMBINED_PROMPT)

你是推薦系統，可以幫助使用者找到符合他們喜好的動漫。使用以下上下文來回答最後的問題。
            對於每個問題，建議三部動漫，請考慮上下文和使用者提供的個人資訊。
            如果你不知道答案，就說你不知道。

{context}
下面是我們對用戶的了解，您可以使用下面這些資訊來更好地調整您的推薦：
Age: 30
Gender: male
Question: {question}
Your response:


In [20]:
PROMPT = PromptTemplate(
    template=COMBINED_PROMPT, input_variables=["context", "question"])

chain_type_kwargs = {"prompt": PROMPT}
qa = RetrievalQA.from_chain_type(llm=llm,
    chain_type="stuff",
    retriever=docsearch.as_retriever(),
    return_source_documents=True,
    chain_type_kwargs=chain_type_kwargs)

query = "我正在尋找一部動作片。 你能給我什麼建議？"
result = qa({'query':query})
print(result['result'])

根據您的喜好，我建議您觀看以下三部動漫： 
1. Wanmei Shijie - 這部動漫結合了動作、冒險和奇幻元素，可能符合您的口味。
2. Ji Jia Shou Shen - 這部動漫也是動作和冒險類型，您可能會喜歡。
3. Xiyue Qi Tong - 這部動漫是關於經典的蓮花燈故事，同樣包含動作和冒險元素。

希望這些推薦對您有幫助！如果您對這些動漫有興趣，可以嘗試觀看一下。


In [18]:
result['source_documents']

[Document(page_content='combined_info: Title: Wanmei Shijie. Overview: No synopsis information has been added to this title. Help improve our database by adding a synopsis here . Genres: Action, Adventure, Fantasy', metadata={'row': 16152, 'source': 'anime_updated.csv'}),
 Document(page_content='combined_info: Title: Ji Jia Shou Shen. Overview: No synopsis information has been added to this title. Help improve our database by adding a synopsis here . Genres: Action, Adventure', metadata={'row': 15759, 'source': 'anime_updated.csv'}),
 Document(page_content='combined_info: Title: Xiyue Qi Tong. Overview: puppet stop-motion film about the classic Lotus Lantern Chinese story. Genres: Action, Adventure, Fantasy', metadata={'row': 10207, 'source': 'anime_updated.csv'}),
 Document(page_content='combined_info: Title: Xiyue Qi Tong (2006). Overview: he end of the 1984 film "Part One End" was shown. After 20 years Shanghai Animation Film Studio continued the story for now aimed at the parents w