In [7]:
# Import library untuk NLP
import re
import nltk
from transformers import pipeline
from sklearn.metrics.pairwise import cosine_similarity

In [8]:
# Inisialisasi NLTK
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\ASUS\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\ASUS\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\ASUS\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [9]:
# Daftar gejala dan rekomendasi jamu
gejala_jamu = {
    "batuk": ["Jamu Kunyit Asam", "Jamu Jahe"],
    "demam": ["Jamu Temulawak", "Jamu Brotowali"],
    "sakit kepala": ["Jamu Beras Kencur"],
    "masuk angin": ["Jamu Jahe", "Jamu Temulawak"],
    "nyeri otot": ["Jamu Kencur", "Jamu Brotowali"]
}

In [10]:
# Inisialisasi model BERT dengan Hugging Face Transformers
nlp_model = pipeline("feature-extraction", model="distilbert-base-uncased")


In [11]:
# Fungsi untuk praproses teks
def preprocess_text(text):
    # Mengubah teks menjadi huruf kecil
    text = text.lower()
    # Menghapus karakter spesial
    text = re.sub(r'[^a-zA-Z\s]', '', text)
    # Tokenisasi
    tokens = word_tokenize(text)
    # Menghapus stopwords
    tokens = [word for word in tokens if word not in stopwords.words('english')]
    # Lemmatization
    lemmatizer = WordNetLemmatizer()
    tokens = [lemmatizer.lemmatize(word) for word in tokens]
    return ' '.join(tokens)

In [12]:
# Fungsi untuk mendapatkan embedding BERT dari teks
def get_embedding(text):
    preprocessed_text = preprocess_text(text)
    embeddings = nlp_model(preprocessed_text)
    return embeddings[0][0]  # Mengambil vektor embedding pertama


In [13]:
# Fungsi untuk rekomendasi jamu menggunakan embedding
def rekomendasi_jamu_bert(gejala_input):
    gejala_embedding = get_embedding(gejala_input)
    rekomendasi = []

    # Loop melalui gejala di dictionary dan menghitung kemiripan kosinus
    for gejala, jamu_list in gejala_jamu.items():
        gejala_ref_embedding = get_embedding(gejala)
        similarity = cosine_similarity([gejala_embedding], [gejala_ref_embedding])[0][0]
        
        # Jika kemiripan di atas threshold (misalnya, 0.7), tambahkan jamu ke rekomendasi
        if similarity > 0.7:
            rekomendasi.extend(jamu_list)

    return list(set(rekomendasi)) if rekomendasi else ["Jamu tidak ditemukan untuk gejala yang dimasukkan"]


In [15]:
# Contoh penggunaan fungsi
gejala_user = input("Masukkan gejala yang Anda alami: ")  # Input gejala pengguna
hasil_rekomendasi = rekomendasi_jamu_bert(gejala_user)


LookupError: 
**********************************************************************
  Resource [93mpunkt_tab[0m not found.
  Please use the NLTK Downloader to obtain the resource:

  [31m>>> import nltk
  >>> nltk.download('punkt_tab')
  [0m
  For more information see: https://www.nltk.org/data.html

  Attempted to load [93mtokenizers/punkt_tab/english/[0m

  Searched in:
    - 'C:\\Users\\ASUS/nltk_data'
    - 'c:\\Users\\ASUS\\AppData\\Local\\Programs\\Python\\Python312\\nltk_data'
    - 'c:\\Users\\ASUS\\AppData\\Local\\Programs\\Python\\Python312\\share\\nltk_data'
    - 'c:\\Users\\ASUS\\AppData\\Local\\Programs\\Python\\Python312\\lib\\nltk_data'
    - 'C:\\Users\\ASUS\\AppData\\Roaming\\nltk_data'
    - 'C:\\nltk_data'
    - 'D:\\nltk_data'
    - 'E:\\nltk_data'
**********************************************************************


In [None]:
# Menampilkan rekomendasi
print("\nRekomendasi jamu untuk gejala Anda:")
for jamu in hasil_rekomendasi:
    print("- " + jamu)