## 📌 Khmer Sentiment Analysis using Pretrained Hugging Face Model
## Based on Open-Source Model: tykea/khmer-text-sentiment-analysis-roberta

In [1]:
# Install Required Libraries
!pip install transformers datasets torch --quiet


In [2]:
# Import Libraries
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
import pandas as pd


  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Load Pretrained Khmer Sentiment Model
model_name = "tykea/khmer-text-sentiment-analysis-roberta"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

khmer_sentiment = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)


Device set to use mps:0


In [4]:
# Test Sample Khmer Sentences
sample_texts = [
    "ខ្ញុំស្រលាញ់ការសេវាកម្មនេះ។",  # I love this service.
    "ផលិតផលមិនល្អឡើយ។",  # The product is not good.
    "ការដឹកជញ្ជូនយឺតបន្តិច។",  # Delivery is a bit slow.
]

for text in sample_texts:
    result = khmer_sentiment(text)
    print(f"📝 Text: {text}\n🔍 Prediction: {result}\n")


📝 Text: ខ្ញុំស្រលាញ់ការសេវាកម្មនេះ។
🔍 Prediction: [{'label': 'LABEL_1', 'score': 0.9864945411682129}]

📝 Text: ផលិតផលមិនល្អឡើយ។
🔍 Prediction: [{'label': 'LABEL_0', 'score': 0.965076744556427}]

📝 Text: ការដឹកជញ្ជូនយឺតបន្តិច។
🔍 Prediction: [{'label': 'LABEL_0', 'score': 0.8326029777526855}]



In [10]:
import pandas as pd
df = pd.read_csv("khmer_sentiment_sample.csv")

# Predict sentiment for all reviews
df["predicted_sentiment"] = df["Review"].apply(lambda x: khmer_sentiment(x)[0]["label"])

df.head()


Unnamed: 0,Review,SENTIMEN,predicted_sentiment
0,ខ្ញុំស្រលាញ់ផលិតផលនេះ។,positive,LABEL_1
1,សេវាកម្មល្អណាស់។,positive,LABEL_1
2,ខ្ញុំពេញចិត្តជាមួយនឹងការទិញនេះ។,positive,LABEL_1
3,វាជាបទពិសោធន៍ដ៏អស្ចារ្យ។,positive,LABEL_1
4,ការផ្តល់សេវាជួសជុលលឿន និងមានប្រសិទ្ធភាព។,positive,LABEL_1


In [18]:
# Predict Sentiment for All Reviews
df["predicted_sentiment"] = df["Review"].apply(lambda x: khmer_sentiment(x)[0]["label"])

df.head(15)


Unnamed: 0,Review,SENTIMEN,predicted_sentiment
0,ខ្ញុំស្រលាញ់ផលិតផលនេះ។,positive,LABEL_1
1,សេវាកម្មល្អណាស់។,positive,LABEL_1
2,ខ្ញុំពេញចិត្តជាមួយនឹងការទិញនេះ។,positive,LABEL_1
3,វាជាបទពិសោធន៍ដ៏អស្ចារ្យ។,positive,LABEL_1
4,ការផ្តល់សេវាជួសជុលលឿន និងមានប្រសិទ្ធភាព។,positive,LABEL_1
5,ខ្ញុំសូមណែនាំឲ្យមនុស្សផ្សេងទៀត។,positive,LABEL_1
6,ផលិតផលមានគុណភាពខ្ពស់។,positive,LABEL_1
7,វាមានតម្លៃសមរម្យនឹងគុណភាព។,positive,LABEL_1
8,សេវាកម្មអតិថិជនល្អណាស់។,positive,LABEL_1
9,បុគ្គលិករួសរាយរាក់ទាក់។,positive,LABEL_1


In [13]:
# Save Results
df.to_csv("predicted_khmer_sentiment.csv", index=False)
print("✅ Prediction results saved to: dataset/predicted_khmer_sentiment.csv")


✅ Prediction results saved to: dataset/predicted_khmer_sentiment.csv


In [19]:
# Evaluate Accuracy if You Have True Labels


from sklearn.metrics import classification_report
print(classification_report(df["SENTIMEN"], df["predicted_sentiment"]))


              precision    recall  f1-score   support

     LABEL_0       0.00      0.00      0.00       0.0
     LABEL_1       0.00      0.00      0.00       0.0
    negative       0.00      0.00      0.00      28.0
    positive       0.00      0.00      0.00      20.0

    accuracy                           0.00      48.0
   macro avg       0.00      0.00      0.00      48.0
weighted avg       0.00      0.00      0.00      48.0



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
