# Metrics

Reference: [Ragas metrics documentation](https://docs.ragas.io/en/stable/concepts/metrics/index.html)

In [None]:
# Dependency installation, intentionally disabled: the `!pip` shell escapes
# are wrapped in a string literal so that running this cell installs nothing.
# Remove the surrounding triple quotes to install the packages from the notebook.
'''
!pip install datasets
!pip install ragas
!pip install transformers
!pip install numpy
!pip install scipy
!pip install python-dotenv
'''

In [None]:
import os

from dotenv import load_dotenv

# Load variables from a .env file (if one is found) into the process
# environment before reading the key below.
load_dotenv()

# None when OPENAI_API_KEY is not set in the environment.
api_key = os.environ.get("OPENAI_API_KEY")

## 1. Faithfulness

In [None]:
from datasets import Dataset, load_dataset
from ragas import evaluate
from ragas.metrics import faithfulness

# Load the train split of the Port-MIS dataset.
portmis_dataset = load_dataset("ysm99/port-mis-data-ver2", split="train")

# Read every row once, keeping only the three fields used for evaluation.
records = [
    (sample['instruction'], sample['output'], sample['input'])
    for sample in portmis_dataset
]

# Column mapping: instruction -> question, output -> answer, input -> contexts.
# Each context string is wrapped in its own one-element list.
data_samples = {
    'question': [instruction for instruction, _, _ in records],
    'answer': [output for _, output, _ in records],
    'contexts': [[context] for _, _, context in records],
}

dataset = Dataset.from_dict(data_samples)
score = evaluate(dataset, metrics=[faithfulness])

# Bare trailing expression so the notebook displays the result table.
score.to_pandas()

In [None]:
# Convert the latest evaluation result to a pandas DataFrame.
score_df = score.to_pandas()

# Average the 'faithfulness' column into a single summary value.
faithfulness_scores = score_df['faithfulness']
faithfulness_mean = faithfulness_scores.mean()
print(f"Faithfulness: {faithfulness_mean}")

## 2. Answer Relevance

In [None]:
from ragas.metrics import answer_relevancy

# Re-evaluate the `dataset` currently in the kernel, this time with the
# answer_relevancy metric; `score` is overwritten with the new result.
relevancy_metrics = [answer_relevancy]
score = evaluate(dataset, metrics=relevancy_metrics)

# Bare trailing expression so the notebook displays the result table.
score.to_pandas()

In [None]:
# Convert the latest evaluation result to a pandas DataFrame.
score_df = score.to_pandas()

# Average the 'answer_relevancy' column into a single summary value.
answer_relevancy_scores = score_df['answer_relevancy']
answer_relevancy_mean = answer_relevancy_scores.mean()
print(f"Answer_relevancy: {answer_relevancy_mean}")

## 3. Context Recall

In [None]:
from datasets import load_dataset, Dataset
from ragas.metrics import context_recall
from ragas import evaluate

# Load the train split of the Port-MIS dataset.
portmis_dataset = load_dataset("ysm99/port-mis-data-ver2", split="train")

# Columns handed to ragas. Unlike the faithfulness run, this dataset also
# carries a 'ground_truth' column.
data_samples = {
    'question': [],
    'answer': [],
    'contexts': [],
    'ground_truth': []
}

# Column mapping: instruction -> question, output -> answer, input -> contexts.
for row in portmis_dataset:
    data_samples['question'].append(row['instruction'])
    data_samples['answer'].append(row['output'])
    data_samples['contexts'].append([row['input']])
    # ground_truth is the reference answer, not the context. Filling it from
    # row['input'] (the same text used as the context) would compare the
    # context with itself and make the score trivially high.
    # NOTE(review): this assumes `output` is the reference answer of the
    # dataset - confirm against the dataset card.
    data_samples['ground_truth'].append(row['output'])

dataset = Dataset.from_dict(data_samples)
score = evaluate(dataset, metrics=[context_recall])

# Bare trailing expression so the notebook displays the result table.
score.to_pandas()

In [None]:
# Convert the latest evaluation result to a pandas DataFrame.
score_df = score.to_pandas()

# Average the 'context_recall' column into a single summary value.
context_recall_scores = score_df['context_recall']
context_recall_mean = context_recall_scores.mean()
print(f"Context_recall: {context_recall_mean}")

## 4. Context Precision

In [None]:
from ragas.metrics import context_precision

# Re-evaluate the `dataset` currently in the kernel with the
# context_precision metric; `score` is overwritten with the new result.
# Run this after the Context Recall cell so that `dataset` includes the
# 'ground_truth' column.
precision_metrics = [context_precision]
score = evaluate(dataset, metrics=precision_metrics)

# Bare trailing expression so the notebook displays the result table.
score.to_pandas()

In [None]:
# Convert the latest evaluation result to a pandas DataFrame.
score_df = score.to_pandas()

# Average the 'context_precision' column into a single summary value.
context_precision_scores = score_df['context_precision']
context_precision_mean = context_precision_scores.mean()
print(f"Context_precision: {context_precision_mean}")