In [1]:
import asyncio
import os

from dotenv import load_dotenv
from litellm import acompletion, completion

import weave

# repo_path is the directory two levels above the current working directory
# (presumably the repository root -- confirm against where the notebook lives).
# os.path.dirname is used rather than splitting on '/' so the lookup also works
# when os.getcwd() returns a path with a different separator (e.g. Windows).
repo_path = os.path.dirname(os.path.dirname(os.getcwd()))
load_dotenv(os.path.join(repo_path, '.env'))

# Connection settings read from the environment after the .env file has been
# loaded; os.getenv returns None for any variable that is not set.
API_KEY = os.getenv('API_KEY')
API_BASE = os.getenv('API_BASE')
API_TYPE = os.getenv('API_TYPE')
API_VERSION = os.getenv('API_VERSION')
DEPLOYMENT_NAME = os.getenv('DEPLOYMENT_NAME')

In [2]:
# Re-export the generic settings under the AZURE_* names used for the
# "azure/<deployment>" models called in the cells below.
# Each entry maps the target variable to (source variable name, value).
_azure_env = {
    "AZURE_API_KEY": ("API_KEY", API_KEY),
    "AZURE_API_BASE": ("API_BASE", API_BASE),
    "AZURE_API_VERSION": ("API_VERSION", API_VERSION),
}

# os.environ only accepts strings; assigning None fails with an unhelpful
# "str expected, not NoneType", so report the missing settings by name instead.
_unset = [source for source, value in _azure_env.values() if value is None]
if _unset:
    raise RuntimeError(
        "Missing required setting(s): " + ", ".join(_unset)
        + ". Define them in the environment or in the .env file."
    )

for _target, (_source, _value) in _azure_env.items():
    os.environ[_target] = _value

# Number of completions requested (n=count) by the multi-choice test cells.
count = 3

## LangFair's Counterfactual Scorer

In [3]:
from weave.scorers.counterfactual_scorer import CounterfactualScorer

cfs = CounterfactualScorer(model_id=f"azure/{DEPLOYMENT_NAME}")

score = await cfs.score(query="Are men better with managing finances?")
score

  from .autonotebook import tqdm as notebook_tqdm
Device set to use mps:0


Gender words found in 1 prompts.
[0]


AttributeError: 'FieldInfo' object has no attribute 'name'

## Tests

### 1. Test the Ragas scorer

In [None]:
from weave.scorers.ragas_scorer import ContextRelevancyScorer

# Construct the scorer with no arguments, then point it at the Azure
# deployment by overwriting model_id on the instance.
crs = ContextRelevancyScorer()
crs.model_id = f"azure/{DEPLOYMENT_NAME}"

# Score a single output/context pair; the bare `score` on the last line
# displays the result as the cell output.
score = await crs.score(output="Paris", context="Paris is capital of france")
score

### 2. Test weave.LLMScorer

In [None]:
from weave.scorers.scorer_types import LLMScorer


@weave.op
async def test_run():
    llm = LLMScorer(model_id=f"azure/{DEPLOYMENT_NAME}")

    response = await llm._acompletion(
                messages=[{"role": "user", "content": "Hello, how are you?"}],
                model=f"azure/{DEPLOYMENT_NAME}",
            )
    return response

response = await test_run()
response

In [None]:
async def test_run():
    llm = LLMScorer(model_id=f"azure/{DEPLOYMENT_NAME}")

    response = await llm._acompletion(
                messages=[{"role": "user", "content": "Hello, how are you?"}],
                model=f"azure/{DEPLOYMENT_NAME}",
                n=count
            )
    return response

response = await test_run()
for k in range(count):
    print(response.choices[k].message.content)

### 3. Test litellm.completion

In [None]:
# Synchronous litellm call with a single user message.
# NOTE(review): this is the only cell that targets "gpt-3.5-turbo" instead of
# the Azure deployment; presumably it relies on OpenAI credentials being
# available in the environment -- confirm.
response = completion(
    model="gpt-3.5-turbo",
    messages=[{"content": "Hello, how are you?", "role": "user"}],
)
# Display the text of the first choice as the cell output.
response.choices[0].message.content

### 4. Test litellm.completion with n=3

In [None]:
# Synchronous litellm call against the Azure deployment, passing n=count.
response = completion(
    model=f"azure/{DEPLOYMENT_NAME}",
    messages=[{"content": "Hello, how are you?", "role": "user"}],
    n=count,
)
# Print the text of each of the first `count` choices, one per line.
for k in range(count):
    choice = response.choices[k]
    print(choice.message.content)

### 5. Test litellm.acompletion

In [None]:
async def test_get_response():
  response = await acompletion(
    model=f"azure/{DEPLOYMENT_NAME}",
    messages=[{ "content": "Hello, how are you?","role": "user"}],
    n=count
  )
  return response

response = asyncio.run(test_get_response())
# response = await test_get_response()
for k in range(count):
    print(response.choices[k].message.content)

In [None]:
async def test_get_response():
  response = await acompletion(
    model=f"azure/{DEPLOYMENT_NAME}",
    messages=[{ "content": "Hello, how are you?","role": "user"}],
    n=count
  )
  return response

# response = asyncio.run(test_get_response())
response = await test_get_response()
for k in range(count):
    print(response.choices[k].message.content)