In [1]:
import time
# Start timer to time the notebook execution
start = time.time()

import os
import yaml
from dotmap import DotMap
from deepeval.synthesizer import Synthesizer
from os import listdir
from os.path import isfile, join
import ipywidgets as widgets
widgets.IntSlider()

from deepeval.synthesizer import Evolution

import sys
utils_path = "../../08-Utils"
if utils_path not in sys.path:
    sys.path.append(utils_path)
    
from helpers import (
    CustomSynthesizerModel,
    CustomEmbeddingModel,
    goldens_to_pandas,
    save_testset,
)

None of PyTorch, TensorFlow >= 2.0, or Flax have been found. Models won't be available and only tokenizers, configuration and file/data utilities can be used.


In [2]:
# Load configuration file
with open('../../07-Starter_Pack_config/improved_rag_config.yaml', 'r') as file:
    config = yaml.safe_load(file)
config = DotMap(config)

In [3]:
%%time

# Get the KB documents' file paths
PDF_FILES_PATH = config.deep_eval.kb_doc_dir
pdf_files = [f"{PDF_FILES_PATH}/{f}" for f in listdir(PDF_FILES_PATH) if isfile(join(PDF_FILES_PATH, f))]

# Instantiate the synthesizer and the embedder language models
deep_eval_cfg =  config.deep_eval
synth_llm = CustomSynthesizerModel(deep_eval_cfg.synth_llm_cfg)
embedding_model = CustomEmbeddingModel(deep_eval_cfg.embedding_cfg)

# Instantiate the synthesizer engine
synthesizer = Synthesizer(
        model=synth_llm,
        embedder=embedding_model,
        async_mode=deep_eval_cfg.synth_llm_cfg.async_mode,
)

# Generate the Q&A test set
synthesizer.generate_goldens_from_docs(
        document_paths=pdf_files,
        max_goldens_per_document=config.deep_eval.max_goldens_per_doc,
        include_expected_output=config.deep_eval.include_expected_output,
        chunk_size=config.deep_eval.chunk_size,
        chunk_overlap=config.deep_eval.chunk_overlap,
        num_evolutions=config.deep_eval.num_evolutions,
        evolutions=[
                Evolution.MULTICONTEXT,
                Evolution.REASONING,
                Evolution.COMPARATIVE,
                Evolution.IN_BREADTH,
        ],
)

Output()

Generating embeddings:   0%|          | 0/404 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/719 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/746 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/1710 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/595 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/81 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/495 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/700 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/822 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/531 [00:00<?, ?it/s]

CPU times: user 2min, sys: 3.74 s, total: 2min 4s
Wall time: 5min 22s


[Golden(input='Explore the application of space-derived cardiovascular monitoring technologies in predicting and preventing motor vehicle accidents.', actual_output=None, expected_output='Space-derived cardiovascular monitoring technologies, such as the Ecosan-2007, have been used to examine motor vehicle drivers and detect early health issues, with a study showing that over 30% of bus drivers were in prenosological and premorbid states, increasing the risk of motor vehicle accidents.', context=['165Heart Health and Biorhythms\nStudying spaceflight effects on the cardiovascular system has led to the creation of unique  \ninstruments that can be used on Earth for the detection of the earliest deviations in health status.  \nThese technologies are now used to examine motor vehicle drivers and civil aviation pilots to evaluate risks and prevent accidents. Twenty-four-hour electrocardiograms of astronauts were also analyzed to understand the space environment’s effect on biological rhythm 

In [4]:
# Display the generated test set as a Pandas dataframe
test_set_df = goldens_to_pandas(synthesizer.synthetic_goldens)
display(test_set_df)

Unnamed: 0,query,reference_answer
0,Explore the application of space-derived cardi...,Space-derived cardiovascular monitoring techno...
1,How do space cardiology research and technolog...,"Space cardiology research and technologies, su..."
2,Investigate the effects of microgravity on pla...,"Mazars, Christian, et al. ""Microsome-associate..."
3,Investigate the effects of microgravity on mou...,The effects of microgravity on mouse skeletal ...
4,What risks did Carl Sagan highlight in asteroi...,Carl Sagan highlighted the dangers of asteroid...
5,What specific asteroid-related danger did Carl...,Asteroid deflection.
6,What is the mean diameter of the satellite orb...,About 500 meters.
7,Compare the roles of Arecibo and Goldstone rad...,Arecibo and Goldstone radar observations playe...
8,Compare the objectives and outcomes of Zond 7 ...,"Zond 7, 5, and 6 all had circumlunar objective..."
9,What was the USSR's primary launch vehicle and...,The USSR's primary launch vehicle for lunar an...


In [6]:
print(f"Q/A test set successfully generated")
save_testset(test_set_df, config)
stop = time.time()
print(f"Synthesizer execution time: {(stop-start)/60:.1f} minutes")

Q/A test set successfully generated
Evaluation set saved at location: current/NASA_history_QA__Meta-Llama-3-70B-Instruct__generated.csv. 
Synthesizer execution time: 14.6 minutes
