Skip to content
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ By default, vCache uses:
- `InMemoryEmbeddingMetadataStorage`
- `NoEvictionPolicy`
- `StringComparisonSimilarityEvaluator`
- `DynamicLocalThresholdPolicy` with a maximum failure rate of 2%
- `VerifiedDecisionPolicy` with a maximum failure rate of 2%



Expand All @@ -77,14 +77,14 @@ from vcache.inference_engine.strategies.open_ai import OpenAIInferenceEngine
from vcache.vcache_core.cache.embedding_engine.strategies.open_ai import OpenAIEmbeddingEngine
from vcache.vcache_core.cache.embedding_store.embedding_metadata_storage.strategies.in_memory import InMemoryEmbeddingMetadataStorage
from vcache.vcache_core.similarity_evaluator.strategies.string_comparison import StringComparisonSimilarityEvaluator
from vcache.vcache_policy.strategies.dynamic_local_threshold import DynamicLocalThresholdPolicy
from vcache.vcache_policy.strategies.verified import VerifiedDecisionPolicy
from vcache.vcache_policy.vcache_policy import VCachePolicy
from vcache.vcache_core.cache.embedding_store.vector_db import HNSWLibVectorDB, SimilarityMetricType
```
</details>

```python
vcache_policy: VCachePolicy = DynamicLocalThresholdPolicy(delta=0.02)
vcache_policy: VCachePolicy = VerifiedDecisionPolicy(delta=0.02)
vcache_config: VCacheConfig = VCacheConfig(
inference_engine=OpenAIInferenceEngine(),
embedding_engine=OpenAIEmbeddingEngine(),
Expand Down
2 changes: 1 addition & 1 deletion benchmarks/_plotter_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def convert_to_dataframe_from_benchmark(benchmark: "Benchmark") -> tuple:
"tn_list": benchmark.tn_list,
"fn_list": benchmark.fn_list,
"latency_direct_list": benchmark.latency_direct_list,
"latency_vectorq_list": benchmark.latency_vcach_list,
"latency_vectorq_list": benchmark.latency_vcache_list,
}
df = pd.DataFrame(data)

Expand Down
128 changes: 65 additions & 63 deletions benchmarks/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,23 +33,24 @@
HNSWLibVectorDB,
SimilarityMetricType,
)
from vcache.vcache_core.similarity_evaluator import SimilarityEvaluator
from vcache.vcache_core.similarity_evaluator.strategies.llm_comparison import (
LLMComparisonSimilarityEvaluator,
)
from vcache.vcache_core.similarity_evaluator.strategies.string_comparison import (
StringComparisonSimilarityEvaluator,
)
from vcache.vcache_policy.strategies.dynamic_global_threshold import (
DynamicGlobalThresholdPolicy,
from vcache.vcache_policy.strategies.benchmark_iid_verified import (
BenchmarkVerifiedIIDDecisionPolicy,
)
from vcache.vcache_policy.strategies.dynamic_local_threshold import (
DynamicLocalThresholdPolicy,
from vcache.vcache_policy.strategies.benchmark_static import (
BenchmarkStaticDecisionPolicy,
)
from vcache.vcache_policy.strategies.iid_local_threshold import (
IIDLocalThresholdPolicy,
from vcache.vcache_policy.strategies.benchmark_verified_global import (
BenchmarkVerifiedGlobalDecisionPolicy,
)
from vcache.vcache_policy.strategies.static_global_threshold import (
StaticGlobalThresholdPolicy,
from vcache.vcache_policy.strategies.verified import (
VerifiedDecisionPolicy,
)
from vcache.vcache_policy.vcache_policy import VCachePolicy

Expand Down Expand Up @@ -101,7 +102,7 @@ class Dataset(Enum):
ECOMMERCE_DATASET = "ecommerce_dataset"


class GenerateResultsOnly(Enum):
class GeneratePlotsOnly(Enum):
YES = True
NO = False

Expand All @@ -112,62 +113,37 @@ class GenerateResultsOnly(Enum):


MAX_SAMPLES: int = 60000
CONFIDENCE_INTERVALS_ITERATIONS: int = 5
IS_LLM_JUDGE_BENCHMARK: bool = False
DISABLE_PROGRESS_BAR: bool = True
CONFIDENCE_INTERVALS_ITERATIONS: int = 2
DISABLE_PROGRESS_BAR: bool = False
KEEP_SPLIT: int = 100

RUN_COMBINATIONS: List[
Tuple[EmbeddingModel, LargeLanguageModel, Dataset, GenerateResultsOnly]
Tuple[EmbeddingModel, LargeLanguageModel, Dataset, GeneratePlotsOnly]
] = [
(
EmbeddingModel.GTE,
LargeLanguageModel.LLAMA_3_8B,
Dataset.SEM_BENCHMARK_SEARCH_QUERIES,
GenerateResultsOnly.YES,
Dataset.SEM_BENCHMARK_CLASSIFICATION,
GeneratePlotsOnly.NO,
StringComparisonSimilarityEvaluator(),
),
(
EmbeddingModel.GTE,
LargeLanguageModel.GPT_4O_MINI,
Dataset.SEM_BENCHMARK_ARENA,
GenerateResultsOnly.YES,
),
(
EmbeddingModel.E5_LARGE_V2,
LargeLanguageModel.GPT_4O_MINI,
Dataset.SEM_BENCHMARK_ARENA,
GenerateResultsOnly.YES,
),
(
EmbeddingModel.E5_LARGE_V2,
LargeLanguageModel.LLAMA_3_8B,
Dataset.SEM_BENCHMARK_CLASSIFICATION,
GenerateResultsOnly.YES,
),
(
EmbeddingModel.GTE,
LargeLanguageModel.LLAMA_3_8B,
Dataset.SEM_BENCHMARK_CLASSIFICATION,
GenerateResultsOnly.YES,
),
(
EmbeddingModel.GTE,
LargeLanguageModel.LLAMA_3_70B,
Dataset.SEM_BENCHMARK_CLASSIFICATION,
GenerateResultsOnly.YES,
GeneratePlotsOnly.NO,
LLMComparisonSimilarityEvaluator(),
),
]

BASELINES_TO_RUN: List[Baseline] = [
# Baseline.IID,
# Baseline.GPTCache,
# Baseline.VCacheLocal,
Baseline.VCacheLocal,
# Baseline.BerkeleyEmbedding,
# Baseline.VCacheBerkeleyEmbedding,
]

DATASETS_TO_RUN: List[str] = [Dataset.SEM_BENCHMARK_SEARCH_QUERIES]

STATIC_THRESHOLDS: List[float] = [
0.80,
0.81,
Expand Down Expand Up @@ -220,7 +196,7 @@ def stats_set_up(self):
self.tn_list: List[int] = []
self.fn_list: List[int] = []
self.latency_direct_list: List[float] = []
self.latency_vcach_list: List[float] = []
self.latency_vcache_list: List[float] = []
self.observations_dict: Dict[str, Dict[str, float]] = {}
self.gammas_dict: Dict[str, float] = {}
self.t_hats_dict: Dict[str, float] = {}
Expand Down Expand Up @@ -291,6 +267,9 @@ def test_run_benchmark(self):
if not is_cache_hit:
latency_vcache += llm_generation_latency

# This is important for the async logic
time.sleep(0.002)

# 3) Update Stats
self.update_stats(
is_cache_hit=is_cache_hit,
Expand Down Expand Up @@ -465,7 +444,7 @@ def dump_results_to_json(self):
"tn_list": self.tn_list,
"fn_list": self.fn_list,
"latency_direct_list": self.latency_direct_list,
"latency_vectorq_list": self.latency_vcach_list,
"latency_vectorq_list": self.latency_vcache_list,
"observations_dict": self.observations_dict,
"gammas_dict": self.gammas_dict,
"t_hats_dict": self.t_hats_dict,
Expand Down Expand Up @@ -498,12 +477,8 @@ def __run_baseline(
timestamp: str,
delta: float,
threshold: float,
similarity_evaluator: SimilarityEvaluator,
):
if IS_LLM_JUDGE_BENCHMARK:
similarity_evaluator = LLMComparisonSimilarityEvaluator()
else:
similarity_evaluator = StringComparisonSimilarityEvaluator()

vcache_config: VCacheConfig = VCacheConfig(
inference_engine=BenchmarkInferenceEngine(),
embedding_engine=BenchmarkEmbeddingEngine(),
Expand Down Expand Up @@ -547,8 +522,15 @@ def main():

timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M")

for embedding_model, llm_model, dataset, generate_results_only in RUN_COMBINATIONS:
for (
embedding_model,
llm_model,
dataset,
generate_plots_only,
similarity_evaluator,
) in RUN_COMBINATIONS:
try:
print(f"DatasetPath: {datasets_dir}, Dataset: {dataset.value}")
dataset_file = os.path.join(datasets_dir, f"{dataset.value}.json")
logging.info(
f"Running benchmark for dataset: {dataset}, embedding model: {embedding_model.value[1]}, LLM model: {llm_model.value[1]}\n"
Expand All @@ -557,7 +539,10 @@ def main():

#####################################################
### Baseline: vCache Local
if Baseline.VCacheLocal in BASELINES_TO_RUN and not generate_results_only:
if (
Baseline.VCacheLocal in BASELINES_TO_RUN
and not generate_plots_only.value
):
for delta in DELTAS:
for i in range(0, CONFIDENCE_INTERVALS_ITERATIONS):
path = os.path.join(
Expand All @@ -575,19 +560,23 @@ def main():
)

__run_baseline(
vcache_policy=DynamicLocalThresholdPolicy(delta=delta),
vcache_policy=VerifiedDecisionPolicy(delta=delta),
path=path,
dataset_file=dataset_file,
embedding_model=embedding_model.value,
llm_model=llm_model.value,
timestamp=timestamp,
delta=delta,
threshold=-1,
similarity_evaluator=similarity_evaluator,
)

#####################################################
### Baseline: vCache Global
if Baseline.VCacheGlobal in BASELINES_TO_RUN and not generate_results_only:
if (
Baseline.VCacheGlobal in BASELINES_TO_RUN
and not generate_plots_only.value
):
for delta in DELTAS:
path = os.path.join(
results_dir,
Expand All @@ -604,21 +593,24 @@ def main():
)

__run_baseline(
vcache_policy=DynamicGlobalThresholdPolicy(delta=delta),
vcache_policy=BenchmarkVerifiedGlobalDecisionPolicy(
delta=delta
),
path=path,
dataset_file=dataset_file,
embedding_model=embedding_model.value,
llm_model=llm_model.value,
timestamp=timestamp,
delta=delta,
threshold=-1,
similarity_evaluator=similarity_evaluator,
)

#####################################################
### Baseline: Berkeley Embedding
if (
Baseline.BerkeleyEmbedding in BASELINES_TO_RUN
and not generate_results_only
and not generate_plots_only.value
):
for threshold in STATIC_THRESHOLDS:
if embedding_model == EmbeddingModel.E5_MISTRAL_7B:
Expand Down Expand Up @@ -650,21 +642,24 @@ def main():
logging.info(f"Using static threshold: {threshold}")

__run_baseline(
vcache_policy=StaticGlobalThresholdPolicy(threshold=threshold),
vcache_policy=BenchmarkStaticDecisionPolicy(
threshold=threshold
),
path=path,
dataset_file=dataset_file,
embedding_model=berkeley_embedding_model.value,
llm_model=llm_model.value,
timestamp=timestamp,
delta=-1,
threshold=threshold,
similarity_evaluator=similarity_evaluator,
)

#####################################################
### Baseline: vCache + Berkeley Embedding
if (
Baseline.VCacheBerkeleyEmbedding in BASELINES_TO_RUN
and not generate_results_only
and not generate_plots_only.value
):
for delta in DELTAS:
for i in range(0, CONFIDENCE_INTERVALS_ITERATIONS):
Expand Down Expand Up @@ -699,19 +694,20 @@ def main():
)

__run_baseline(
vcache_policy=DynamicLocalThresholdPolicy(delta=delta),
vcache_policy=VerifiedDecisionPolicy(delta=delta),
path=path,
dataset_file=dataset_file,
embedding_model=berkeley_embedding_model.value,
llm_model=llm_model.value,
timestamp=timestamp,
delta=delta,
threshold=-1,
similarity_evaluator=similarity_evaluator,
)

#####################################################
### Baseline: IID Local
if Baseline.IID in BASELINES_TO_RUN and not generate_results_only:
if Baseline.IID in BASELINES_TO_RUN and not generate_plots_only.value:
for delta in DELTAS:
for i in range(0, CONFIDENCE_INTERVALS_ITERATIONS):
path = os.path.join(
Expand All @@ -729,19 +725,22 @@ def main():
)

__run_baseline(
vcache_policy=IIDLocalThresholdPolicy(delta=delta),
vcache_policy=BenchmarkVerifiedIIDDecisionPolicy(
delta=delta
),
path=path,
dataset_file=dataset_file,
embedding_model=embedding_model.value,
llm_model=llm_model.value,
timestamp=timestamp,
delta=delta,
threshold=-1,
similarity_evaluator=similarity_evaluator,
)

#####################################################
### Baseline: GPTCache
if Baseline.GPTCache in BASELINES_TO_RUN and not generate_results_only:
if Baseline.GPTCache in BASELINES_TO_RUN and not generate_plots_only.value:
for threshold in STATIC_THRESHOLDS:
path = os.path.join(
results_dir,
Expand All @@ -756,14 +755,17 @@ def main():
logging.info(f"Using static threshold: {threshold}")

__run_baseline(
vcache_policy=StaticGlobalThresholdPolicy(threshold=threshold),
vcache_policy=BenchmarkStaticDecisionPolicy(
threshold=threshold
),
path=path,
dataset_file=dataset_file,
embedding_model=embedding_model.value,
llm_model=llm_model.value,
timestamp=timestamp,
delta=-1,
threshold=threshold,
similarity_evaluator=similarity_evaluator,
)

#####################################################
Expand Down
6 changes: 3 additions & 3 deletions test.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,12 @@
from vcache.vcache_core.similarity_evaluator.strategies.string_comparison import (
StringComparisonSimilarityEvaluator,
)
from vcache.vcache_policy.strategies.dynamic_local_threshold import (
DynamicLocalThresholdPolicy,
from vcache.vcache_policy.strategies.verified import (
VerifiedDecisionPolicy,
)
from vcache.vcache_policy.vcache_policy import VCachePolicy

vcache_policy: VCachePolicy = DynamicLocalThresholdPolicy(delta=0.02)
vcache_policy: VCachePolicy = VerifiedDecisionPolicy(delta=0.02)
vcache_config: VCacheConfig = VCacheConfig(
inference_engine=OpenAIInferenceEngine(),
embedding_engine=OpenAIEmbeddingEngine(),
Expand Down
5 changes: 5 additions & 0 deletions tests/ReadMe.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,11 @@ vCache includes both **unit tests** and **integration tests** to ensure correctn
Unit tests verify the **logic of individual module strategies** (e.g., caching policies, embedding engines, similarity evaluators) in isolation.
They are designed to be fast, deterministic, and independent of external services.

#### Running Unit Tests

```bash
python -m pytest tests/unit/
```


### Integration Tests
Expand Down
Loading