In [0]:
# Databricks notebook source

# 1. Setup
# Install the project's dependencies from the requirements file in the repo checkout.
%pip install -r /Workspace/Repos/vthedataeng@gmail.com/wfa_profile_analyzer/requirements.txt
# Restart the Python process after the install.
# NOTE(review): the restart presumably discards any existing notebook state, so
# this cell should stay first in the notebook — confirm against Databricks docs.
dbutils.library.restartPython()


In [0]:
# COMMAND ----------
# 2. Imports & config

import sys
from pathlib import Path
import pandas as pd
from src.config import load_config
from src.processing.batch_processor import ProfileBatchProcessor
from src.evaluation.compare import evaluate_multi_model_dataframe

# Locations of the YAML config (inside the repo checkout) and of the input CSV.
CONFIG_PATH = Path("/Workspace/Repos/vthedataeng@gmail.com/wfa_profile_analyzer/config/config.yaml")
data_path = "/Workspace/Users/vthedataeng@gmail.com/wfa_profile_analyzer/about_me_quality_dataset.csv"

# Load the project configuration from the YAML file.
config = load_config(CONFIG_PATH)

# Report how many models are enabled, then list each one, numbered from 1.
print(f"Models enabled: {len(config.enabled_models)}")
for position, model in enumerate(config.enabled_models, start=1):
    print(f"  {position}. {model.name} -> {model.model_id}")

In [0]:
# COMMAND ----------
# 4. Initialize processor
# Build the batch processor from the loaded config and print the summary it
# reports for its models.

processor = ProfileBatchProcessor(config)
print(processor.get_model_summary())


In [0]:
# COMMAND ----------
# 5. Provide input DataFrame
# Read the input CSV, hand it to the batch processor, and render the result.

df = pd.read_csv(data_path)

# Announce the workload before the processing call starts.
row_count = len(df)
model_count = len(config.enabled_models)
print(f"Processing {row_count} rows with {model_count} model(s)...")

# The column to analyse comes from the config rather than being hard-coded here.
result_df = processor.process_dataframe(
    df,
    input_column=config.data.dataset_column,
    parse_outputs=True,
)

display(result_df)


In [0]:
# COMMAND ----------
# 6. Evaluate (if you have human labels)
# Passes the processed frame, the loaded config, and the list of enabled models
# to the project's evaluation helper. The call is the last expression in the
# cell, so any value it returns is shown as the cell output.
# NOTE(review): per the heading this presumably needs human-label column(s) in
# result_df — confirm which columns the helper expects.

evaluate_multi_model_dataframe(result_df, config, config.enabled_models)


In [0]:
# COMMAND ----------
# 7. (Optional) Save results
# Write the processed frame to CSV (without the index column) and report where
# it was written. The destination is held in a single variable so the log
# message always matches the path actually written; the previous message
# reported a /dbfs/tmp/... path that the file was never saved to.

results_path = "/Workspace/Users/vthedataeng@gmail.com/wfa_profile_analyzer/results.csv"
result_df.to_csv(results_path, index=False)
print(f"Saved to {results_path}")

print("🎉 Done")
