In [None]:
# Setup
import warnings
from datetime import datetime

import pandas as pd

from analyzer import analyze_profiles, load_config
from evaluate import evaluate_all_models
# Silence all warnings from this point on: no category or module filter is
# given, so the 'ignore' action applies to every warning that is raised.
warnings.filterwarnings('ignore')

In [None]:
# Load configuration
config = load_config('config.yaml')

# Output file paths for results.
# METRICS_OUTPUT_PATH carries a {timestamp} placeholder that is substituted
# with str.format before the metrics file is written.
RESULTS_OUTPUT_PATH = 'profile_analysis_results.csv'  # Change this to your desired file path
METRICS_OUTPUT_PATH = 'model_metrics_{timestamp}.csv'  # Change this to your desired file path

# Collect enabled models: an entry counts as enabled unless it carries an
# 'enabled' key with a falsy value.
enabled_models = []
for m in config['models']:
    if m.get('enabled', True):
        enabled_models.append(m)

# Show enabled models
print(f"Enabled models: {len(enabled_models)}")
for m in enabled_models:
    print(f"  - {m['name']}: {m['model_id']}")

In [None]:
# Load data
DATASET_PATH = 'about_me_quality_dataset.csv'
df = pd.read_csv(DATASET_PATH)

# Report the row count and the column labels of the loaded frame
print(f"Loaded {len(df)} profiles")
print(f"Columns: {list(df.columns)}")

# Preview: left as a bare last expression so the notebook renders the frame
df.head()

In [None]:
# Run analysis

# Per-run options are read from the config; each falls back to the default
# shown here when its key is absent.
analysis_options = {
    'input_col': config.get('input_column', 'about_me'),
    'batch_size': config.get('batch_size', 10),
    'max_new_tokens': config.get('max_new_tokens', 2000),
}

# The full config is passed positionally alongside the data frame
results = analyze_profiles(df, config, **analysis_options)

In [None]:
# View results
model_names = [m['name'] for m in enabled_models]

# Show the same input column the analysis was run on. It is read from the
# config with the same 'about_me' fallback used when calling analyze_profiles,
# so a non-default 'input_column' no longer points this preview at a
# hard-coded column name.
input_col = config.get('input_column', 'about_me')

# One '<model>_quality' column per enabled model, next to the input text
display_cols = [input_col] + [f'{name}_quality' for name in model_names]
results[display_cols].head(10)

In [None]:

# Evaluate models against human labels

# Resolve the label column once, with the 'Human_flag' fallback, so that the
# membership check and the evaluator call always use the same column name.
# Indexing config['human_label_column'] directly would raise KeyError when the
# key is missing from the config even though the fallback column exists.
human_label_col = config.get('human_label_column', 'Human_flag')

if human_label_col in results.columns:
    # Evaluate all models and get comparison
    comparison = evaluate_all_models(
        results,
        model_names,
        true_col=human_label_col,
        print_individual_reports=False  # Set to True if you want detailed reports
    )

    # Save metrics: the {timestamp} placeholder in METRICS_OUTPUT_PATH is
    # filled with the current local time so each run writes a distinct file.
    timestamp = datetime.now().strftime('%Y-%m-%d_%H%M%S')
    metrics_filename = METRICS_OUTPUT_PATH.format(timestamp=timestamp)
    comparison.to_csv(metrics_filename, index=False)
    print(f"\nModel metrics saved to: {metrics_filename}")
else:
    print("No human labels found for evaluation")

In [None]:
# Save results
# Write the results table as CSV without the index column, then report the
# destination path.
results.to_csv(path_or_buf=RESULTS_OUTPUT_PATH, index=False)
print(f"Results saved to {RESULTS_OUTPUT_PATH}")

In [None]:
# Quick analysis of bad profiles
for model_name in model_names:
    # Each model contributes a '<name>_quality' and a '<name>_tags' column
    quality_col, tags_col = f'{model_name}_quality', f'{model_name}_tags'

    # Keep only the rows this model labelled 'bad'
    is_bad = results[quality_col] == 'bad'
    bad_profiles = results[is_bad]
    print(f"\n{model_name}: {len(bad_profiles)} bad profiles")

    # Flatten the per-row tag lists into one list; cells that are not
    # Python lists are skipped.
    all_tags = [
        tag
        for tags in bad_profiles[tags_col]
        if isinstance(tags, list)
        for tag in tags
    ]

    # Print tag frequencies only when at least one tag was collected
    if all_tags:
        tag_counts = pd.Series(all_tags).value_counts()
        print(tag_counts)