# Experimentation results

## Using the filtering system with 10 augmented images and 1 real image processed through GPT Vision

## Pascal Results 0-1

In [5]:
import os
import pandas as pd
import glob

def count_images_in_folder(folder_path):
    """Count image files anywhere below ``folder_path``.

    A file counts as an image when its extension, compared case-insensitively,
    is one of .jpg, .jpeg, .png or .gif. Sub-directories are walked
    recursively with ``os.walk``.

    Args:
        folder_path: Directory to scan.

    Returns:
        int: Number of matching files (0 when the walk yields nothing).
    """
    image_extensions = ['.jpg', '.jpeg', '.png', '.gif']
    total = 0
    for _dirpath, _dirnames, filenames in os.walk(folder_path):
        for name in filenames:
            _stem, suffix = os.path.splitext(name)
            if suffix.lower() in image_extensions:
                total += 1
    return total

def process_csv(file_path, target_epc, filtered_dir, seed=0):
    """Summarise one results CSV as a single-row frame for ``target_epc``.

    Only rows with metric == 'Accuracy', split == 'Validation' and
    examples_per_class == target_epc are considered; the reported value is
    the maximum of their 'value' column.

    Args:
        file_path: Path of the results CSV. It must contain the columns
            'metric', 'split', 'examples_per_class' and 'value'.
        target_epc: The examples_per_class value to keep.
        filtered_dir: Root directory that holds the filtered image folders.
        seed: Number used in the ``filtered-pascal-<seed>-<epc>`` folder name.
            Defaults to 0, the value that used to be hard-coded.

    Returns:
        A one-row DataFrame with the columns 'filename',
        'examples_per_class', 'value' and 'image_count', or an empty
        DataFrame when no row matches the filter.
    """
    df = pd.read_csv(file_path)
    filtered_df = df[
        (df['metric'] == 'Accuracy')
        & (df['split'] == 'Validation')
        & (df['examples_per_class'] == target_epc)
    ]
    if filtered_df.empty:
        return pd.DataFrame()

    filename = os.path.basename(file_path)
    # The strategy folder is named after the CSV (without '.csv') and lives under
    # filtered-pascal-<seed>-<epc>; a missing folder is reported as 0 images.
    strategy_folder = os.path.join(
        filtered_dir,
        f"filtered-pascal-{seed}-{target_epc}",
        filename.replace('.csv', ''),
    )
    image_count = count_images_in_folder(strategy_folder) if os.path.exists(strategy_folder) else 0
    return pd.DataFrame({
        'filename': [filename],
        'examples_per_class': [target_epc],
        'value': [filtered_df['value'].max()],
        'image_count': [image_count],
    })


def analyze_pascal_results(examples_per_class_list, seed=7,
                           results_dir='my-results-final-10',
                           filtered_dir='my-results-final-7'):
    """Rank every strategy CSV against the baseline for each examples_per_class.

    For each requested examples_per_class the function prints the top 20
    results, the strategies whose best validation accuracy is strictly above
    the baseline, and the single best strategy.

    Args:
        examples_per_class_list: Iterable of examples_per_class values.
        seed: Seed number used in the printed labels, the CSV glob pattern, the
            baseline filename match and the filtered image folder name.
        results_dir: Directory searched for result CSVs.
        filtered_dir: Root directory of the filtered image folders.
            NOTE(review): the defaults point at two different directories
            ('my-results-final-10' vs 'my-results-final-7'); kept as found,
            please confirm this is intended.

    Returns:
        Tuple ``(results, outperforming_strategies)``: two dicts keyed by
        examples_per_class. A key is present only when at least one valid
        result and a baseline row were found for it.
    """
    results = {}
    outperforming_strategies = {}

    # The file list does not depend on epc, so it is globbed once. The pattern
    # used to be '*--*.csv', which cannot match names such as
    # 'pascal-7-1-zscore_class-1_50.csv'. Sorting makes the row order stable.
    csv_files = sorted(glob.glob(f'{results_dir}/*-{seed}-*.csv'))

    for epc in examples_per_class_list:
        label = f"pascal-{seed}-{epc}"
        print(f"\n--- Analysis for {label} ---")

        if not csv_files:
            print(f"No files found for {label}")
            continue

        print(f"Found {len(csv_files)} files for processing")

        # Drop empty frames up front: concatenating them is deprecated in recent
        # pandas, and they contribute no rows anyway.
        frames = [process_csv(path, epc, filtered_dir, seed=seed) for path in csv_files]
        frames = [frame for frame in frames if not frame.empty]
        if not frames:
            print(f"No valid results found for {label}")
            continue

        all_results = pd.concat(frames, ignore_index=True).dropna()
        if all_results.empty:
            print(f"No valid results found for {label}")
            continue

        all_results_sorted = all_results.sort_values('value', ascending=False)

        print("All results:")
        display(all_results_sorted.head(20))

        # Literal (non-regex), case-insensitive match on the baseline file name.
        is_baseline = all_results_sorted['filename'].str.contains(
            f'pascal-baseline-{seed}-', case=False, regex=False
        )
        baseline_files = all_results_sorted[is_baseline]

        if baseline_files.empty:
            print(f"Warning: No baseline file found for {label}")
            print("Available filenames:")
            print(all_results_sorted['filename'].tolist())
            continue

        # Rows are sorted by value, so the first baseline row is the best one.
        baseline_score = baseline_files['value'].iloc[0]

        outperforming = all_results_sorted[all_results_sorted['value'] > baseline_score]

        print(f"\nStrategies outperforming baseline (score: {baseline_score:.6f}):")
        for _, row in outperforming.iterrows():
            print(f"{row['filename']}: {row['value']:.6f} (Images: {row['image_count']})")

        results[epc] = all_results_sorted
        outperforming_strategies[epc] = outperforming

        best_strategy = all_results_sorted.iloc[0]
        print(f"\nBest strategy for {label}:")
        print(f"Filename: {best_strategy['filename']}")
        print(f"Examples per class: {best_strategy['examples_per_class']}")
        print(f"Best accuracy: {best_strategy['value']:.6f}")
        print(f"Image count: {best_strategy['image_count']}")

    return results, outperforming_strategies

# examples_per_class values to analyze; each one becomes a key of the returned dicts
examples_per_class_list = [1]

# Run the analysis. Both dicts are keyed by examples_per_class and only hold an
# entry for values where a baseline row was found.
results, outperforming_strategies = analyze_pascal_results(examples_per_class_list)

# Look up the per-setting frames by key. Only the values listed in
# examples_per_class_list can exist, for example:
# outperforming_strategies[1]


--- Analysis for pascal-7-1 ---
Found 68 files for processing
All results:


Unnamed: 0,filename,examples_per_class,value,image_count
62,pascal-7-1-zscore_class-1_50.csv,1,0.660103,0
35,pascal-7-1-top_n_overall-0_80.csv,1,0.657374,0
2,pascal-7-1-zscore_top_n_class-2-0_5.csv,1,0.654912,0
18,pascal-7-1-top_n_class-0_50.csv,1,0.653394,0
22,pascal-7-1-top_n_overall-0_70.csv,1,0.650016,0
7,pascal-7-1-zscore_top_n_class-2-0_7.csv,1,0.649492,0
10,pascal-7-1-zscore_top_n_class-2-0_6.csv,1,0.649308,0
16,pascal-7-1-top_n_class-0_90.csv,1,0.649185,0
9,pascal-7-1-zscore_top_n_overall-2-0_9.csv,1,0.648057,0
50,pascal-7-1-top_n_overall-0_90.csv,1,0.647554,0



Strategies outperforming baseline (score: 0.646176):
pascal-7-1-zscore_class-1_50.csv: 0.660103 (Images: 0)
pascal-7-1-top_n_overall-0_80.csv: 0.657374 (Images: 0)
pascal-7-1-zscore_top_n_class-2-0_5.csv: 0.654912 (Images: 0)
pascal-7-1-top_n_class-0_50.csv: 0.653394 (Images: 0)
pascal-7-1-top_n_overall-0_70.csv: 0.650016 (Images: 0)
pascal-7-1-zscore_top_n_class-2-0_7.csv: 0.649492 (Images: 0)
pascal-7-1-zscore_top_n_class-2-0_6.csv: 0.649308 (Images: 0)
pascal-7-1-top_n_class-0_90.csv: 0.649185 (Images: 0)
pascal-7-1-zscore_top_n_overall-2-0_9.csv: 0.648057 (Images: 0)
pascal-7-1-top_n_overall-0_90.csv: 0.647554 (Images: 0)
pascal-7-1-percentile_overall-0_10.csv: 0.647554 (Images: 0)
pascal-7-1-zscore_top_n_class-2-0_8.csv: 0.646670 (Images: 0)
pascal-7-1-zscore_top_n_overall-2-0_8.csv: 0.646283 (Images: 0)

Best strategy for pascal-7-1:
Filename: pascal-7-1-zscore_class-1_50.csv
Examples per class: 1
Best accuracy: 0.660103
Image count: 0


In [4]:
# Show the dict of result tables returned by analyze_pascal_results (keyed by examples_per_class)
results

{1:                                            filename  examples_per_class  \
 62                 pascal-7-1-zscore_class-1_50.csv                   1   
 35                pascal-7-1-top_n_overall-0_80.csv                   1   
 2           pascal-7-1-zscore_top_n_class-2-0_5.csv                   1   
 18                  pascal-7-1-top_n_class-0_50.csv                   1   
 22                pascal-7-1-top_n_overall-0_70.csv                   1   
 ..                                              ...                 ...   
 28    pascal-7-1-percentile_top_n_class-0_3-0_7.csv                   1   
 51     pascal-7-1-percentile_columns_class-0_30.csv                   1   
 45     pascal-7-1-percentile_columns_class-0_40.csv                   1   
 11  pascal-7-1-percentile_top_n_overall-0_3-0_8.csv                   1   
 66  pascal-7-1-percentile_top_n_overall-0_3-0_7.csv                   1   
 
        value  image_count  
 62  0.660103            0  
 35  0.657374            

In [1]:
import os
import pandas as pd
import glob

def process_csv(file_path):
    """Return the best validation accuracy per examples_per_class for one CSV.

    Args:
        file_path: Path of a results CSV with the columns 'metric', 'split',
            'examples_per_class' and 'value'.

    Returns:
        DataFrame with the columns 'examples_per_class', 'value' (the maximum
        over rows where metric == 'Accuracy' and split == 'Validation') and
        'filename' (base name of ``file_path``).
    """
    records = pd.read_csv(file_path)
    is_validation_accuracy = (records['metric'] == 'Accuracy') & (records['split'] == 'Validation')
    per_epc_max = (
        records[is_validation_accuracy]
        .groupby('examples_per_class')['value']
        .max()
        .reset_index()
    )
    # Tag every row with the file it came from so frames can be stacked later.
    per_epc_max['filename'] = os.path.basename(file_path)
    return per_epc_max

# Every results CSV in test-researchers-pascal whose name contains '-0-1'
csv_files = glob.glob('test-researchers-pascal/*-0-1*.csv')

# Summarise each file, stack the summaries under a fresh index,
# and put the filename column first
all_results = pd.concat(
    [process_csv(csv_path) for csv_path in csv_files],
    ignore_index=True,
).loc[:, ['filename', 'examples_per_class', 'value']]

# Highest accuracy at the top
all_results_sorted = all_results.sort_values(by='value', ascending=False)

print("All results:")
display(all_results_sorted)

All results:


Unnamed: 0,filename,examples_per_class,value
4,results-0-1-aug.csv,1,0.662918
2,my-pascal-0-1-aug-v1.csv,1,0.661101
3,my-pascal-0-1-no-aug-v2.csv,1,0.660327
5,my-pascal-0-1-no-aug-v1.csv,1,0.659004
0,my-pascal-0-1-aug-v2.csv,1,0.656475
1,results-0-1-no-aug.csv,1,0.649085


In [40]:
def process_csv(file_path):
    """Summarise a results CSV as the top validation accuracy per examples_per_class.

    Args:
        file_path: Location of the CSV; it needs the columns 'metric',
            'split', 'examples_per_class' and 'value'.

    Returns:
        DataFrame with one row per examples_per_class and the columns
        'examples_per_class', 'value' (maximum accuracy on the validation
        split) and 'filename' (base name of ``file_path``).
    """
    table = pd.read_csv(file_path)
    wanted = table['metric'].eq('Accuracy') & table['split'].eq('Validation')
    grouped = table.loc[wanted].groupby('examples_per_class')
    best_per_epc = grouped['value'].max().reset_index()
    # Record the source file on every row.
    return best_per_epc.assign(filename=os.path.basename(file_path))

# Every results CSV in test-researchers-pascal whose name contains '-0-2'
csv_files = glob.glob('test-researchers-pascal/*-0-2*.csv')

# Summarise each file, stack the summaries under a fresh index,
# and put the filename column first
all_results = pd.concat(
    [process_csv(csv_path) for csv_path in csv_files],
    ignore_index=True,
).loc[:, ['filename', 'examples_per_class', 'value']]

# Highest accuracy at the top
all_results_sorted = all_results.sort_values(by='value', ascending=False)

print("All results:")
display(all_results_sorted)

All results:


Unnamed: 0,filename,examples_per_class,value
5,results-0-2-aug.csv,2,0.711716
3,results-0-2-no-aug.csv,2,0.691173
1,my-pascal-0-2-no-aug-v1.csv,2,0.688558
0,pascal-0-2-no-aug-v2.csv,2,0.685982
4,results-0-2-aug.csv,1,0.662918
2,results-0-2-no-aug.csv,1,0.649085


In [38]:
# Accuracy gap at 2 examples per class: results-0-2-no-aug.csv (0.691173) minus my-pascal-0-2-no-aug-v1.csv (0.688558)
0.691173 - 0.688558

0.002615000000000034

In [41]:
# Accuracy gap at 2 examples per class: results-0-2-no-aug.csv (0.691173) minus pascal-0-2-no-aug-v2.csv (0.685982)
0.691173 - 0.685982

0.005191000000000057

### Old prompts to OpenAI

In [None]:
# prompt = f"""Analyze these images of class {class_name}. The first image is real and serves as reference.
# For each of the remaining 10 augmented images, compare them to the real one and provide float scores from 0 to 1 for:
# Index: from 1 to 10 in order of the augmented images
# a) Quality: Overall visual fidelity and clarity compared to the real one.
# b) Realism: How well it matches real-world expectations set by the reference image.
# c) Relevance: How well it represents a {class_name} compared to the reference image.
# d) Detail Preservation: Retention of important class-specific features.

# Respond ONLY with a list of JSON objects, one for each non-reference image, in this format:
# [
#   {{"index": 1, "quality": 0.0, "realism": 0.0, "relevance": 0.0, "detail_preservation": 0.0, "explanation": "Brief explanation."}}
# ]
# """

In [None]:
# Scoring prompt with five criteria: semantic similarity, feature preservation,
# diversity, visual quality and realism.
# This is an f-string: {class_name} is interpolated when the cell runs (the name is
# not defined in the visible cells), and doubled braces render as literal JSON braces.
prompt = f"""You are evaluating augmented images for an image classification task. The first image of class {class_name} is real and serves as reference.
For each of the remaining 10 augmented images, please evaluate them based on the following criteria, providing a score between 0.0 and 1.0 for each:

Index: from 1 to 10 in order of the augmented images

1. Semantic Similarity: How well does the image maintain the core semantic content of the original class? (0 = completely different, 1 = identical)

2. Feature Preservation: Are key features that define the class still present and recognizable? (0 = no key features preserved, 1 = all key features preserved)

3. Diversity: How different is the image from the reference while still maintaining class identity? (0 = identical, 1 = maximum beneficial diversity)

4. Visual Quality: Is the image clear, well-formed, and free from obvious artifacts? (0 = poor quality, 1 = excellent quality)

5. Realism: Does the image look natural and plausible for the given class? (0 = completely unrealistic, 1 = indistinguishable from a real photo)

Provide your response ONLY as a list of JSON objects one for each non-reference augmented image, in this format:

[
  {{"index": 1, "semantic_similarity": 0.0, "feature_preservation": 0.0, "diversity": 0.0, "visual_quality": 0.0, "realism": 0.0, "explanation": "Brief explanation focusing on changes and their impact on classification."}}
]
"""

In [None]:
# Scoring prompt with nine criteria: the five of the shorter variant plus contextual
# consistency, pose/viewpoint variation, lighting/color variation and class distinguishability.
# This is an f-string: {class_name} is interpolated when the cell runs (the name is
# not defined in the visible cells), and doubled braces render as literal JSON braces.
prompt = f"""You are evaluating augmented images for an image classification task. The first image of class {class_name} is real and serves as reference.
For each of the remaining 10 augmented images, please evaluate them based on the following criteria, providing a score between 0.0 and 1.0 for each:
Index: from 1 to 10 in order of the augmented images
1. Semantic Similarity: How well does the image maintain the core semantic content of the original class? (0 = completely different, 1 = identical)
2. Feature Preservation: Are key features that define the class still present and recognizable? (0 = no key features preserved, 1 = all key features preserved)
3. Diversity: How different is the image from the reference while still maintaining class identity? (0 = identical, 1 = maximum beneficial diversity)
4. Visual Quality: Is the image clear, well-formed, and free from obvious artifacts? (0 = poor quality, 1 = excellent quality)
5. Realism: Does the image look natural and plausible for the given class? (0 = completely unrealistic, 1 = indistinguishable from a real photo)
6. Contextual Consistency: How well does the image maintain the appropriate context or environment for the class? (0 = completely inconsistent, 1 = perfectly consistent)
7. Pose/Viewpoint Variation: How well does the image provide a useful variation in pose or viewpoint while remaining identifiable? (0 = no variation, 1 = optimal variation)
8. Lighting/Color Variation: How effectively does the image introduce meaningful changes in lighting or color that could aid in generalization? (0 = no variation, 1 = optimal variation)
9. Class Distinguishability: How easily distinguishable is this class from other potentially similar classes in this augmented version? (0 = easily confused with other classes, 1 = highly distinguishable)
Provide your response ONLY as a list of JSON objects one for each non-reference augmented image, in this format:
[
  {{"index": 1, "semantic_similarity": 0.0, "feature_preservation": 0.0, "diversity": 0.0, "visual_quality": 0.0, "realism": 0.0, "contextual_consistency": 0.0, "pose_viewpoint_variation": 0.0, "lighting_color_variation": 0.0, "class_distinguishability": 0.0, "explanation": "Brief explanation focusing on changes and their impact on classification."}}
]
"""

## prompt v0

In [None]:
# Prompt v0: ten scoring criteria plus a short free-text summary of the whole set.
# This is an f-string, so {class_name} is interpolated when the cell runs (the name is
# not defined in the visible cells). It was previously written as {{class_name}}, which
# an f-string renders as the literal text "{class_name}" instead of the class.
# Doubled braces around the JSON example are intentional: they render as single braces.
prompt = f"""You are an expert computer vision model tasked with evaluating the quality of augmented images for training a robust image classifier. The first image of class {class_name} is real and serves as a reference. For each of the remaining 10 augmented images, evaluate them based on the following criteria, providing a score between 0.0 and 1.0 for each:

1. Diversity: How different is this augmentation from the original image in terms of visual and semantic attributes? (0 = identical, 1 = maximum beneficial diversity)

2. Realism: How realistic and plausible does the augmented image look? (0 = completely unrealistic, 1 = indistinguishable from a real image)

3. Feature Preservation: How well does the augmentation maintain the critical features that define the class of the original image? (0 = critical features lost, 1 = all critical features perfectly preserved)

4. Contextual Variation: How well does the augmentation introduce new contexts or backgrounds while maintaining relevance? (0 = no contextual change, 1 = highly relevant new context)

5. Robustness Potential: How likely is this augmentation to help the model learn invariance to irrelevant transformations or resistance to noise? (0 = unlikely to improve robustness, 1 = highly likely to improve robustness)

6. Edge Case Representation: Does this augmentation represent an edge case or rare scenario that could improve the model's generalization? (0 = common scenario, 1 = highly valuable edge case)

7. Overall Quality: Considering all factors, how valuable is this augmentation for training a classifier? (0 = detrimental to training, 1 = extremely valuable)

8. Class Consistency: How well does the augmented image maintain the same class as the original image? (0 = class completely changed, 1 = class perfectly maintained)

9. Artifact-Free: To what extent is the augmented image free from unnatural artifacts or distortions that could negatively impact training? (0 = severe artifacts present, 1 = completely artifact-free)

10. Unique Contribution: How much does this augmentation contribute unique aspects not present in other augmentations? (0 = redundant with other augmentations, 1 = highly unique contribution)

Provide your response as a list of JSON objects, one for each non-reference augmented image, in this format:

[
  {{
    "index": 1,
    "diversity": 0.0,
    "realism": 0.0,
    "feature_preservation": 0.0,
    "contextual_variation": 0.0,
    "robustness_potential": 0.0,
    "edge_case_representation": 0.0,
    "overall_quality": 0.0,
    "class_consistency": 0.0,
    "artifact_free": 0.0,
    "unique_contribution": 0.0,
    "explanation": "Brief explanation of how changes affect classification and quality, including any improvement suggestions"
  }}
]

After evaluating all images, provide a brief summary (max 3 sentences) of the overall quality of the augmentation set, highlighting strengths and areas for improvement.
"""

## prompt v1

In [None]:
# Prompt v1: five criteria (class representation, visual fidelity, structural
# integrity, diversity, beneficial variations).
# This is an f-string: {class_name} is interpolated when the cell runs (the name is
# not defined in the visible cells), and doubled braces render as literal JSON braces.
prompt = f"""You are evaluating augmented images for an image classification task. The first image of class {class_name} is real and serves as reference.
For each of the remaining 10 augmented images, please evaluate them based on the following criteria, providing a score between 0.0 and 1.0 for each:
Index: from 1 to 10 in order of the augmented images
1. Class Representation: How well does the image maintain the core semantic content, key features, and distinguishability of the original class? (0 = completely irrelevant or indistinguishable, 1 = perfectly relevant, feature-complete, and highly distinguishable)
2. Visual Fidelity: Is the image clear, well-formed, free from artifacts, natural-looking, and in an appropriate context for the given class? (0 = poor quality, unrealistic, or out of context, 1 = excellent quality, realistic, and in perfect context)
3. Structural Integrity: How well does the image maintain the overall structure and proportions expected for the class? (0 = severely distorted structure, 1 = perfect structural integrity)
4. Diversity: How different is the image from the reference while still maintaining class identity? (0 = identical, 1 = maximum beneficial diversity)
5. Beneficial Variations: How effectively does the image introduce meaningful changes in pose, viewpoint, lighting, or color that could aid in generalization? (0 = no variation, 1 = optimal variation)
Provide your response as a list of JSON objects, one for each non-reference augmented image, in this format:
[
{{
"index": 1,
"class_representation": 0.0,
"visual_fidelity": 0.0,
"structural_integrity": 0.0,
"diversity": 0.0,
"beneficial_variations": 0.0,
"explanation": "Brief explanation of how changes affect classification and quality"
}}
]
"""

## prompt v2

In [None]:
# Prompt v2: four criteria (class representation, visual quality, diversity,
# beneficial variations).
# This is an f-string: {class_name} is interpolated when the cell runs (the name is
# not defined in the visible cells), and doubled braces render as literal JSON braces.
prompt = f"""You are an expert computer vision model evaluating augmented images for an image classification task. The first image of class {class_name} is real and serves as a reference. 
For each of the remaining 10 augmented images, evaluate them based on the following criteria, providing a score between 0.0 and 1.0 for each:

1. Class Representation: How well does the image maintain the core semantic content, key features, and distinguishability of the original class? (0 = completely irrelevant or indistinguishable, 1 = perfectly relevant, feature-complete, and highly distinguishable)
2. Visual Quality: How well does the augmented image maintain a clear, well-formed, and natural appearance while preserving the expected structure and proportions for the given class? (0 = poor quality, unrealistic, or severely distorted structure, 1 = excellent quality, realistic, and perfect structural integrity)
3. Diversity: How different is the image from the reference while still maintaining class identity? (0 = almost identical, 1 = maximum beneficial diversity)
4. Beneficial Variations: How effectively does the image introduce meaningful changes in pose, viewpoint, lighting, or color that could aid in generalization? (0 = no variation, 1 = optimal variation)

Provide your response as a list of JSON objects, one for each non-reference augmented image, in this format:

[
  {{
    "index": 1,
    "class_representation": 0.0,
    "visual_quality": 0.0,
    "diversity": 0.0,
    "beneficial_variations": 0.0,
    "explanation": "Brief explanation of your evaluation"
  }}
]
"""

In [None]:
# Seven-criterion variant: the five prompt-v1 criteria plus edge case representation
# and robustness potential.
# Two fixes: the triple-quoted string was never closed (a syntax error), and
# {{class_name}} rendered as the literal text "{class_name}" instead of the class.
# {class_name} is now interpolated when the cell runs (the name is not defined in the
# visible cells). Doubled braces around the JSON example are intentional.
prompt = f"""You are an expert computer vision model evaluating augmented images for an image classification task. The first image of class {class_name} is real and serves as a reference. For each of the remaining 10 augmented images, evaluate them based on the following criteria, providing a score between 0.0 and 1.0 for each:

1. Class Representation: How well does the image maintain the core semantic content, key features, and distinguishability of the original class? (0 = completely irrelevant or indistinguishable, 1 = perfectly relevant, feature-complete, and highly distinguishable)

2. Visual Fidelity: Is the image clear, well-formed, free from artifacts, natural-looking, and in an appropriate context for the given class? (0 = poor quality, unrealistic, or out of context, 1 = excellent quality, realistic, and in perfect context)

3. Structural Integrity: How well does the image maintain the overall structure and proportions expected for the class? (0 = severely distorted structure, 1 = perfect structural integrity)

4. Diversity: How different is the image from the reference while still maintaining class identity? (0 = identical, 1 = maximum beneficial diversity)

5. Beneficial Variations: How effectively does the image introduce meaningful changes in pose, viewpoint, lighting, or color that could aid in generalization? (0 = no variation, 1 = optimal variation)

6. Edge Case Representation: Does this augmentation represent a valuable edge case or rare scenario that could improve the model's generalization? (0 = common scenario, 1 = highly valuable edge case)

7. Robustness Potential: How likely is this augmentation to improve the model's robustness to irrelevant transformations or noise? (0 = unlikely to improve robustness, 1 = highly likely to improve robustness)

Provide your response as a list of JSON objects, one for each non-reference augmented image, in this format:

[
  {{
    "index": 1,
    "class_representation": 0.0,
    "visual_fidelity": 0.0,
    "structural_integrity": 0.0,
    "diversity": 0.0,
    "beneficial_variations": 0.0,
    "edge_case_representation": 0.0,
    "robustness_potential": 0.0,
    "explanation": "Brief explanation of how changes affect classification and quality, including any unique aspects or improvement suggestions"
  }}
]
"""

## prompt v3

## prompt v4

In [None]:
# Prompt v4: five criteria (visual fidelity, class identity, diversity, beneficial
# variations, structural consistency).
# This is an f-string: {class_name} is interpolated when the cell runs (the name is
# not defined in the visible cells), and doubled braces render as literal JSON braces.
prompt = f"""
You are evaluating augmented images for an image classification task. The first image of class {class_name} is real and serves as the reference. 
For each of the remaining 10 augmented images, please evaluate them based on the following criteria, providing a score between 0.0 and 1.0 for each:

Index: from 1 to 10 in order of the augmented images
1. Visual Fidelity: Is the image clear, free of artifacts, realistic, and visually consistent with what is expected for the class? (0 = poor quality, 1 = excellent quality)
2. Class Identity: How well does the image maintain the core features and distinguishability from other classes? (0 = completely irrelevant, 1 = perfectly relevant)
3. Diversity: How different is the image from the reference while still maintaining class identity? (0 = identical, 1 = maximum useful diversity)
4. Beneficial Variations: How well does the image provide useful variations in pose, viewpoint, lighting, or color that could aid in generalization while maintaining class identity? (0 = no variation, 1 = optimal variation)
5. Structural Consistency: How well does the image maintain appropriate context, structure, and proportions for the class? (0 = highly distorted, 1 = perfectly consistent)

Provide your response as a list of JSON objects, one for each non-reference augmented image, in this format:
[
{{
 "index": 1,
 "visual_fidelity": 0.0,
 "class_identity": 0.0,
 "diversity": 0.0,
 "beneficial_variations": 0.0,
 "structural_consistency": 0.0,
 "explanation": "Brief explanation of how changes affect classification and quality based on the above criteria."
}}
]
"""


## Prompt V5

In [None]:
# Prompt V5: the same five criteria as prompt v4, preceded by an expert-persona preamble.
# This is an f-string: {class_name} is interpolated when the cell runs (the name is
# not defined in the visible cells), and doubled braces render as literal JSON braces.
prompt = f"""
You are one of the world's best image quality experts. You're critical and detail-oriented. For every image you rate correctly, you get paid $100,000. You value your reputation and always make the most objective assessment possible
You are evaluating augmented images for an image classification task. The first image of class {class_name} is real and serves as the reference. 
For each of the remaining 10 augmented images, please evaluate them based on the following criteria, providing a score between 0.0 and 1.0 for each:

Index: from 1 to 10 in order of the augmented images
1. Visual Fidelity: Is the image clear, free of artifacts, realistic, and visually consistent with what is expected for the class? (0 = poor quality, 1 = excellent quality)
2. Class Identity: How well does the image maintain the core features and distinguishability from other classes? (0 = completely irrelevant, 1 = perfectly relevant)
3. Diversity: How different is the image from the reference while still maintaining class identity? (0 = identical, 1 = maximum useful diversity)
4. Beneficial Variations: How well does the image provide useful variations in pose, viewpoint, lighting, or color while maintaining class identity? (0 = no variation, 1 = optimal variation)
5. Structural Consistency: How well does the image maintain appropriate context, structure, and proportions for the class? (0 = highly distorted, 1 = perfectly consistent)

Provide your response as a list of JSON objects, one for each non-reference augmented image, in this format:
[
{{
 "index": 1,
 "visual_fidelity": 0.0,
 "class_identity": 0.0,
 "diversity": 0.0,
 "beneficial_variations": 0.0,
 "structural_consistency": 0.0,
 "explanation": "Brief explanation of how changes affect classification and quality."
}}
]
"""

## Prompt V6

In [None]:
# Prompt V6: four criteria (visual and structural quality, class identity, diversity,
# beneficial variations).
# This is an f-string, so {class_name} is interpolated when the cell runs (the name is
# not defined in the visible cells). It was previously written as {{class_name}}, which
# an f-string renders as the literal text "{class_name}" instead of the class.
# Doubled braces around the JSON example are intentional: they render as single braces.
prompt = f"""
You are evaluating augmented images for an image classification task. The first image of class {class_name} is real and serves as the reference. 
For each of the remaining 10 augmented images, please evaluate them based on the following criteria, providing a score between 0.0 and 1.0 for each:
Index: from 1 to 10 in order of the augmented images
1. Visual and Structural Quality: Is the image clear, free of artifacts, realistic, and does it maintain appropriate context, structure, and proportions for the class? (0 = poor quality, highly distorted, 1 = excellent quality, perfectly consistent)
2. Class Identity: How well does the image maintain the core features and distinguishability from other classes? (0 = completely irrelevant, 1 = perfectly relevant)
3. Diversity: How different is the image from the reference while still maintaining class identity? (0 = identical, 1 = maximum useful diversity)
4. Beneficial Variations: How well does the image provide useful variations in pose, viewpoint, lighting, or color that could aid in generalization while maintaining class identity? (0 = no variation, 1 = optimal variation)
Provide your response as a list of JSON objects, one for each non-reference augmented image, in this format:
[
{{
 "index": 1,
 "visual_and_structural_quality": 0.0,
 "class_identity": 0.0,
 "diversity": 0.0,
 "beneficial_variations": 0.0,
 "explanation": "Brief explanation of how changes affect classification and quality based on the above criteria."
}}
]
"""

### Current transform

In [None]:
# if is_train:
#     return transforms.Compose([
#         transforms.Resize(self.image_size),
#         transforms.ToTensor(),
#         transforms.ConvertImageDtype(torch.float),
#         transforms.Lambda(lambda x: x.expand(3, *self.image_size)),
#         transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
#     ])

### Results for pascal torch dataset v1 and prompt v1 

In [111]:
import os
import pandas as pd
import glob

def count_images_in_folder(folder_path):
    """Return how many image files exist under ``folder_path``, recursively.

    Files are matched by extension (.jpg, .jpeg, .png, .gif), ignoring case.

    Args:
        folder_path: Root directory passed to ``os.walk``.

    Returns:
        int: Total number of matching files across all sub-directories.
    """
    image_extensions = ['.jpg', '.jpeg', '.png', '.gif']

    def _is_image(name):
        # Only the final extension is compared, case-insensitively.
        return os.path.splitext(name)[1].lower() in image_extensions

    return sum(
        1
        for _root, _dirs, names in os.walk(folder_path)
        for name in names
        if _is_image(name)
    )

def process_csv(file_path, target_epc, filtered_dir):
    """Build a one-row summary of a results CSV for a single examples_per_class.

    Args:
        file_path: Path of the results CSV; it needs the columns 'metric',
            'split', 'examples_per_class' and 'value'.
        target_epc: The examples_per_class value to keep.
        filtered_dir: Root directory containing ``filtered-pascal-0-<epc>``
            folders with one sub-folder per strategy.

    Returns:
        A one-row DataFrame with the columns 'filename',
        'examples_per_class', 'value' (maximum validation accuracy) and
        'image_count' (0 when the strategy folder does not exist), or an
        empty DataFrame when no row matches the filter.
    """
    frame = pd.read_csv(file_path)
    keep = (
        (frame['metric'] == 'Accuracy')
        & (frame['split'] == 'Validation')
        & (frame['examples_per_class'] == target_epc)
    )
    matching = frame[keep]

    # Nothing recorded for this setting: hand back an empty frame.
    if matching.empty:
        return pd.DataFrame()

    filename = os.path.basename(file_path)
    # The strategy folder is named after the CSV file without its '.csv' part.
    strategy_folder = os.path.join(
        filtered_dir,
        f"filtered-pascal-0-{target_epc}",
        filename.replace('.csv', ''),
    )
    if os.path.exists(strategy_folder):
        image_count = count_images_in_folder(strategy_folder)
    else:
        image_count = 0

    return pd.DataFrame({
        'filename': [filename],
        'examples_per_class': [target_epc],
        'value': [matching['value'].max()],
        'image_count': [image_count],
    })

def analyze_pascal_results(examples_per_class_list):
    """Rank every result CSV in ``my-results-v1`` against the baseline, per epc.

    For each examples-per-class value the function gathers the best
    validation accuracy of every result file (via ``process_csv``), sorts the
    rows by accuracy, displays the table, and prints the rows scoring
    strictly above the baseline as well as the top-ranked row.

    The baseline is the row of the file named ``pascal-baseline-0-<epc>``
    (any suffix such as ``-v2`` is accepted). Only when no such file has rows
    for this epc does the function fall back to the best row of any
    ``pascal-baseline-0-*`` file, which was the previous behaviour.

    Args:
        examples_per_class_list: iterable of examples-per-class values.

    Returns:
        ``(results, outperforming_strategies)``, two dicts keyed by epc.
        ``results[epc]`` is the accuracy-sorted DataFrame of all rows and
        ``outperforming_strategies[epc]`` the rows above the baseline. An epc
        with no files, no valid rows or no baseline row appears in neither.
    """
    results = {}
    outperforming_strategies = {}

    results_dir = 'my-results-v1'
    # The set of result files does not depend on epc, so list it once.
    csv_files = glob.glob(f'{results_dir}/*-0-*.csv')

    for epc in examples_per_class_list:
        print(f"\n--- Analysis for pascal-0-{epc} ---")

        if not csv_files:
            print(f"No files found for pascal-0-{epc}")
            continue

        print(f"Found {len(csv_files)} files for processing")

        # process_csv returns an empty frame for files without rows for this
        # epc; drop those so pd.concat only ever sees real rows.
        per_file = [process_csv(file, epc, results_dir) for file in csv_files]
        per_file = [frame for frame in per_file if not frame.empty]
        if per_file:
            all_results = pd.concat(per_file, ignore_index=True).dropna()
        else:
            all_results = pd.DataFrame()

        if all_results.empty:
            print(f"No valid results found for pascal-0-{epc}")
            continue

        all_results_sorted = all_results.sort_values('value', ascending=False)

        print("All results:")
        display(all_results_sorted)

        filenames = all_results_sorted['filename']
        # Match the baseline of this exact epc. The negative lookahead keeps
        # epc=1 from also matching "pascal-baseline-0-16".
        # NOTE: epc is interpolated into a regex; integer values are safe.
        baseline_files = all_results_sorted[
            filenames.str.contains(rf'pascal-baseline-0-{epc}(?!\d)', case=False)
        ]
        if baseline_files.empty:
            baseline_files = all_results_sorted[
                filenames.str.contains('pascal-baseline-0-', case=False)
            ]
            if not baseline_files.empty:
                print(f"Note: no pascal-baseline-0-{epc} file found; "
                      f"using {baseline_files['filename'].values[0]} as baseline")

        if baseline_files.empty:
            print(f"Warning: No baseline file found for pascal-0-{epc}")
            print("Available filenames:")
            print(all_results_sorted['filename'].tolist())
            continue

        # Rows are sorted by accuracy, so the first one is the best baseline row.
        baseline_score = baseline_files['value'].values[0]

        outperforming = all_results_sorted[all_results_sorted['value'] > baseline_score]

        print(f"\nStrategies outperforming baseline (score: {baseline_score:.6f}):")
        for _, row in outperforming.iterrows():
            print(f"{row['filename']}: {row['value']:.6f} (Images: {row['image_count']})")

        results[epc] = all_results_sorted
        outperforming_strategies[epc] = outperforming

        best_strategy = all_results_sorted.iloc[0]
        print(f"\nBest strategy for pascal-0-{epc}:")
        print(f"Filename: {best_strategy['filename']}")
        print(f"Examples per class: {best_strategy['examples_per_class']}")
        print(f"Best accuracy: {best_strategy['value']:.6f}")
        print(f"Image count: {best_strategy['image_count']}")

    return results, outperforming_strategies

# Examples-per-class settings to analyse; one analysis pass is run per value
examples_per_class_list = [1, 2, 4, 8, 16]

# Run the analysis; both returned objects are dicts keyed by examples_per_class
results, outperforming_strategies = analyze_pascal_results(examples_per_class_list)

# results[epc] holds the accuracy-sorted table for that setting and
# outperforming_strategies[epc] the rows that scored above the baseline.
# For example, to get the outperforming strategies for pascal-0-4:
# outperforming_strategies[4]


--- Analysis for pascal-0-1 ---
Found 201 files for processing
All results:


Unnamed: 0,filename,examples_per_class,value,image_count
0,pascal-0-1-top_n_overall-0_90.csv,1,0.673252,180
15,pascal-0-1-percentile_overall-0_10.csv,1,0.669661,172
26,pascal-0-1-percentile_overall_creative-0_10.csv,1,0.666673,178
5,pascal-0-1-top_n_overall_creative-0_90.csv,1,0.666307,180
16,pascal-0-1-percentile_class_creative-0_10.csv,1,0.665122,155
4,pascal-0-1-top_n_overall_creative-0_80.csv,1,0.659229,160
8,pascal-0-1-percentile_overall_creative-0_20.csv,1,0.659229,160
9,pascal-0-1-top_n_overall-0_80.csv,1,0.659229,160
10,pascal-0-1-top_n_class-0_90.csv,1,0.657856,180
18,pascal-0-1-percentile_top_n_overall_creative-0...,1,0.657831,131



Strategies outperforming baseline (score: 0.649085):
pascal-0-1-top_n_overall-0_90.csv: 0.673252 (Images: 180)
pascal-0-1-percentile_overall-0_10.csv: 0.669661 (Images: 172)
pascal-0-1-percentile_overall_creative-0_10.csv: 0.666673 (Images: 178)
pascal-0-1-top_n_overall_creative-0_90.csv: 0.666307 (Images: 180)
pascal-0-1-percentile_class_creative-0_10.csv: 0.665122 (Images: 155)
pascal-0-1-top_n_overall_creative-0_80.csv: 0.659229 (Images: 160)
pascal-0-1-percentile_overall_creative-0_20.csv: 0.659229 (Images: 160)
pascal-0-1-top_n_overall-0_80.csv: 0.659229 (Images: 160)
pascal-0-1-top_n_class-0_90.csv: 0.657856 (Images: 180)
pascal-0-1-percentile_top_n_overall_creative-0_1-0_9.csv: 0.657831 (Images: 131)
pascal-0-1-percentile_columns_overall-0_10.csv: 0.656610 (Images: 146)
pascal-0-1-zscore_overall-2.csv: 0.656369 (Images: 181)
pascal-0-1-top_n_class_creative-0_90.csv: 0.655562 (Images: 180)
pascal-0-1-percentile_class-0_10.csv: 0.654953 (Images: 153)
pascal-0-1-zscore_top_n_class

Unnamed: 0,filename,examples_per_class,value,image_count
17,pascal-0-2-percentile_top_n_overall-0_1-0_9.csv,2,0.702935,242
27,pascal-0-2-zscore_top_n_overall-2-0_8.csv,2,0.698364,288
37,pascal-0-2-zscore_top_n_overall_creative-2-0_9...,2,0.698006,324
16,pascal-0-2-top_n_overall_creative-0_80.csv,2,0.696525,320
29,pascal-0-2-zscore_top_n_overall-2-0_9.csv,2,0.696199,324
35,pascal-0-2-percentile_overall_creative-0_20.csv,2,0.695527,317
12,pascal-0-2-top_n_class_creative-0_90.csv,2,0.694716,360
15,pascal-0-2-percentile_overall-0_10.csv,2,0.694516,349
36,pascal-0-2-top_n_overall-0_70.csv,2,0.694226,280
20,pascal-0-2-percentile_overall-0_20.csv,2,0.693998,315



Strategies outperforming baseline (score: 0.691173):
pascal-0-2-percentile_top_n_overall-0_1-0_9.csv: 0.702935 (Images: 242)
pascal-0-2-zscore_top_n_overall-2-0_8.csv: 0.698364 (Images: 288)
pascal-0-2-zscore_top_n_overall_creative-2-0_9.csv: 0.698006 (Images: 324)
pascal-0-2-top_n_overall_creative-0_80.csv: 0.696525 (Images: 320)
pascal-0-2-zscore_top_n_overall-2-0_9.csv: 0.696199 (Images: 324)
pascal-0-2-percentile_overall_creative-0_20.csv: 0.695527 (Images: 317)
pascal-0-2-top_n_class_creative-0_90.csv: 0.694716 (Images: 360)
pascal-0-2-percentile_overall-0_10.csv: 0.694516 (Images: 349)
pascal-0-2-top_n_overall-0_70.csv: 0.694226 (Images: 280)
pascal-0-2-percentile_overall-0_20.csv: 0.693998 (Images: 315)
pascal-0-2-percentile_top_n_overall-0_1-0_8.csv: 0.693900 (Images: 215)
pascal-0-2-percentile_top_n_class-0_1-0_9.csv: 0.692337 (Images: 234)
pascal-0-2-top_n_class-0_80.csv: 0.691904 (Images: 320)
pascal-0-2-top_n_overall_creative-0_90.csv: 0.691191 (Images: 360)

Best strategy

Unnamed: 0,filename,examples_per_class,value,image_count
9,pascal-0-4-percentile_top_n_overall_creative-0...,4,0.761362,457
15,pascal-0-4-zscore_top_n_class_creative-2-0_7.csv,4,0.760445,479
4,pascal-0-4-zscore_top_n_class-2-0_7.csv,4,0.756855,479
38,pascal-0-4-top_n_overall_creative-0_60.csv,4,0.756004,511
45,pascal-0-4-zscore_top_n_class-2-0_8.csv,4,0.755074,551
39,pascal-0-4-percentile_columns_overall-0_10.csv,4,0.753974,559
46,pascal-0-4-top_n_overall_creative-0_70.csv,4,0.753852,569
40,pascal-0-4-zscore_top_n_class_creative-1_5-0_8...,4,0.753506,426
20,pascal-0-4-percentile_top_n_class_creative-0_2...,4,0.753376,350
14,pascal-0-4-zscore_top_n_class-1_5-0_8.csv,4,0.753367,426



Strategies outperforming baseline (score: 0.740899):
pascal-0-4-percentile_top_n_overall_creative-0_1-0_8.csv: 0.761362 (Images: 457)
pascal-0-4-zscore_top_n_class_creative-2-0_7.csv: 0.760445 (Images: 479)
pascal-0-4-zscore_top_n_class-2-0_7.csv: 0.756855 (Images: 479)
pascal-0-4-top_n_overall_creative-0_60.csv: 0.756004 (Images: 511)
pascal-0-4-zscore_top_n_class-2-0_8.csv: 0.755074 (Images: 551)
pascal-0-4-percentile_columns_overall-0_10.csv: 0.753974 (Images: 559)
pascal-0-4-top_n_overall_creative-0_70.csv: 0.753852 (Images: 569)
pascal-0-4-zscore_top_n_class_creative-1_5-0_8.csv: 0.753506 (Images: 426)
pascal-0-4-percentile_top_n_class_creative-0_2-0_9.csv: 0.753376 (Images: 350)
pascal-0-4-zscore_top_n_class-1_5-0_8.csv: 0.753367 (Images: 426)
pascal-0-4-top_n_class-0_60.csv: 0.753333 (Images: 519)
pascal-0-4-top_n_overall_creative-0_80.csv: 0.753296 (Images: 650)
pascal-0-4-percentile_top_n_class-0_1-0_9.csv: 0.752990 (Images: 493)
pascal-0-4-percentile_top_n_class-0_1-0_8.csv:

Unnamed: 0,filename,examples_per_class,value,image_count
36,pascal-0-8-zscore_overall-1_50.csv,8,0.790599,1085
1,pascal-0-8-percentile_top_n_overall-0_1-0_9.csv,8,0.789569,976
50,pascal-0-8-zscore_top_n_class_creative-1_5-0_8...,8,0.789394,861
26,pascal-0-8-zscore_top_n_overall_creative-2-0_9...,8,0.788774,1268
20,pascal-0-8-zscore_top_n_overall_creative-1_5-0...,8,0.788745,759
31,pascal-0-8-zscore_top_n_overall-2-0_9.csv,8,0.7885,1268
4,pascal-0-8-percentile_columns_overall-0_20.csv,8,0.788471,793
37,pascal-0-8-zscore_top_n_overall_creative-1_5-0...,8,0.788215,976
5,pascal-0-8-percentile_top_n_overall_creative-0...,8,0.787812,976
18,pascal-0-8-percentile_top_n_overall-0_2-0_8.csv,8,0.787801,634



Strategies outperforming baseline (score: 0.780030):
pascal-0-8-zscore_overall-1_50.csv: 0.790599 (Images: 1085)
pascal-0-8-percentile_top_n_overall-0_1-0_9.csv: 0.789569 (Images: 976)
pascal-0-8-zscore_top_n_class_creative-1_5-0_8.csv: 0.789394 (Images: 861)
pascal-0-8-zscore_top_n_overall_creative-2-0_9.csv: 0.788774 (Images: 1268)
pascal-0-8-zscore_top_n_overall_creative-1_5-0_7.csv: 0.788745 (Images: 759)
pascal-0-8-zscore_top_n_overall-2-0_9.csv: 0.788500 (Images: 1268)
pascal-0-8-percentile_columns_overall-0_20.csv: 0.788471 (Images: 793)
pascal-0-8-zscore_top_n_overall_creative-1_5-0_9.csv: 0.788215 (Images: 976)
pascal-0-8-percentile_top_n_overall_creative-0_1-0_9.csv: 0.787812 (Images: 976)
pascal-0-8-percentile_top_n_overall-0_2-0_8.csv: 0.787801 (Images: 634)
pascal-0-8-top_n_overall-0_40.csv: 0.787138 (Images: 640)
pascal-0-8-percentile_top_n_class_creative-0_1-0_9.csv: 0.787060 (Images: 969)
pascal-0-8-percentile_top_n_class-0_1-0_9.csv: 0.786990 (Images: 969)
pascal-0-8-

Unnamed: 0,filename,examples_per_class,value,image_count
6,pascal-0-16-percentile_columns_overall-0_10.csv,16,0.830495,2134
21,pascal-0-16-percentile_overall-0_20.csv,16,0.829804,0
5,pascal-0-16-percentile_top_n_class-0_1-0_9.csv,16,0.829387,1912
36,pascal-0-16-zscore_top_n_overall_creative-2-0_...,16,0.82899,0
30,pascal-0-16-percentile_columns_overall-0_20.csv,16,0.828514,0
34,pascal-0-16-percentile_top_n_overall_creative-...,16,0.827913,0
26,pascal-0-16-percentile_overall-0_30.csv,16,0.82746,0
1,pascal-0-16-percentile_overall_creative-0_20.csv,16,0.827191,0
14,pascal-0-16-zscore_top_n_overall-1_5-0_8.csv,16,0.826826,0
37,pascal-0-16-percentile_top_n_overall_creative-...,16,0.826778,1920



Strategies outperforming baseline (score: 0.815071):
pascal-0-16-percentile_columns_overall-0_10.csv: 0.830495 (Images: 2134)
pascal-0-16-percentile_overall-0_20.csv: 0.829804 (Images: 0)
pascal-0-16-percentile_top_n_class-0_1-0_9.csv: 0.829387 (Images: 1912)
pascal-0-16-zscore_top_n_overall_creative-2-0_8.csv: 0.828990 (Images: 0)
pascal-0-16-percentile_columns_overall-0_20.csv: 0.828514 (Images: 0)
pascal-0-16-percentile_top_n_overall_creative-0_1-0_8.csv: 0.827913 (Images: 0)
pascal-0-16-percentile_overall-0_30.csv: 0.827460 (Images: 0)
pascal-0-16-percentile_overall_creative-0_20.csv: 0.827191 (Images: 0)
pascal-0-16-zscore_top_n_overall-1_5-0_8.csv: 0.826826 (Images: 0)
pascal-0-16-percentile_top_n_overall_creative-0_1-0_9.csv: 0.826778 (Images: 1920)
pascal-0-16-zscore_top_n_class_creative-1_5-0_8.csv: 0.826505 (Images: 0)
pascal-0-16-top_n_overall-0_70.csv: 0.826333 (Images: 0)
pascal-0-16-percentile_top_n_overall-0_1-0_9.csv: 0.826261 (Images: 1920)
pascal-0-16-percentile_colu

## Correct version with baseline aug used

In [126]:
import os
import pandas as pd
import glob

def count_images_in_folder(folder_path):
    """Count image files found anywhere below ``folder_path``.

    A file counts as an image when its extension, compared
    case-insensitively, is one of .jpg, .jpeg, .png or .gif.
    Sub-directories are searched recursively.

    Args:
        folder_path: directory to scan.

    Returns:
        Number of matching files as an int.
    """
    image_extensions = ['.jpg', '.jpeg', '.png', '.gif']
    total = 0
    for _dirpath, _dirnames, filenames in os.walk(folder_path):
        for name in filenames:
            extension = os.path.splitext(name)[1].lower()
            if extension in image_extensions:
                total += 1
    return total

def process_csv(file_path, target_epc, filtered_dir):
    """Summarise one results CSV for a single examples-per-class setting.

    Only rows with metric == 'Accuracy', split == 'Validation' and
    examples_per_class == target_epc are considered.

    Args:
        file_path: path of the results CSV to read.
        target_epc: examples-per-class value to select.
        filtered_dir: root directory in which the filtered image folder
            ``filtered-pascal-0-<target_epc>/<csv name without .csv>`` is
            looked up.

    Returns:
        A one-row DataFrame with columns ``filename`` (CSV base name),
        ``examples_per_class``, ``value`` (highest selected accuracy) and
        ``image_count`` (images in the strategy folder, 0 when the folder
        does not exist). An empty DataFrame when no row is selected.
    """
    frame = pd.read_csv(file_path)
    wanted = (
        (frame['metric'] == 'Accuracy')
        & (frame['split'] == 'Validation')
        & (frame['examples_per_class'] == target_epc)
    )
    selected = frame[wanted]
    if selected.empty:
        return pd.DataFrame()

    filename = os.path.basename(file_path)
    strategy_folder = os.path.join(
        filtered_dir,
        f"filtered-pascal-0-{target_epc}",
        filename.replace('.csv', ''),
    )
    if os.path.exists(strategy_folder):
        image_count = count_images_in_folder(strategy_folder)
    else:
        image_count = 0

    return pd.DataFrame({
        'filename': [filename],
        'examples_per_class': [target_epc],
        'value': [selected['value'].max()],
        'image_count': [image_count],
    })

def get_baseline_score(baseline_file, target_epc=None):
    """Return the best validation accuracy stored in a baseline results CSV.

    Args:
        baseline_file: path of a CSV with ``metric``, ``split`` and ``value``
            columns (plus ``examples_per_class`` when ``target_epc`` is given).
        target_epc: optional examples-per-class value. When given, only rows
            with that ``examples_per_class`` are considered. The default
            ``None`` keeps the original behaviour of taking the maximum over
            all validation-accuracy rows of the file.

    Returns:
        The highest matching ``value``, or ``None`` when no matching row
        carries a usable (non-missing) value.
    """
    df = pd.read_csv(baseline_file)
    mask = (df['metric'] == 'Accuracy') & (df['split'] == 'Validation')
    if target_epc is not None:
        mask = mask & (df['examples_per_class'] == target_epc)

    # Drop missing values first: max() over an all-NaN selection would return
    # NaN, which callers testing ``is not None`` would accept as a real score.
    values = df.loc[mask, 'value'].dropna()
    if values.empty:
        return None
    return values.max()

def analyze_pascal_results(examples_per_class_list):
    """Rank every result CSV in ``my-results-aug-v1`` against the baseline, per epc.

    For each examples-per-class value the function gathers the best
    validation accuracy of every result file (via ``process_csv``), sorts the
    rows by accuracy, displays the table, reads the baseline score from
    ``pascal-baseline-0-<epc>.csv`` (via ``get_baseline_score``) and prints
    the rows scoring strictly above it together with the top-ranked row.

    Args:
        examples_per_class_list: iterable of examples-per-class values.

    Returns:
        ``(results, outperforming_strategies, best_scores, baseline_scores)``,
        four dicts keyed by epc. ``results[epc]`` is the accuracy-sorted
        DataFrame, ``outperforming_strategies[epc]`` the rows above the
        baseline (empty when no baseline score is available),
        ``best_scores[epc]`` the top accuracy, and ``baseline_scores[epc]``
        the baseline accuracy (present only when one could be read). An epc
        with no files or no valid rows appears in none of them.
    """
    results = {}
    outperforming_strategies = {}
    best_scores = {}
    baseline_scores = {}

    results_dir = 'my-results-aug-v1'
    # The set of result files does not depend on epc, so list it once.
    csv_files = glob.glob(f'{results_dir}/*-0-*.csv')

    for epc in examples_per_class_list:
        print(f"\n--- Analysis for pascal-0-{epc} ---")

        if not csv_files:
            print(f"No files found for pascal-0-{epc}")
            continue

        print(f"Found {len(csv_files)} files for processing")

        # process_csv returns an empty frame for files without rows for this
        # epc; drop those so pd.concat only ever sees real rows.
        per_file = [process_csv(file, epc, results_dir) for file in csv_files]
        per_file = [frame for frame in per_file if not frame.empty]
        if per_file:
            all_results = pd.concat(per_file, ignore_index=True).dropna()
        else:
            all_results = pd.DataFrame()

        if all_results.empty:
            print(f"No valid results found for pascal-0-{epc}")
            continue

        all_results_sorted = all_results.sort_values('value', ascending=False)

        print("All results:")
        display(all_results_sorted)

        baseline_file = f'{results_dir}/pascal-baseline-0-{epc}.csv'
        if os.path.exists(baseline_file):
            baseline_score = get_baseline_score(baseline_file)
            # Treat NaN like "no score": comparing against NaN is always False
            # and would silently yield an empty comparison and a NaN improvement.
            if baseline_score is not None and not pd.isna(baseline_score):
                baseline_scores[epc] = baseline_score
                print(f"Baseline score for pascal-0-{epc}: {baseline_score:.6f}")
            else:
                print(f"Warning: Could not extract baseline score from {baseline_file}")
        else:
            print(f"Warning: Baseline file not found for pascal-0-{epc}")

        if epc in baseline_scores:
            outperforming = all_results_sorted[all_results_sorted['value'] > baseline_scores[epc]]
        else:
            # Without a baseline nothing can be said to outperform it; the
            # previous default of 0 reported every single row as a winner.
            outperforming = all_results_sorted.iloc[0:0]

        print("\nStrategies outperforming baseline:")
        if epc not in baseline_scores:
            print("(no baseline score available, comparison skipped)")
        for _, row in outperforming.iterrows():
            print(f"{row['filename']}: {row['value']:.6f} (Images: {row['image_count']})")

        results[epc] = all_results_sorted
        outperforming_strategies[epc] = outperforming

        best_strategy = all_results_sorted.iloc[0]
        best_scores[epc] = best_strategy['value']

        print(f"\nBest strategy for pascal-0-{epc}:")
        print(f"Filename: {best_strategy['filename']}")
        print(f"Examples per class: {best_strategy['examples_per_class']}")
        print(f"Best accuracy: {best_strategy['value']:.6f}")
        print(f"Image count: {best_strategy['image_count']}")

    return results, outperforming_strategies, best_scores, baseline_scores

# Examples-per-class settings to evaluate
examples_per_class_list = [1, 2, 4, 8, 16]

# Analyse every setting in a single call
results, outperforming_strategies, best_scores, baseline_scores = analyze_pascal_results(examples_per_class_list)

# Report the gap between the best-scoring result and the baseline,
# expressed in accuracy percentage points
print("\nImprovements over researcher's baseline:")
for epc in examples_per_class_list:
    if epc not in best_scores or epc not in baseline_scores:
        print(f"Could not calculate improvement for pascal-0-{epc} due to missing data")
        continue
    improvement = (best_scores[epc] - baseline_scores[epc]) * 100
    print(f"Improved accuracy over researchers for pascal-0-{epc}: {improvement:.2f}%")


--- Analysis for pascal-0-1 ---
Found 31 files for processing
All results:


Unnamed: 0,filename,examples_per_class,value,image_count
0,pascal-0-1-top_n_overall-0_90.csv,1,0.678446,180
3,pascal-0-1-top_n_overall_creative-0_90.csv,1,0.677568,180
9,pascal-0-1-percentile_overall_creative-0_10.csv,1,0.676713,178
6,pascal-0-1-percentile_overall-0_10.csv,1,0.676578,172
7,pascal-0-1-percentile_class_creative-0_10.csv,1,0.667789,155
2,pascal-baseline-0-1.csv,1,0.662918,0
1,pascal-baseline-0-2.csv,1,0.660436,0
4,pascal-baseline-0-4.csv,1,0.660436,0
5,pascal-baseline-0-16.csv,1,0.660436,0
8,pascal-baseline-0-8.csv,1,0.660436,0


Baseline score for pascal-0-1: 0.662918

Strategies outperforming baseline:
pascal-0-1-top_n_overall-0_90.csv: 0.678446 (Images: 180)
pascal-0-1-top_n_overall_creative-0_90.csv: 0.677568 (Images: 180)
pascal-0-1-percentile_overall_creative-0_10.csv: 0.676713 (Images: 178)
pascal-0-1-percentile_overall-0_10.csv: 0.676578 (Images: 172)
pascal-0-1-percentile_class_creative-0_10.csv: 0.667789 (Images: 155)

Best strategy for pascal-0-1:
Filename: pascal-0-1-top_n_overall-0_90.csv
Examples per class: 1
Best accuracy: 0.678446
Image count: 180

--- Analysis for pascal-0-2 ---
Found 31 files for processing
All results:


Unnamed: 0,filename,examples_per_class,value,image_count
6,pascal-0-2-zscore_top_n_overall-2-0_9.csv,2,0.726129,324
3,pascal-0-2-top_n_overall_creative-0_80.csv,2,0.722493,320
5,pascal-0-2-zscore_top_n_overall-2-0_8.csv,2,0.721345,288
9,pascal-0-2-zscore_top_n_overall_creative-2-0_9...,2,0.721313,324
4,pascal-0-2-percentile_top_n_overall-0_1-0_9.csv,2,0.719868,242
0,pascal-baseline-0-2.csv,2,0.71001,0
1,pascal-baseline-0-4.csv,2,0.71001,0
2,pascal-baseline-0-16.csv,2,0.71001,0
7,pascal-baseline-0-8.csv,2,0.71001,0
8,pascal-baseline-0-1-v2.csv,2,0.71001,0


Baseline score for pascal-0-2: 0.710010

Strategies outperforming baseline:
pascal-0-2-zscore_top_n_overall-2-0_9.csv: 0.726129 (Images: 324)
pascal-0-2-top_n_overall_creative-0_80.csv: 0.722493 (Images: 320)
pascal-0-2-zscore_top_n_overall-2-0_8.csv: 0.721345 (Images: 288)
pascal-0-2-zscore_top_n_overall_creative-2-0_9.csv: 0.721313 (Images: 324)
pascal-0-2-percentile_top_n_overall-0_1-0_9.csv: 0.719868 (Images: 242)

Best strategy for pascal-0-2:
Filename: pascal-0-2-zscore_top_n_overall-2-0_9.csv
Examples per class: 2
Best accuracy: 0.726129
Image count: 324

--- Analysis for pascal-0-4 ---
Found 31 files for processing
All results:


Unnamed: 0,filename,examples_per_class,value,image_count
4,pascal-0-4-zscore_top_n_class_creative-2-0_7.csv,4,0.768936,479
1,pascal-0-4-percentile_top_n_overall_creative-0...,4,0.766805,447
5,pascal-0-4-top_n_overall_creative-0_60.csv,4,0.766349,480
7,pascal-0-4-zscore_top_n_class-2-0_8.csv,4,0.765908,551
0,pascal-0-4-zscore_top_n_class-2-0_7.csv,4,0.762307,479
2,pascal-baseline-0-4.csv,4,0.756322,0
3,pascal-baseline-0-16.csv,4,0.756322,0
6,pascal-baseline-0-8.csv,4,0.756322,0
8,pascal-baseline-0-1-v2.csv,4,0.756322,0


Baseline score for pascal-0-4: 0.756322

Strategies outperforming baseline:
pascal-0-4-zscore_top_n_class_creative-2-0_7.csv: 0.768936 (Images: 479)
pascal-0-4-percentile_top_n_overall_creative-0_1-0_8.csv: 0.766805 (Images: 447)
pascal-0-4-top_n_overall_creative-0_60.csv: 0.766349 (Images: 480)
pascal-0-4-zscore_top_n_class-2-0_8.csv: 0.765908 (Images: 551)
pascal-0-4-zscore_top_n_class-2-0_7.csv: 0.762307 (Images: 479)

Best strategy for pascal-0-4:
Filename: pascal-0-4-zscore_top_n_class_creative-2-0_7.csv
Examples per class: 4
Best accuracy: 0.768936
Image count: 479

--- Analysis for pascal-0-8 ---
Found 31 files for processing
All results:


Unnamed: 0,filename,examples_per_class,value,image_count
2,pascal-0-8-zscore_top_n_overall_creative-1_5-0...,8,0.785979,759
0,pascal-0-8-percentile_top_n_overall-0_1-0_9.csv,8,0.785925,976
6,pascal-0-8-zscore_top_n_class_creative-1_5-0_8...,8,0.781897,861
4,pascal-0-8-zscore_overall-1_50.csv,8,0.781497,1085
3,pascal-0-8-zscore_top_n_overall_creative-2-0_9...,8,0.781095,1268
1,pascal-baseline-0-16.csv,8,0.779054,0
5,pascal-baseline-0-8.csv,8,0.779054,0
7,pascal-baseline-0-1-v2.csv,8,0.779054,0


Baseline score for pascal-0-8: 0.779054

Strategies outperforming baseline:
pascal-0-8-zscore_top_n_overall_creative-1_5-0_7.csv: 0.785979 (Images: 759)
pascal-0-8-percentile_top_n_overall-0_1-0_9.csv: 0.785925 (Images: 976)
pascal-0-8-zscore_top_n_class_creative-1_5-0_8.csv: 0.781897 (Images: 861)
pascal-0-8-zscore_overall-1_50.csv: 0.781497 (Images: 1085)
pascal-0-8-zscore_top_n_overall_creative-2-0_9.csv: 0.781095 (Images: 1268)

Best strategy for pascal-0-8:
Filename: pascal-0-8-zscore_top_n_overall_creative-1_5-0_7.csv
Examples per class: 8
Best accuracy: 0.785979
Image count: 759

--- Analysis for pascal-0-16 ---
Found 31 files for processing
All results:


Unnamed: 0,filename,examples_per_class,value,image_count
1,pascal-0-16-percentile_top_n_class-0_1-0_9.csv,16,0.827378,1912
2,pascal-0-16-percentile_columns_overall-0_10.csv,16,0.822335,2134
3,pascal-0-16-percentile_overall-0_20.csv,16,0.821378,2511
5,pascal-0-16-zscore_top_n_overall_creative-2-0_...,16,0.819986,2296
4,pascal-0-16-percentile_columns_overall-0_20.csv,16,0.815259,1500
0,pascal-baseline-0-16.csv,16,0.814962,0
6,pascal-baseline-0-1-v2.csv,16,0.814962,0


Baseline score for pascal-0-16: 0.814962

Strategies outperforming baseline:
pascal-0-16-percentile_top_n_class-0_1-0_9.csv: 0.827378 (Images: 1912)
pascal-0-16-percentile_columns_overall-0_10.csv: 0.822335 (Images: 2134)
pascal-0-16-percentile_overall-0_20.csv: 0.821378 (Images: 2511)
pascal-0-16-zscore_top_n_overall_creative-2-0_8.csv: 0.819986 (Images: 2296)
pascal-0-16-percentile_columns_overall-0_20.csv: 0.815259 (Images: 1500)

Best strategy for pascal-0-16:
Filename: pascal-0-16-percentile_top_n_class-0_1-0_9.csv
Examples per class: 16
Best accuracy: 0.827378
Image count: 1912

Improvements over researcher's baseline:
Improved accuracy over researchers for pascal-0-1: 1.55%
Improved accuracy over researchers for pascal-0-2: 1.61%
Improved accuracy over researchers for pascal-0-4: 1.26%
Improved accuracy over researchers for pascal-0-8: 0.69%
Improved accuracy over researchers for pascal-0-16: 1.24%


## New modification results compared to researchers with prompt v1 and dataset v1

In [122]:
import os
import pandas as pd
import glob

def count_images_in_folder(folder_path):
    """Count image files found anywhere below ``folder_path``.

    A file counts as an image when its extension, compared
    case-insensitively, is one of .jpg, .jpeg, .png or .gif.
    Sub-directories are searched recursively.

    Args:
        folder_path: directory to scan.

    Returns:
        Number of matching files as an int.
    """
    image_extensions = ['.jpg', '.jpeg', '.png', '.gif']
    total = 0
    for _dirpath, _dirnames, filenames in os.walk(folder_path):
        for name in filenames:
            extension = os.path.splitext(name)[1].lower()
            if extension in image_extensions:
                total += 1
    return total

def process_csv(file_path, target_epc, filtered_dir):
    """Summarise one results CSV for a single examples-per-class setting.

    Only rows with metric == 'Accuracy', split == 'Validation' and
    examples_per_class == target_epc are considered.

    Args:
        file_path: path of the results CSV to read.
        target_epc: examples-per-class value to select.
        filtered_dir: root directory in which the filtered image folder
            ``filtered-pascal-0-<target_epc>/<csv name without .csv>`` is
            looked up.

    Returns:
        A one-row DataFrame with columns ``filename`` (CSV base name),
        ``examples_per_class``, ``value`` (highest selected accuracy) and
        ``image_count`` (images in the strategy folder, 0 when the folder
        does not exist). An empty DataFrame when no row is selected.
    """
    frame = pd.read_csv(file_path)
    wanted = (
        (frame['metric'] == 'Accuracy')
        & (frame['split'] == 'Validation')
        & (frame['examples_per_class'] == target_epc)
    )
    selected = frame[wanted]
    if selected.empty:
        return pd.DataFrame()

    filename = os.path.basename(file_path)
    strategy_folder = os.path.join(
        filtered_dir,
        f"filtered-pascal-0-{target_epc}",
        filename.replace('.csv', ''),
    )
    if os.path.exists(strategy_folder):
        image_count = count_images_in_folder(strategy_folder)
    else:
        image_count = 0

    return pd.DataFrame({
        'filename': [filename],
        'examples_per_class': [target_epc],
        'value': [selected['value'].max()],
        'image_count': [image_count],
    })

def get_baseline_score(baseline_file, target_epc=None):
    """Return the best validation accuracy stored in a baseline results CSV.

    Args:
        baseline_file: path of a CSV with ``metric``, ``split`` and ``value``
            columns (plus ``examples_per_class`` when ``target_epc`` is given).
        target_epc: optional examples-per-class value. When given, only rows
            with that ``examples_per_class`` are considered. The default
            ``None`` keeps the original behaviour of taking the maximum over
            all validation-accuracy rows of the file.

    Returns:
        The highest matching ``value``, or ``None`` when no matching row
        carries a usable (non-missing) value.
    """
    df = pd.read_csv(baseline_file)
    mask = (df['metric'] == 'Accuracy') & (df['split'] == 'Validation')
    if target_epc is not None:
        mask = mask & (df['examples_per_class'] == target_epc)

    # Drop missing values first: max() over an all-NaN selection would return
    # NaN, which callers testing ``is not None`` would accept as a real score.
    values = df.loc[mask, 'value'].dropna()
    if values.empty:
        return None
    return values.max()

def analyze_pascal_results(examples_per_class_list):
    """Rank every result CSV in ``my-results-randaug-v1`` against the baseline, per epc.

    For each examples-per-class value the function gathers the best
    validation accuracy of every result file (via ``process_csv``), sorts the
    rows by accuracy, displays the table, reads the baseline score from
    ``pascal-baseline-0-<epc>.csv`` (via ``get_baseline_score``) and prints
    the rows scoring strictly above it together with the top-ranked row.

    Args:
        examples_per_class_list: iterable of examples-per-class values.

    Returns:
        ``(results, outperforming_strategies, best_scores, baseline_scores)``,
        four dicts keyed by epc. ``results[epc]`` is the accuracy-sorted
        DataFrame, ``outperforming_strategies[epc]`` the rows above the
        baseline (empty when no baseline score is available),
        ``best_scores[epc]`` the top accuracy, and ``baseline_scores[epc]``
        the baseline accuracy (present only when one could be read). An epc
        with no files or no valid rows appears in none of them.
    """
    results = {}
    outperforming_strategies = {}
    best_scores = {}
    baseline_scores = {}

    # Single source for the directory name: the filtered-image lookup used to
    # point at the misspelled 'my-rresults-randaug-v1', so the folder was never
    # found and every image count came out as 0.
    results_dir = 'my-results-randaug-v1'
    # The set of result files does not depend on epc, so list it once.
    csv_files = glob.glob(f'{results_dir}/*-0-*.csv')

    for epc in examples_per_class_list:
        print(f"\n--- Analysis for pascal-0-{epc} ---")

        if not csv_files:
            print(f"No files found for pascal-0-{epc}")
            continue

        print(f"Found {len(csv_files)} files for processing")

        # process_csv returns an empty frame for files without rows for this
        # epc; drop those so pd.concat only ever sees real rows.
        per_file = [process_csv(file, epc, results_dir) for file in csv_files]
        per_file = [frame for frame in per_file if not frame.empty]
        if per_file:
            all_results = pd.concat(per_file, ignore_index=True).dropna()
        else:
            all_results = pd.DataFrame()

        if all_results.empty:
            print(f"No valid results found for pascal-0-{epc}")
            continue

        all_results_sorted = all_results.sort_values('value', ascending=False)

        print("All results:")
        display(all_results_sorted)

        baseline_file = f'{results_dir}/pascal-baseline-0-{epc}.csv'
        if os.path.exists(baseline_file):
            baseline_score = get_baseline_score(baseline_file)
            # Treat NaN like "no score": comparing against NaN is always False
            # and would silently yield an empty comparison and a NaN improvement.
            if baseline_score is not None and not pd.isna(baseline_score):
                baseline_scores[epc] = baseline_score
                print(f"Baseline score for pascal-0-{epc}: {baseline_score:.6f}")
            else:
                print(f"Warning: Could not extract baseline score from {baseline_file}")
        else:
            print(f"Warning: Baseline file not found for pascal-0-{epc}")

        if epc in baseline_scores:
            outperforming = all_results_sorted[all_results_sorted['value'] > baseline_scores[epc]]
        else:
            # Without a baseline nothing can be said to outperform it; the
            # previous default of 0 reported every single row as a winner.
            outperforming = all_results_sorted.iloc[0:0]

        print("\nStrategies outperforming baseline:")
        if epc not in baseline_scores:
            print("(no baseline score available, comparison skipped)")
        for _, row in outperforming.iterrows():
            print(f"{row['filename']}: {row['value']:.6f} (Images: {row['image_count']})")

        results[epc] = all_results_sorted
        outperforming_strategies[epc] = outperforming

        best_strategy = all_results_sorted.iloc[0]
        best_scores[epc] = best_strategy['value']

        print(f"\nBest strategy for pascal-0-{epc}:")
        print(f"Filename: {best_strategy['filename']}")
        print(f"Examples per class: {best_strategy['examples_per_class']}")
        print(f"Best accuracy: {best_strategy['value']:.6f}")
        print(f"Image count: {best_strategy['image_count']}")

    return results, outperforming_strategies, best_scores, baseline_scores

# Examples-per-class settings to evaluate
examples_per_class_list = [1, 2, 4, 8, 16]

# Analyse every setting in a single call
results, outperforming_strategies, best_scores, baseline_scores = analyze_pascal_results(examples_per_class_list)

# Report the gap between the best-scoring result and the baseline,
# expressed in accuracy percentage points
print("\nImprovements over researcher's baseline:")
for epc in examples_per_class_list:
    if epc not in best_scores or epc not in baseline_scores:
        print(f"Could not calculate improvement for pascal-0-{epc} due to missing data")
        continue
    improvement = (best_scores[epc] - baseline_scores[epc]) * 100
    print(f"Improved accuracy over researchers for pascal-0-{epc}: {improvement:.2f}%")


--- Analysis for pascal-0-1 ---
Found 43 files for processing
All results:


Unnamed: 0,filename,examples_per_class,value,image_count
0,pascal-0-1-top_n_overall-0_90.csv,1,0.684333,0
9,pascal-0-1-percentile_overall_creative-0_10.csv,1,0.684075,0
7,pascal-0-1-percentile_class_creative-0_10.csv,1,0.681221,0
3,pascal-0-1-top_n_overall_creative-0_90.csv,1,0.68098,0
6,pascal-0-1-percentile_overall-0_10.csv,1,0.672397,0
2,pascal-baseline-0-1.csv,1,0.662918,0
1,pascal-baseline-0-2.csv,1,0.660436,0
4,pascal-baseline-0-4.csv,1,0.660436,0
5,pascal-baseline-0-16.csv,1,0.660436,0
8,pascal-baseline-0-8.csv,1,0.660436,0


Baseline score for pascal-0-1: 0.662918

Strategies outperforming baseline:
pascal-0-1-top_n_overall-0_90.csv: 0.684333 (Images: 0)
pascal-0-1-percentile_overall_creative-0_10.csv: 0.684075 (Images: 0)
pascal-0-1-percentile_class_creative-0_10.csv: 0.681221 (Images: 0)
pascal-0-1-top_n_overall_creative-0_90.csv: 0.680980 (Images: 0)
pascal-0-1-percentile_overall-0_10.csv: 0.672397 (Images: 0)

Best strategy for pascal-0-1:
Filename: pascal-0-1-top_n_overall-0_90.csv
Examples per class: 1
Best accuracy: 0.684333
Image count: 0

--- Analysis for pascal-0-2 ---
Found 43 files for processing
All results:


Unnamed: 0,filename,examples_per_class,value,image_count
3,pascal-0-2-top_n_overall_creative-0_80.csv,2,0.738917,0
4,pascal-0-2-percentile_top_n_overall-0_1-0_9.csv,2,0.738439,0
6,pascal-0-2-zscore_top_n_overall-2-0_9.csv,2,0.738155,0
8,pascal-0-2-zscore_top_n_overall_creative-2-0_9...,2,0.737935,0
5,pascal-0-2-zscore_top_n_overall-2-0_8.csv,2,0.737561,0
0,pascal-baseline-0-2.csv,2,0.71001,0
1,pascal-baseline-0-4.csv,2,0.71001,0
2,pascal-baseline-0-16.csv,2,0.71001,0
7,pascal-baseline-0-8.csv,2,0.71001,0


Baseline score for pascal-0-2: 0.710010

Strategies outperforming baseline:
pascal-0-2-top_n_overall_creative-0_80.csv: 0.738917 (Images: 0)
pascal-0-2-percentile_top_n_overall-0_1-0_9.csv: 0.738439 (Images: 0)
pascal-0-2-zscore_top_n_overall-2-0_9.csv: 0.738155 (Images: 0)
pascal-0-2-zscore_top_n_overall_creative-2-0_9.csv: 0.737935 (Images: 0)
pascal-0-2-zscore_top_n_overall-2-0_8.csv: 0.737561 (Images: 0)

Best strategy for pascal-0-2:
Filename: pascal-0-2-top_n_overall_creative-0_80.csv
Examples per class: 2
Best accuracy: 0.738917
Image count: 0

--- Analysis for pascal-0-4 ---
Found 43 files for processing
All results:


Unnamed: 0,filename,examples_per_class,value,image_count
5,pascal-0-4-top_n_overall_creative-0_60.csv,4,0.778394,0
1,pascal-0-4-percentile_top_n_overall_creative-0...,4,0.777741,0
4,pascal-0-4-zscore_top_n_class_creative-2-0_7.csv,4,0.774643,0
0,pascal-0-4-zscore_top_n_class-2-0_7.csv,4,0.771194,0
7,pascal-0-4-zscore_top_n_class-2-0_8.csv,4,0.770639,0
2,pascal-baseline-0-4.csv,4,0.756322,0
3,pascal-baseline-0-16.csv,4,0.756322,0
6,pascal-baseline-0-8.csv,4,0.756322,0


Baseline score for pascal-0-4: 0.756322

Strategies outperforming baseline:
pascal-0-4-top_n_overall_creative-0_60.csv: 0.778394 (Images: 0)
pascal-0-4-percentile_top_n_overall_creative-0_1-0_8.csv: 0.777741 (Images: 0)
pascal-0-4-zscore_top_n_class_creative-2-0_7.csv: 0.774643 (Images: 0)
pascal-0-4-zscore_top_n_class-2-0_7.csv: 0.771194 (Images: 0)
pascal-0-4-zscore_top_n_class-2-0_8.csv: 0.770639 (Images: 0)

Best strategy for pascal-0-4:
Filename: pascal-0-4-top_n_overall_creative-0_60.csv
Examples per class: 4
Best accuracy: 0.778394
Image count: 0

--- Analysis for pascal-0-8 ---
Found 43 files for processing
All results:


Unnamed: 0,filename,examples_per_class,value,image_count
2,pascal-0-8-zscore_top_n_overall_creative-1_5-0...,8,0.799487,0
6,pascal-0-8-zscore_top_n_class_creative-1_5-0_8...,8,0.798677,0
3,pascal-0-8-zscore_top_n_overall_creative-2-0_9...,8,0.797337,0
0,pascal-0-8-percentile_top_n_overall-0_1-0_9.csv,8,0.796627,0
4,pascal-0-8-zscore_overall-1_50.csv,8,0.79618,0
1,pascal-baseline-0-16.csv,8,0.779054,0
5,pascal-baseline-0-8.csv,8,0.779054,0


Baseline score for pascal-0-8: 0.779054

Strategies outperforming baseline:
pascal-0-8-zscore_top_n_overall_creative-1_5-0_7.csv: 0.799487 (Images: 0)
pascal-0-8-zscore_top_n_class_creative-1_5-0_8.csv: 0.798677 (Images: 0)
pascal-0-8-zscore_top_n_overall_creative-2-0_9.csv: 0.797337 (Images: 0)
pascal-0-8-percentile_top_n_overall-0_1-0_9.csv: 0.796627 (Images: 0)
pascal-0-8-zscore_overall-1_50.csv: 0.796180 (Images: 0)

Best strategy for pascal-0-8:
Filename: pascal-0-8-zscore_top_n_overall_creative-1_5-0_7.csv
Examples per class: 8
Best accuracy: 0.799487
Image count: 0

--- Analysis for pascal-0-16 ---
Found 43 files for processing
All results:


Unnamed: 0,filename,examples_per_class,value,image_count
2,pascal-0-16-percentile_top_n_class-0_1-0_9.csv,16,0.846669,0
17,pascal-0-16-zscore_top_n_overall_creative-2-0_...,16,0.846168,0
8,pascal-0-16-percentile_overall-0_20.csv,16,0.842426,0
3,pascal-0-16-percentile_columns_overall-0_10.csv,16,0.841954,0
14,pascal-0-16-percentile_columns_overall-0_20.csv,16,0.837658,0
16,pascal-0-16-percentile_top_n_overall_creative-...,16,0.827913,0
12,pascal-0-16-percentile_overall-0_30.csv,16,0.82746,0
5,pascal-0-16-top_n_overall-0_70.csv,16,0.826333,0
9,pascal-0-16-percentile_top_n_overall-0_1-0_9.csv,16,0.826261,0
13,pascal-0-16-percentile_top_n_overall-0_2-0_9.csv,16,0.825353,0


Baseline score for pascal-0-16: 0.814962

Strategies outperforming baseline:
pascal-0-16-percentile_top_n_class-0_1-0_9.csv: 0.846669 (Images: 0)
pascal-0-16-zscore_top_n_overall_creative-2-0_8.csv: 0.846168 (Images: 0)
pascal-0-16-percentile_overall-0_20.csv: 0.842426 (Images: 0)
pascal-0-16-percentile_columns_overall-0_10.csv: 0.841954 (Images: 0)
pascal-0-16-percentile_columns_overall-0_20.csv: 0.837658 (Images: 0)
pascal-0-16-percentile_top_n_overall_creative-0_1-0_8.csv: 0.827913 (Images: 0)
pascal-0-16-percentile_overall-0_30.csv: 0.827460 (Images: 0)
pascal-0-16-top_n_overall-0_70.csv: 0.826333 (Images: 0)
pascal-0-16-percentile_top_n_overall-0_1-0_9.csv: 0.826261 (Images: 0)
pascal-0-16-percentile_top_n_overall-0_2-0_9.csv: 0.825353 (Images: 0)
pascal-0-16-percentile_top_n_overall-0_1-0_8.csv: 0.824945 (Images: 0)
pascal-0-16-zscore_top_n_overall_creative-2-0_7.csv: 0.824710 (Images: 0)
pascal-0-16-top_n_overall_creative-0_70.csv: 0.824618 (Images: 0)
pascal-0-16-percentile_top

## New modification results compared to researchers with prompt v1 and dataset v2

In [128]:
import os
import pandas as pd
import glob

def count_images_in_folder(folder_path):
    """Count image files under ``folder_path``, including all sub-folders.

    A file counts as an image when its extension, compared case-insensitively,
    is one of .jpg, .jpeg, .png or .gif.

    Args:
        folder_path: Directory to walk recursively.

    Returns:
        The number of matching files (0 if the walk yields nothing).
    """
    image_extensions = ['.jpg', '.jpeg', '.png', '.gif']
    total = 0
    for _root, _dirs, files in os.walk(folder_path):
        for name in files:
            extension = os.path.splitext(name)[1].lower()
            if extension in image_extensions:
                total += 1
    return total

def process_csv(file_path, target_epc, filtered_dir):
    """Summarise one results CSV for a given examples-per-class value.

    Only rows with metric 'Accuracy', split 'Validation' and
    examples_per_class equal to ``target_epc`` are considered.

    Args:
        file_path: Path of the results CSV to read.
        target_epc: examples_per_class value to select.
        filtered_dir: Root directory that is searched for the strategy's
            image folder, at
            ``<filtered_dir>/filtered-pascal-0-<target_epc>/<csv name without '.csv'>``.

    Returns:
        A one-row DataFrame with columns 'filename', 'examples_per_class',
        'value' (the maximum matching accuracy) and 'image_count' (0 when the
        strategy folder does not exist), or an empty DataFrame when no row
        matches.
    """
    df = pd.read_csv(file_path)
    wanted_rows = (
        (df['metric'] == 'Accuracy')
        & (df['split'] == 'Validation')
        & (df['examples_per_class'] == target_epc)
    )
    matching = df[wanted_rows]
    if matching.empty:
        return pd.DataFrame()

    filename = os.path.basename(file_path)
    strategy_folder = os.path.join(
        filtered_dir,
        f"filtered-pascal-0-{target_epc}",
        filename.replace('.csv', ''),
    )
    if os.path.exists(strategy_folder):
        image_count = count_images_in_folder(strategy_folder)
    else:
        image_count = 0

    summary = {
        'filename': [filename],
        'examples_per_class': [target_epc],
        'value': [matching['value'].max()],
        'image_count': [image_count],
    }
    return pd.DataFrame(summary)

def get_baseline_score(baseline_file, target_epc=None):
    """Return the best validation accuracy recorded in a baseline CSV.

    Args:
        baseline_file: Path of a CSV with 'metric', 'split' and 'value'
            columns (and 'examples_per_class' when ``target_epc`` is given).
        target_epc: Optional examples_per_class value. When given, only rows
            with that value are considered. A baseline file can contain rows
            for several examples_per_class values, so without this filter the
            maximum is taken across all of them. ``None`` keeps the old
            unfiltered behaviour.

    Returns:
        The maximum matching 'value', or ``None`` when no row matches.
    """
    df = pd.read_csv(baseline_file)
    mask = (df['metric'] == 'Accuracy') & (df['split'] == 'Validation')
    if target_epc is not None:
        mask = mask & (df['examples_per_class'] == target_epc)
    scores = df.loc[mask, 'value']
    if scores.empty:
        return None
    return scores.max()


def analyze_pascal_results(examples_per_class_list, results_dir='my-results-randaug-v1-dataset-v2'):
    """Compare every result CSV against the baseline for each examples-per-class value.

    For each value in ``examples_per_class_list`` the function prints and
    displays the per-file best validation accuracy, the baseline score, the
    strategies that beat the baseline, and the single best row.

    Args:
        examples_per_class_list: examples_per_class values to analyse.
        results_dir: Directory holding the '*-0-*.csv' result files and the
            'pascal-baseline-0-<epc>.csv' baseline files. It is also passed to
            ``process_csv`` as the root for image-count look-ups.

    Returns:
        A 4-tuple of dicts keyed by examples_per_class:
        ``(results, outperforming_strategies, best_scores, baseline_scores)``.
        ``baseline_scores`` only has an entry when a baseline could be read;
        in that case ``outperforming_strategies`` holds the rows strictly
        above it, otherwise an empty frame.
    """
    results = {}
    outperforming_strategies = {}
    best_scores = {}
    baseline_scores = {}

    # The set of files does not depend on epc, so list it once. Sorting makes
    # the row order independent of the file system's listing order.
    # NOTE: the pattern also matches the 'pascal-baseline-0-*' files, so
    # baseline rows show up in the result tables alongside the strategies.
    csv_files = sorted(glob.glob(os.path.join(results_dir, '*-0-*.csv')))
    filtered_dir = results_dir

    for epc in examples_per_class_list:
        print(f"\n--- Analysis for pascal-0-{epc} ---")

        if not csv_files:
            print(f"No files found for pascal-0-{epc}")
            continue

        print(f"Found {len(csv_files)} files for processing")

        all_results = pd.concat([process_csv(file, epc, filtered_dir) for file in csv_files], ignore_index=True)
        all_results = all_results.dropna()

        if all_results.empty:
            print(f"No valid results found for pascal-0-{epc}")
            continue

        all_results_sorted = all_results.sort_values('value', ascending=False)

        print("All results:")
        display(all_results_sorted)

        baseline_file = os.path.join(results_dir, f'pascal-baseline-0-{epc}.csv')
        if os.path.exists(baseline_file):
            # Restrict the baseline to this epc; the file may hold other epcs too.
            baseline_score = get_baseline_score(baseline_file, target_epc=epc)
            if baseline_score is not None:
                baseline_scores[epc] = baseline_score
                print(f"Baseline score for pascal-0-{epc}: {baseline_score:.6f}")
            else:
                print(f"Warning: Could not extract baseline score from {baseline_file}")
        else:
            print(f"Warning: Baseline file not found for pascal-0-{epc}")

        if epc in baseline_scores:
            outperforming = all_results_sorted[all_results_sorted['value'] > baseline_scores[epc]]
            print("\nStrategies outperforming baseline:")
            for _, row in outperforming.iterrows():
                print(f"{row['filename']}: {row['value']:.6f} (Images: {row['image_count']})")
        else:
            # Without a baseline the comparison is meaningless; do not pretend
            # that every strategy beat a baseline of 0.
            outperforming = all_results_sorted.iloc[0:0]
            print("\nNo baseline available; skipping the comparison against the baseline.")

        results[epc] = all_results_sorted
        outperforming_strategies[epc] = outperforming

        best_strategy = all_results_sorted.iloc[0]
        best_scores[epc] = best_strategy['value']

        print(f"\nBest strategy for pascal-0-{epc}:")
        print(f"Filename: {best_strategy['filename']}")
        print(f"Examples per class: {best_strategy['examples_per_class']}")
        print(f"Best accuracy: {best_strategy['value']:.6f}")
        print(f"Image count: {best_strategy['image_count']}")

    return results, outperforming_strategies, best_scores, baseline_scores

# examples_per_class values covered by the analysis
examples_per_class_list = [1, 2, 4, 8, 16]

# Run the analysis once for all values
results, outperforming_strategies, best_scores, baseline_scores = analyze_pascal_results(examples_per_class_list)

# Report the gap between the best row and the baseline, scaled by 100
print("\nImprovements over researcher's baseline:")
for epc in examples_per_class_list:
    if epc not in best_scores or epc not in baseline_scores:
        print(f"Could not calculate improvement for pascal-0-{epc} due to missing data")
        continue
    improvement = (best_scores[epc] - baseline_scores[epc]) * 100
    print(f"Improved accuracy over researchers for pascal-0-{epc}: {improvement:.2f}%")


--- Analysis for pascal-0-1 ---
Found 30 files for processing
All results:


Unnamed: 0,filename,examples_per_class,value,image_count
9,pascal-0-1-percentile_overall_creative-0_10.csv,1,0.685399,0
0,pascal-0-1-top_n_overall-0_90.csv,1,0.679405,0
3,pascal-0-1-top_n_overall_creative-0_90.csv,1,0.679396,0
7,pascal-0-1-percentile_class_creative-0_10.csv,1,0.677403,0
6,pascal-0-1-percentile_overall-0_10.csv,1,0.677206,0
2,pascal-baseline-0-1.csv,1,0.662918,0
1,pascal-baseline-0-2.csv,1,0.660436,0
4,pascal-baseline-0-4.csv,1,0.660436,0
5,pascal-baseline-0-16.csv,1,0.660436,0
8,pascal-baseline-0-8.csv,1,0.660436,0


Baseline score for pascal-0-1: 0.662918

Strategies outperforming baseline:
pascal-0-1-percentile_overall_creative-0_10.csv: 0.685399 (Images: 0)
pascal-0-1-top_n_overall-0_90.csv: 0.679405 (Images: 0)
pascal-0-1-top_n_overall_creative-0_90.csv: 0.679396 (Images: 0)
pascal-0-1-percentile_class_creative-0_10.csv: 0.677403 (Images: 0)
pascal-0-1-percentile_overall-0_10.csv: 0.677206 (Images: 0)

Best strategy for pascal-0-1:
Filename: pascal-0-1-percentile_overall_creative-0_10.csv
Examples per class: 1
Best accuracy: 0.685399
Image count: 0

--- Analysis for pascal-0-2 ---
Found 30 files for processing
All results:


Unnamed: 0,filename,examples_per_class,value,image_count
5,pascal-0-2-zscore_top_n_overall-2-0_8.csv,2,0.744197,0
6,pascal-0-2-zscore_top_n_overall-2-0_9.csv,2,0.742506,0
4,pascal-0-2-percentile_top_n_overall-0_1-0_9.csv,2,0.739808,0
3,pascal-0-2-top_n_overall_creative-0_80.csv,2,0.737876,0
8,pascal-0-2-zscore_top_n_overall_creative-2-0_9...,2,0.735079,0
0,pascal-baseline-0-2.csv,2,0.71001,0
1,pascal-baseline-0-4.csv,2,0.71001,0
2,pascal-baseline-0-16.csv,2,0.71001,0
7,pascal-baseline-0-8.csv,2,0.71001,0


Baseline score for pascal-0-2: 0.710010

Strategies outperforming baseline:
pascal-0-2-zscore_top_n_overall-2-0_8.csv: 0.744197 (Images: 0)
pascal-0-2-zscore_top_n_overall-2-0_9.csv: 0.742506 (Images: 0)
pascal-0-2-percentile_top_n_overall-0_1-0_9.csv: 0.739808 (Images: 0)
pascal-0-2-top_n_overall_creative-0_80.csv: 0.737876 (Images: 0)
pascal-0-2-zscore_top_n_overall_creative-2-0_9.csv: 0.735079 (Images: 0)

Best strategy for pascal-0-2:
Filename: pascal-0-2-zscore_top_n_overall-2-0_8.csv
Examples per class: 2
Best accuracy: 0.744197
Image count: 0

--- Analysis for pascal-0-4 ---
Found 30 files for processing
All results:


Unnamed: 0,filename,examples_per_class,value,image_count
1,pascal-0-4-percentile_top_n_overall_creative-0...,4,0.778771,0
5,pascal-0-4-top_n_overall_creative-0_60.csv,4,0.776168,0
4,pascal-0-4-zscore_top_n_class_creative-2-0_7.csv,4,0.776091,0
0,pascal-0-4-zscore_top_n_class-2-0_7.csv,4,0.770005,0
7,pascal-0-4-zscore_top_n_class-2-0_8.csv,4,0.769754,0
2,pascal-baseline-0-4.csv,4,0.756322,0
3,pascal-baseline-0-16.csv,4,0.756322,0
6,pascal-baseline-0-8.csv,4,0.756322,0


Baseline score for pascal-0-4: 0.756322

Strategies outperforming baseline:
pascal-0-4-percentile_top_n_overall_creative-0_1-0_8.csv: 0.778771 (Images: 0)
pascal-0-4-top_n_overall_creative-0_60.csv: 0.776168 (Images: 0)
pascal-0-4-zscore_top_n_class_creative-2-0_7.csv: 0.776091 (Images: 0)
pascal-0-4-zscore_top_n_class-2-0_7.csv: 0.770005 (Images: 0)
pascal-0-4-zscore_top_n_class-2-0_8.csv: 0.769754 (Images: 0)

Best strategy for pascal-0-4:
Filename: pascal-0-4-percentile_top_n_overall_creative-0_1-0_8.csv
Examples per class: 4
Best accuracy: 0.778771
Image count: 0

--- Analysis for pascal-0-8 ---
Found 30 files for processing
All results:


Unnamed: 0,filename,examples_per_class,value,image_count
2,pascal-0-8-zscore_top_n_overall_creative-1_5-0...,8,0.801673,0
0,pascal-0-8-percentile_top_n_overall-0_1-0_9.csv,8,0.80029,0
3,pascal-0-8-zscore_top_n_overall_creative-2-0_9...,8,0.79819,0
6,pascal-0-8-zscore_top_n_class_creative-1_5-0_8...,8,0.797775,0
4,pascal-0-8-zscore_overall-1_50.csv,8,0.797748,0
1,pascal-baseline-0-16.csv,8,0.779054,0
5,pascal-baseline-0-8.csv,8,0.779054,0


Baseline score for pascal-0-8: 0.779054

Strategies outperforming baseline:
pascal-0-8-zscore_top_n_overall_creative-1_5-0_7.csv: 0.801673 (Images: 0)
pascal-0-8-percentile_top_n_overall-0_1-0_9.csv: 0.800290 (Images: 0)
pascal-0-8-zscore_top_n_overall_creative-2-0_9.csv: 0.798190 (Images: 0)
pascal-0-8-zscore_top_n_class_creative-1_5-0_8.csv: 0.797775 (Images: 0)
pascal-0-8-zscore_overall-1_50.csv: 0.797748 (Images: 0)

Best strategy for pascal-0-8:
Filename: pascal-0-8-zscore_top_n_overall_creative-1_5-0_7.csv
Examples per class: 8
Best accuracy: 0.801673
Image count: 0

--- Analysis for pascal-0-16 ---
Found 30 files for processing
All results:


Unnamed: 0,filename,examples_per_class,value,image_count
5,pascal-0-16-zscore_top_n_overall_creative-2-0_...,16,0.842425,0
1,pascal-0-16-percentile_top_n_class-0_1-0_9.csv,16,0.841394,0
3,pascal-0-16-percentile_overall-0_20.csv,16,0.840945,0
4,pascal-0-16-percentile_columns_overall-0_20.csv,16,0.840392,0
2,pascal-0-16-percentile_columns_overall-0_10.csv,16,0.839665,0
0,pascal-baseline-0-16.csv,16,0.814962,0


Baseline score for pascal-0-16: 0.814962

Strategies outperforming baseline:
pascal-0-16-zscore_top_n_overall_creative-2-0_8.csv: 0.842425 (Images: 0)
pascal-0-16-percentile_top_n_class-0_1-0_9.csv: 0.841394 (Images: 0)
pascal-0-16-percentile_overall-0_20.csv: 0.840945 (Images: 0)
pascal-0-16-percentile_columns_overall-0_20.csv: 0.840392 (Images: 0)
pascal-0-16-percentile_columns_overall-0_10.csv: 0.839665 (Images: 0)

Best strategy for pascal-0-16:
Filename: pascal-0-16-zscore_top_n_overall_creative-2-0_8.csv
Examples per class: 16
Best accuracy: 0.842425
Image count: 0

Improvements over researcher's baseline:
Improved accuracy over researchers for pascal-0-1: 2.25%
Improved accuracy over researchers for pascal-0-2: 3.42%
Improved accuracy over researchers for pascal-0-4: 2.24%
Improved accuracy over researchers for pascal-0-8: 2.26%
Improved accuracy over researchers for pascal-0-16: 2.75%


In [127]:
import pandas as pd
import glob
import os

# Constants
# NOTE(review): BASE_FOLDER is an absolute path on one specific machine, so this
# cell cannot be re-run elsewhere without editing it. Consider a path relative
# to the notebook's working directory.
BASE_FOLDER = '/Users/andrew/Thesis/smart-image-augmentation/results/pascal-researchers-aug/textual-inversion-1.0-0.75-0.5-0.25'
# examples_per_class values to summarise; also used in the 'results_<seed>_<epc>.csv' file names
EXAMPLES_PER_CLASS = [1, 2, 4, 8, 16]
SEEDS = range(8)  # 0 to 7

def process_csv(file_path, target_epc):
    """Return the best validation accuracy in a results CSV for one examples-per-class value.

    Args:
        file_path: Path of the results CSV to read.
        target_epc: examples_per_class value to select.

    Returns:
        The maximum 'value' among rows with metric 'Accuracy', split
        'Validation' and examples_per_class equal to ``target_epc``, or
        ``None`` when there is no such row.
    """
    table = pd.read_csv(file_path)
    wanted_rows = (
        (table['metric'] == 'Accuracy')
        & (table['split'] == 'Validation')
        & (table['examples_per_class'] == target_epc)
    )
    accuracies = table.loc[wanted_rows, 'value']
    if accuracies.empty:
        return None
    return accuracies.max()

def generate_results_dataframe():
    """Build a seeds-by-examples-per-class table of best validation accuracies.

    Reads ``results_<seed>_<epc>.csv`` from BASE_FOLDER for every combination
    of SEEDS and EXAMPLES_PER_CLASS. Missing files are reported and skipped;
    files in which ``process_csv`` finds no matching row are skipped silently.

    Returns:
        A DataFrame indexed by 'Examples_Per_Class' with one column per seed
        plus 'Best_Overall' (the row-wise maximum). When no file yields a
        result, an empty frame with only the 'Best_Overall' column is returned
        instead of failing inside ``pivot``.
    """
    results = []
    for epc in EXAMPLES_PER_CLASS:
        for seed in SEEDS:
            filename = os.path.join(BASE_FOLDER, f"results_{seed}_{epc}.csv")
            if not os.path.exists(filename):
                print(f"File not found: {filename}")
                continue
            best_accuracy = process_csv(filename, epc)
            if best_accuracy is not None:
                results.append({
                    'Seed': seed,
                    'Examples_Per_Class': epc,
                    'Best_Accuracy': best_accuracy
                })

    if not results:
        # pd.DataFrame([]) has no columns, so pivot() would raise KeyError.
        print(f"No usable result files found in {BASE_FOLDER}")
        return pd.DataFrame(columns=['Best_Overall'],
                            index=pd.Index([], name='Examples_Per_Class'))

    results_df = pd.DataFrame(results)

    # Pivot the dataframe to have seeds as columns and examples per class as rows
    pivot_df = results_df.pivot(index='Examples_Per_Class', columns='Seed', values='Best_Accuracy')

    # Add a column for the best accuracy across all seeds
    pivot_df['Best_Overall'] = pivot_df.max(axis=1)

    return pivot_df

# Generate and display the results
results_df = generate_results_dataframe()
display(results_df)

# Optionally, save to CSV
# results_df.to_csv('pascal_results_summary.csv')

Seed,0,1,2,3,4,5,6,7,Best_Overall
Examples_Per_Class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1,0.662918,0.660436,0.660436,0.660436,0.660436,0.660436,0.660436,0.660436,0.662918
2,0.71001,0.71001,0.71001,0.71001,0.71001,0.71001,0.71001,0.71001,0.71001
4,0.756322,0.756322,0.756322,0.756322,0.756322,0.759291,0.759291,0.759291,0.759291
8,0.779054,0.779054,0.779054,0.779054,0.779054,0.779054,0.783132,0.813692,0.813692
16,0.814962,0.814962,0.814962,0.814962,0.819774,0.819774,0.822897,0.829569,0.829569


In [125]:
results_df

Seed,0,1,2,3,4,5,6,7,Best_Overall
Examples_Per_Class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1,0.662918,0.814962,0.814962,0.814962,0.814962,0.819774,0.819774,0.822897,0.822897
2,0.71001,0.814962,0.814962,0.814962,0.814962,0.819774,0.819774,0.822897,0.822897
4,0.756322,0.814962,0.814962,0.814962,0.814962,0.819774,0.819774,0.822897,0.822897
8,0.779054,0.814962,0.814962,0.814962,0.814962,0.819774,0.819774,0.822897,0.822897
16,0.814962,0.814962,0.814962,0.814962,0.819774,0.819774,0.822897,0.829569,0.829569


In [87]:
import pandas as pd
import numpy as np
import scipy.stats as stats

# Configuration
SCORE_COLUMNS = ['class_representation', 'visual_fidelity', 'structural_integrity', 'diversity', 'beneficial_variations']
CREATIVE_SCORE_PAIRS = [(0, 3), (2, 4)]  # Indices of columns to be multiplied for creative score

def load_filtration_results(file_path):
    """Read the filtration results CSV at ``file_path`` into a DataFrame."""
    filtration_results = pd.read_csv(file_path)
    return filtration_results

def calculate_score(row, creative=False):
    """Calculate the score for a row based on the strategy type.

    Args:
        row: Mapping-like row (e.g. a pandas Series) holding every column
            named in SCORE_COLUMNS.
        creative: When True the score is the product of all five score
            columns; otherwise it is their arithmetic mean.

    Returns:
        The combined score for the row.
    """
    if creative:
        # Two further return statements used to follow this one and could never
        # run; they were removed. The multiplication order is kept as it was so
        # the floating-point results stay bit-for-bit identical.
        return (row[SCORE_COLUMNS[0]] * row[SCORE_COLUMNS[3]] * row[SCORE_COLUMNS[1]]
                * row[SCORE_COLUMNS[2]] * row[SCORE_COLUMNS[4]])
    return sum(row[col] for col in SCORE_COLUMNS) / len(SCORE_COLUMNS)

def apply_strategy(df, strategy):
    """Apply the specified strategy to the dataframe.

    Args:
        df: Filtration results. It is not modified; scores are added to a copy.
        strategy: Dict describing the strategy. 'type' selects the filter
            ('top_n', 'percentile', 'zscore', 'percentile_by_columns',
            'zscore_top_n' or 'percentile_top_n'). Depending on the type,
            'value', 'threshold', 'zscore_threshold', 'percentile_value' and
            'top_n_value' are read. 'group_by_class' (default True) and
            'creative' (default False) are optional.

    Returns:
        The filtered DataFrame produced by the selected strategy function.

    Raises:
        ValueError: If ``strategy['type']`` is not one of the known types.
            This is now checked before any scoring work is done.
    """
    strategy_type = strategy['type']
    known_types = ('top_n', 'percentile', 'zscore', 'percentile_by_columns',
                   'zscore_top_n', 'percentile_top_n')
    if strategy_type not in known_types:
        raise ValueError(f"Unknown strategy type: {strategy_type}")

    group_by_class = strategy.get('group_by_class', True)
    creative = strategy.get('creative', False)

    # The combined strategies compute their own scores after pre-filtering.
    if strategy_type == 'zscore_top_n':
        return apply_zscore_top_n_strategy(df, strategy['zscore_threshold'], strategy['top_n_value'], group_by_class, creative)
    if strategy_type == 'percentile_top_n':
        return apply_percentile_top_n_strategy(df, strategy['percentile_value'], strategy['top_n_value'], group_by_class, creative)

    # Score a copy so the caller's frame does not silently gain a 'score' column.
    scored = df.copy()
    scored['score'] = scored.apply(lambda row: calculate_score(row, creative), axis=1)

    if strategy_type == 'top_n':
        return apply_top_n_strategy(scored, strategy['value'], group_by_class)
    if strategy_type == 'percentile':
        return apply_percentile_strategy(scored, strategy['value'], group_by_class)
    if strategy_type == 'zscore':
        return apply_zscore_strategy(scored, strategy['threshold'], group_by_class)
    # Only 'percentile_by_columns' is left after the checks above.
    return apply_percentile_by_columns_strategy(scored, strategy['value'], group_by_class)

def apply_top_n_strategy(df, n, group_by_class):
    """Keep the highest-scoring fraction ``n`` of rows, ranked by 'score'.

    Args:
        df: DataFrame with a 'score' column (and 'class_name' when grouping).
        n: Fraction of rows to keep; at least one row is always kept per
            frame that is ranked.
        group_by_class: When True the fraction is applied within each
            'class_name' group and the result index is reset; otherwise it is
            applied to the whole frame and the original index is kept.

    Returns:
        The selected rows.
    """
    def top_rows(frame):
        keep_count = max(1, int(len(frame) * n))
        return frame.nlargest(keep_count, 'score')

    if not group_by_class:
        return top_rows(df)
    return df.groupby('class_name').apply(top_rows).reset_index(drop=True)

def apply_percentile_strategy(df, p, group_by_class):
    """Keep rows whose 'score' is strictly above the ``p`` quantile.

    Args:
        df: DataFrame with a 'score' column (and 'class_name' when grouping).
        p: Quantile in [0, 1] used as the cut-off.
        group_by_class: When True the quantile is computed separately for
            each 'class_name'; otherwise over the whole frame.

    Returns:
        The rows of ``df`` above the cut-off, with their original index.
    """
    if group_by_class:
        cutoff = df.groupby('class_name')['score'].transform(lambda scores: scores.quantile(p))
    else:
        cutoff = df['score'].quantile(p)
    above_cutoff = df['score'] > cutoff
    return df[above_cutoff]

def apply_zscore_strategy(df, threshold, group_by_class):
    """Drop rows that are z-score outliers in any of the SCORE_COLUMNS.

    A row is dropped when the absolute z-score of at least one score column
    exceeds ``threshold``.

    Args:
        df: DataFrame holding the SCORE_COLUMNS (and 'class_name' when grouping).
        threshold: Absolute z-score above which a value counts as an outlier.
        group_by_class: When True z-scores are computed within each
            'class_name' group and the result index is reset; otherwise they
            are computed over the whole frame.

    Returns:
        The rows that are not outliers.
    """
    def without_outliers(frame):
        is_outlier = (np.abs(stats.zscore(frame[SCORE_COLUMNS])) > threshold).any(axis=1)
        return frame[~is_outlier]

    if not group_by_class:
        return without_outliers(df)
    return df.groupby('class_name').apply(without_outliers).reset_index(drop=True)

def apply_percentile_by_columns_strategy(df, percentile, group_by_class):
    """Keep rows that exceed the given quantile in every one of the SCORE_COLUMNS.

    Args:
        df: DataFrame holding the SCORE_COLUMNS (and 'class_name' when grouping).
        percentile: Quantile in [0, 1] computed per score column.
        group_by_class: When True the per-column quantiles are computed within
            each 'class_name' group and the result index is reset; otherwise
            they are computed over the whole frame.

    Returns:
        The rows strictly above the cut-off in all score columns.
    """
    def above_all_cutoffs(frame):
        column_scores = frame[SCORE_COLUMNS]
        passes = np.all(column_scores > column_scores.quantile(percentile), axis=1)
        return frame[passes]

    if not group_by_class:
        return above_all_cutoffs(df)
    return df.groupby('class_name').apply(above_all_cutoffs).reset_index(drop=True)

def apply_zscore_top_n_strategy(df, zscore_threshold, top_n_value, group_by_class, creative):
    """Remove z-score outliers, score the remaining rows, then keep the top fraction.

    Args:
        df: DataFrame holding the SCORE_COLUMNS (and 'class_name' when grouping).
        zscore_threshold: Absolute z-score above which a row is discarded.
            The z-scores are computed over the whole frame, not per class.
        top_n_value: Fraction of the surviving rows to keep by 'score'.
        group_by_class: Forwarded to ``apply_top_n_strategy``; it only affects
            the top-n step.
        creative: Forwarded to ``calculate_score``.

    Returns:
        The rows selected by ``apply_top_n_strategy``.
    """
    is_outlier = (np.abs(stats.zscore(df[SCORE_COLUMNS])) > zscore_threshold).any(axis=1)
    # Copy before adding a column: assigning onto a boolean-mask slice triggers
    # SettingWithCopyWarning and leaves it unclear which frame is written.
    zscore_filtered = df[~is_outlier].copy()
    zscore_filtered['score'] = zscore_filtered.apply(lambda row: calculate_score(row, creative), axis=1)
    return apply_top_n_strategy(zscore_filtered, top_n_value, group_by_class)

def apply_percentile_top_n_strategy(df, percentile_value, top_n_value, group_by_class, creative):
    """Apply a per-column quantile cut-off, score the remaining rows, then keep the top fraction.

    Args:
        df: DataFrame holding the SCORE_COLUMNS (and 'class_name' when grouping).
        percentile_value: Quantile in [0, 1]; a row survives only if it is
            strictly above this quantile in every score column. The quantiles
            are computed over the whole frame, not per class.
        top_n_value: Fraction of the surviving rows to keep by 'score'.
        group_by_class: Forwarded to ``apply_top_n_strategy``; it only affects
            the top-n step.
        creative: Forwarded to ``calculate_score``.

    Returns:
        The rows selected by ``apply_top_n_strategy``.
    """
    passes = np.all(df[SCORE_COLUMNS] > df[SCORE_COLUMNS].quantile(percentile_value), axis=1)
    # Copy before adding a column: assigning onto a boolean-mask slice triggers
    # SettingWithCopyWarning and leaves it unclear which frame is written.
    percentile_filtered = df[passes].copy()
    percentile_filtered['score'] = percentile_filtered.apply(lambda row: calculate_score(row, creative), axis=1)
    return apply_top_n_strategy(percentile_filtered, top_n_value, group_by_class)

# Example usage:
# 1. Load the data
df = load_filtration_results('filtration-results-v1/pascal-0-1-results.csv')

# 2. Choose a strategy: exactly one `strategy = ...` line below should be
#    uncommented. The others are kept as a catalogue of the available options.

# Top-n strategies
# strategy = {'type': 'top_n', 'value': 0.9, 'group_by_class': True, 'name': 'top_n_class', 'creative': False}
# strategy = {'type': 'top_n', 'value': 0.9, 'group_by_class': False, 'name': 'top_n_overall', 'creative': False}
# strategy = {'type': 'top_n', 'value': 0.9, 'group_by_class': True, 'name': 'top_n_class_creative', 'creative': True}
# strategy = {'type': 'top_n', 'value': 0.9, 'group_by_class': False, 'name': 'top_n_overall_creative', 'creative': True}

# Percentile strategies
# strategy = {'type': 'percentile', 'value': 0.1, 'group_by_class': True, 'name': 'percentile_class', 'creative': False}
# strategy = {'type': 'percentile', 'value': 0.1, 'group_by_class': False, 'name': 'percentile_overall', 'creative': False}
# strategy = {'type': 'percentile', 'value': 0.1, 'group_by_class': True, 'name': 'percentile_class_creative', 'creative': True}
# Active choice: overall 10th-percentile cut-off on the creative score.
strategy = {'type': 'percentile', 'value': 0.1, 'group_by_class': False, 'name': 'percentile_overall_creative', 'creative': True}

# Z-score strategies
# strategy = {'type': 'zscore', 'threshold': 2, 'group_by_class': True, 'name': 'zscore_class'}
# strategy = {'type': 'zscore', 'threshold': 2, 'group_by_class': False, 'name': 'zscore_overall'}

# Percentile by columns strategies
# strategy = {'type': 'percentile_by_columns', 'value': 0.1, 'group_by_class': True, 'name': 'percentile_columns_class'}
# strategy = {'type': 'percentile_by_columns', 'value': 0.1, 'group_by_class': False, 'name': 'percentile_columns_overall'}

# Combined Z-score + top-n strategies
# strategy = {'type': 'zscore_top_n', 'zscore_threshold': 2, 'top_n_value': 0.9, 'group_by_class': True, 'name': 'zscore_top_n_class', 'creative': False}
# strategy = {'type': 'zscore_top_n', 'zscore_threshold': 2, 'top_n_value': 0.9, 'group_by_class': False, 'name': 'zscore_top_n_overall', 'creative': False}
# strategy = {'type': 'zscore_top_n', 'zscore_threshold': 2, 'top_n_value': 0.9, 'group_by_class': True, 'name': 'zscore_top_n_class_creative', 'creative': True}
# strategy = {'type': 'zscore_top_n', 'zscore_threshold': 2, 'top_n_value': 0.9, 'group_by_class': False, 'name': 'zscore_top_n_overall_creative', 'creative': True}

# Combined Column-level Percentile + top-n strategies
# strategy = {'type': 'percentile_top_n', 'percentile_value': 0.1, 'top_n_value': 0.9, 'group_by_class': True, 'name': 'percentile_top_n_class', 'creative': False}
# strategy = {'type': 'percentile_top_n', 'percentile_value': 0.1, 'top_n_value': 0.9, 'group_by_class': False, 'name': 'percentile_top_n_overall', 'creative': False}
# strategy = {'type': 'percentile_top_n', 'percentile_value': 0.1, 'top_n_value': 0.9, 'group_by_class': True, 'name': 'percentile_top_n_class_creative', 'creative': True}
# strategy = {'type': 'percentile_top_n', 'percentile_value': 0.1, 'top_n_value': 0.9, 'group_by_class': False, 'name': 'percentile_top_n_overall_creative', 'creative': True}

# 3. Apply the strategy
filtered_df = apply_strategy(df, strategy)

# 4. View results: the surviving rows, their total count, and the count per class
display(filtered_df)
print(f"Total images after filtration: {len(filtered_df)}")
print("Images per class:")
print(filtered_df['class_name'].value_counts())

Unnamed: 0,index,class_representation,visual_fidelity,structural_integrity,diversity,beneficial_variations,explanation,filename,class_name,real_filename,score
1,2,0.4,0.3,0.4,0.6,0.5,The image shows multiple airplanes but is blur...,aug-0-1.png,airplane,2007_009348.jpg,0.01440
2,3,0.5,0.4,0.5,0.6,0.5,The image is clearer than the previous ones bu...,aug-0-2.png,airplane,2007_009348.jpg,0.03000
3,4,0.6,0.5,0.6,0.7,0.6,The image is clearer and the airplane is more ...,aug-0-3.png,airplane,2007_009348.jpg,0.07560
4,5,0.7,0.6,0.7,0.8,0.7,The image is clear and the airplane is disting...,aug-0-4.png,airplane,2007_009348.jpg,0.16464
5,6,0.8,0.7,0.8,0.8,0.8,The image is clear and the airplane is highly ...,aug-0-5.png,airplane,2007_009348.jpg,0.28672
...,...,...,...,...,...,...,...,...,...,...,...
195,6,0.6,0.5,0.6,0.7,0.6,The image includes a television setup but is c...,aug-19-5.png,television,2007_000121.jpg,0.07560
196,7,0.3,0.3,0.4,0.8,0.5,"The image is cluttered with unrelated items, m...",aug-19-6.png,television,2007_000121.jpg,0.01440
197,8,0.4,0.4,0.5,0.7,0.6,The image includes a television setup but is c...,aug-19-7.png,television,2007_000121.jpg,0.03360
198,9,0.5,0.5,0.6,0.7,0.6,The image includes a television setup but is c...,aug-19-8.png,television,2007_000121.jpg,0.06300


Total images after filtration: 178
Images per class:
class_name
dining table    10
chair           10
train           10
sofa            10
potted plant    10
motorcycle      10
dog             10
bicycle         10
television      10
cat             10
car             10
boat            10
bottle           9
airplane         9
cow              8
horse            8
person           8
bus              7
bird             6
sheep            3
Name: count, dtype: int64


In [88]:
filtered_df.sort_values('score', ascending=False)[-20:]

Unnamed: 0,index,class_representation,visual_fidelity,structural_integrity,diversity,beneficial_variations,explanation,filename,class_name,real_filename,score
196,7,0.3,0.3,0.4,0.8,0.5,"The image is cluttered with unrelated items, m...",aug-19-6.png,television,2007_000121.jpg,0.0144
1,2,0.4,0.3,0.4,0.6,0.5,The image shows multiple airplanes but is blur...,aug-0-1.png,airplane,2007_009348.jpg,0.0144
92,3,0.3,0.4,0.3,0.7,0.5,The image is blurry and the cows are not clear...,aug-9-2.png,cow,2011_001653.jpg,0.0126
97,8,0.3,0.4,0.3,0.7,0.5,The image is blurry and the cows are not clear...,aug-9-7.png,cow,2011_001653.jpg,0.0126
94,5,0.3,0.4,0.3,0.7,0.5,The image is blurry and the cows are not clear...,aug-9-4.png,cow,2011_001653.jpg,0.0126
126,7,0.4,0.3,0.3,0.7,0.5,The image has noticeable distortions and artif...,aug-12-6.png,horse,2011_003184.jpg,0.0126
51,2,0.3,0.4,0.3,0.5,0.4,The image is heavily distorted and contains mu...,aug-5-1.png,bus,2008_003373.jpg,0.0072
43,4,0.3,0.3,0.4,0.5,0.4,The image is less clear with distorted bottles...,aug-4-3.png,bottle,2009_001937.jpg,0.0072
84,5,0.3,0.4,0.4,0.4,0.3,The image is highly distorted with significant...,aug-8-4.png,chair,2011_002709.jpg,0.00576
89,10,0.3,0.4,0.4,0.4,0.3,The image is highly distorted with significant...,aug-8-9.png,chair,2011_002709.jpg,0.00576


In [95]:
df = load_filtration_results('filtration-results-v1/pascal-0-1-results.csv')
df

Unnamed: 0,index,class_representation,visual_fidelity,structural_integrity,diversity,beneficial_variations,explanation,filename,class_name,real_filename
0,1,0.3,0.2,0.2,0.5,0.4,"The image is highly distorted and unclear, mak...",aug-0-0.png,airplane,2007_009348.jpg
1,2,0.4,0.3,0.4,0.6,0.5,The image shows multiple airplanes but is blur...,aug-0-1.png,airplane,2007_009348.jpg
2,3,0.5,0.4,0.5,0.6,0.5,The image is clearer than the previous ones bu...,aug-0-2.png,airplane,2007_009348.jpg
3,4,0.6,0.5,0.6,0.7,0.6,The image is clearer and the airplane is more ...,aug-0-3.png,airplane,2007_009348.jpg
4,5,0.7,0.6,0.7,0.8,0.7,The image is clear and the airplane is disting...,aug-0-4.png,airplane,2007_009348.jpg
...,...,...,...,...,...,...,...,...,...,...
195,6,0.6,0.5,0.6,0.7,0.6,The image includes a television setup but is c...,aug-19-5.png,television,2007_000121.jpg
196,7,0.3,0.3,0.4,0.8,0.5,"The image is cluttered with unrelated items, m...",aug-19-6.png,television,2007_000121.jpg
197,8,0.4,0.4,0.5,0.7,0.6,The image includes a television setup but is c...,aug-19-7.png,television,2007_000121.jpg
198,9,0.5,0.5,0.6,0.7,0.6,The image includes a television setup but is c...,aug-19-8.png,television,2007_000121.jpg


In [97]:
df.sort_values('diversity', ascending=False)[-20:]

Unnamed: 0,index,class_representation,visual_fidelity,structural_integrity,diversity,beneficial_variations,explanation,filename,class_name,real_filename
168,9,0.3,0.3,0.2,0.4,0.3,The image has some resemblance to a sheep but ...,aug-16-8.png,sheep,2009_002988.jpg
35,6,1.0,0.9,1.0,0.3,0.4,This image is very similar to the reference im...,aug-3-5.png,boat,2008_006065.jpg
36,7,1.0,0.9,1.0,0.3,0.4,This image is very similar to the reference im...,aug-3-6.png,boat,2008_006065.jpg
38,9,1.0,0.9,1.0,0.3,0.4,This image is very similar to the reference im...,aug-3-8.png,boat,2008_006065.jpg
37,8,1.0,0.9,1.0,0.3,0.4,This image is very similar to the reference im...,aug-3-7.png,boat,2008_006065.jpg
193,4,0.9,0.8,0.9,0.3,0.4,The image closely resembles the reference imag...,aug-19-3.png,television,2007_000121.jpg
33,4,1.0,0.9,1.0,0.3,0.4,This image is very similar to the reference im...,aug-3-3.png,boat,2008_006065.jpg
21,2,1.0,0.8,1.0,0.3,0.4,The image maintains the core features and stru...,aug-2-1.png,bird,2010_000075.jpg
39,10,1.0,0.9,1.0,0.3,0.4,This image is very similar to the reference im...,aug-3-9.png,boat,2008_006065.jpg
151,2,0.9,0.9,0.9,0.3,0.4,"This image is very similar to the reference, m...",aug-15-1.png,potted plant,2010_000567.jpg


### Results from the researchers' implementation with the following train transform:

In [94]:
# train_transform = transforms.Compose([
#             transforms.Resize(image_size),
#             transforms.RandomHorizontalFlip(p=0.5),
#             transforms.RandomRotation(degrees=15.0),
#             transforms.ToTensor(),
#             transforms.ConvertImageDtype(torch.float),
#             transforms.Lambda(lambda x: x.expand(3, *image_size)),
#             transforms.Normalize(mean=[0.5, 0.5, 0.5], 
#                                   std=[0.5, 0.5, 0.5])
#         ])

In [1]:
import pandas as pd
# NOTE(review): the last recorded run of this cell failed with FileNotFoundError,
# and the directory listing further down shows no 'my_results' folder — confirm the path.
their_df = pd.read_csv('my_results/results_0_1.csv')
filtered_df = their_df.loc[
    their_df['metric'].eq('Accuracy') & their_df['split'].eq('Validation')
]

# Best validation accuracy for each examples_per_class value
best_results_their_v1 = (
    filtered_df
    .groupby('examples_per_class')['value']
    .max()
    .reset_index()
)
best_results_their_v1

FileNotFoundError: [Errno 2] No such file or directory: 'my_results/results_0_1.csv'

In [18]:
!pwd

/Users/andrew/Thesis/smart-image-augmentation/results


In [19]:
!ls

comparing-our-accuracy-and-researchers.ipynb
experiments.ipynb
[1m[36mfiltration-results[m[m
[1m[36mmy-results[m[m
replicating-paper-results.ipynb
[1m[36mresearchers-results[m[m


### Results from the researchers' implementation with the following train transform:

In [97]:
# transforms.Compose([
#             transforms.Resize(image_size),
#             transforms.ToTensor(),
#             transforms.ConvertImageDtype(torch.float),
#             transforms.Lambda(lambda x: x.expand(3, *image_size)),
#             transforms.Normalize(mean=[0.5, 0.5, 0.5], 
#                                   std=[0.5, 0.5, 0.5])
#         ])

In [113]:
import pandas as pd
their_df = pd.read_csv('my_results/results_0_1_v2.csv')
filtered_df = their_df.loc[
    their_df['metric'].eq('Accuracy') & their_df['split'].eq('Validation')
]

# Best validation accuracy for each examples_per_class value
best_results_their_v2 = (
    filtered_df
    .groupby('examples_per_class')['value']
    .max()
    .reset_index()
)
best_results_their_v2

Unnamed: 0,examples_per_class,value
0,1,0.649085


In [17]:
df = pd.read_csv('my-results/results-filtered-pascal-0-1-top_n-0_90_v1.csv')
filtered_df_my = df.loc[
    df['metric'].eq('Accuracy') & df['split'].eq('Validation')
]
# Best validation accuracy for each examples_per_class value
best_results_my = (
    filtered_df_my
    .groupby('examples_per_class')['value']
    .max()
    .reset_index()
)
best_results_my

Unnamed: 0,examples_per_class,value
0,1,0.659246


In [15]:
df = pd.read_csv('my-results/results-filtered-pascal-0-1-top_n-0_90_v2.csv')
filtered_df_my = df.loc[
    df['metric'].eq('Accuracy') & df['split'].eq('Validation')
]
# Best validation accuracy for each examples_per_class value
best_results_my = (
    filtered_df_my
    .groupby('examples_per_class')['value']
    .max()
    .reset_index()
)
best_results_my

Unnamed: 0,examples_per_class,value
0,1,0.655907


### Results from my implementation (based on their code) with the following train transform:

In [106]:
# train_transform = transforms.Compose([
#             transforms.Resize(image_size),
#             transforms.RandomHorizontalFlip(p=0.5),
#             transforms.RandomRotation(degrees=15.0),
#             transforms.ToTensor(),
#             transforms.ConvertImageDtype(torch.float),
#             transforms.Lambda(lambda x: x.expand(3, *image_size)),
#             transforms.Normalize(mean=[0.5, 0.5, 0.5], 
#                                   std=[0.5, 0.5, 0.5])
#         ])

In [134]:
my_df = pd.read_csv('results_1_pascal-0-1_5.csv', index_col=None)
my_filtered_df = my_df.loc[
    my_df['metric'].eq('Accuracy') & my_df['split'].eq('Validation')
]

# Best validation accuracy for each examples_per_class value
best_results_my_v1 = (
    my_filtered_df
    .groupby('examples_per_class')['value']
    .max()
    .reset_index()
)
best_results_my_v1

Unnamed: 0,examples_per_class,value
0,1,0.661101


In [121]:
best_results_my_v1.value.to_numpy()

array([0.66110075])

In [126]:
print(f"Difference between our and researchers implementation with augmentation transform: {best_results_their_v1.value.to_numpy()[0]-best_results_my_v1.value.to_numpy()[0]}")


Difference between our and researchers implementation with augmentation transform: 0.0018177000000000332


Unnamed: 0,examples_per_class,value
0,1,0.655907


### Results from my implementation (based on their code) with the following train transform:

In [108]:
# transforms.Compose([
#                 transforms.Resize(self.image_size),
#                 transforms.ToTensor(),
#                 transforms.ConvertImageDtype(torch.float),
#                 transforms.Lambda(lambda x: x.expand(3, *self.image_size)),
#                 transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
#             ])

In [124]:
my_df = pd.read_csv('results_1_pascal-0-1_4.csv', index_col=None)
my_filtered_df = my_df.loc[
    my_df['metric'].eq('Accuracy') & my_df['split'].eq('Validation')
]

# Best validation accuracy for each examples_per_class value
best_results_my_v2 = (
    my_filtered_df
    .groupby('examples_per_class')['value']
    .max()
    .reset_index()
)
best_results_my_v2

Unnamed: 0,examples_per_class,value
0,1,0.659004


In [127]:
print(f"Difference between our and researchers implementation with simple transform: {best_results_their_v2.value.to_numpy()[0]-best_results_my_v2.value.to_numpy()[0]}")


Difference between our and researchers implementation with simple transform: -0.009919289999999914


In [136]:
my_df = pd.read_csv('results_1_pascal-0-1_7.csv', index_col=None)
my_filtered_df = my_df.loc[
    my_df['metric'].eq('Accuracy') & my_df['split'].eq('Validation')
]

# Best validation accuracy for each examples_per_class value
best_results_my_v2 = (
    my_filtered_df
    .groupby('examples_per_class')['value']
    .max()
    .reset_index()
)
best_results_my_v2

Unnamed: 0,examples_per_class,value
0,1,0.661101


In [139]:
my_df = pd.read_csv('results_1_pascal-0-1_9.csv', index_col=None)
my_filtered_df = my_df.loc[
    my_df['metric'].eq('Accuracy') & my_df['split'].eq('Validation')
]

# Best validation accuracy for each examples_per_class value
best_results_my_v2 = (
    my_filtered_df
    .groupby('examples_per_class')['value']
    .max()
    .reset_index()
)
best_results_my_v2

Unnamed: 0,examples_per_class,value
0,1,0.656475


In [143]:
my_df = pd.read_csv('results_1_pascal-0-1_10.csv', index_col=None)
my_filtered_df = my_df.loc[
    my_df['metric'].eq('Accuracy') & my_df['split'].eq('Validation')
]

# Best validation accuracy for each examples_per_class value
best_results_my_v2 = (
    my_filtered_df
    .groupby('examples_per_class')['value']
    .max()
    .reset_index()
)
best_results_my_v2

Unnamed: 0,examples_per_class,value
0,1,0.656475


In [3]:
import pandas as pd
# Read the results CSV and keep only rows whose metric is 'Accuracy' on the 'Validation' split.
my_df = pd.read_csv('my_results/results_1_pascal-0-1_11.csv', index_col=None)
my_filtered_df = my_df.loc[
    my_df['metric'].eq('Accuracy') & my_df['split'].eq('Validation')
]

# Maximum 'value' for each examples_per_class, flattened back into ordinary columns.
best_results_my_v2 = (
    my_filtered_df
    .groupby('examples_per_class')['value']
    .max()
    .reset_index()
)
best_results_my_v2

Unnamed: 0,examples_per_class,value
0,1,0.656475


In [4]:
import pandas as pd
# Read the results CSV and keep only rows whose metric is 'Accuracy' on the 'Validation' split.
my_df = pd.read_csv('my_results/results_1_pascal-0-1_12.csv', index_col=None)
my_filtered_df = my_df.loc[
    my_df['metric'].eq('Accuracy') & my_df['split'].eq('Validation')
]

# Maximum 'value' for each examples_per_class, flattened back into ordinary columns.
best_results_my_v2 = (
    my_filtered_df
    .groupby('examples_per_class')['value']
    .max()
    .reset_index()
)
best_results_my_v2

Unnamed: 0,examples_per_class,value
0,1,0.656475


In [5]:
import pandas as pd
# Read the results CSV and keep only rows whose metric is 'Accuracy' on the 'Validation' split.
my_df = pd.read_csv('my_results/results_1_pascal-0-1_13.csv', index_col=None)
my_filtered_df = my_df.loc[
    my_df['metric'].eq('Accuracy') & my_df['split'].eq('Validation')
]

# Maximum 'value' for each examples_per_class, flattened back into ordinary columns.
best_results_my_v2 = (
    my_filtered_df
    .groupby('examples_per_class')['value']
    .max()
    .reset_index()
)
best_results_my_v2

Unnamed: 0,examples_per_class,value
0,1,0.660327


In [9]:
# List the contents of the current working directory via the shell.
!ls

comparing-our-accuracy-and-researchers.ipynb
experiments.ipynb
[1m[36mfiltration-results[m[m
[1m[36mmy-results[m[m
replicating-paper-results.ipynb
[1m[36mresearchers-results[m[m


In [11]:
import pandas as pd
# Read the results CSV and keep only rows whose metric is 'Accuracy' on the 'Validation' split.
my_df = pd.read_csv('my-results/results-pascal-0-1_v1.csv', index_col=None)
my_filtered_df = my_df.loc[
    my_df['metric'].eq('Accuracy') & my_df['split'].eq('Validation')
]

# Maximum 'value' for each examples_per_class, flattened back into ordinary columns.
best_results_my_v2 = (
    my_filtered_df
    .groupby('examples_per_class')['value']
    .max()
    .reset_index()
)
best_results_my_v2

Unnamed: 0,examples_per_class,value
0,1,0.659004


In [12]:
import pandas as pd
# Read the results CSV and keep only rows whose metric is 'Accuracy' on the 'Validation' split.
my_df = pd.read_csv('my-results/results-pascal-0-1_v2.csv', index_col=None)
my_filtered_df = my_df.loc[
    my_df['metric'].eq('Accuracy') & my_df['split'].eq('Validation')
]

# Maximum 'value' for each examples_per_class, flattened back into ordinary columns.
best_results_my_v2 = (
    my_filtered_df
    .groupby('examples_per_class')['value']
    .max()
    .reset_index()
)
best_results_my_v2

Unnamed: 0,examples_per_class,value
0,1,0.660327


In [70]:
# Read the results CSV and keep only rows whose metric is 'Accuracy' on the 'Validation' split.
# NOTE(review): the frame is named their_df but is read from 'my_results/' — confirm the source.
their_df = pd.read_csv('my_results/results_0_1.csv')
filtered_df = their_df.loc[
    their_df['metric'].eq('Accuracy') & their_df['split'].eq('Validation')
]

# Maximum 'value' for each examples_per_class, flattened back into ordinary columns.
best_results_their = (
    filtered_df
    .groupby('examples_per_class')['value']
    .max()
    .reset_index()
)
best_results_their

Unnamed: 0,examples_per_class,value
0,1,0.662918


In [None]:
# Read the results CSV and keep only rows whose metric is 'Accuracy' on the 'Validation' split.
df = pd.read_csv('my-results/results-filtered-pascal-0-1-top_n-0_90_v1.csv')
filtered_df_my = df.loc[df['metric'].eq('Accuracy') & df['split'].eq('Validation')]

# One row per examples_per_class holding the largest 'value' seen in that group.
best_results_my = (
    filtered_df_my
    .groupby('examples_per_class')['value']
    .max()
    .reset_index()
)
best_results_my

Unnamed: 0,examples_per_class,value
0,1,0.655907


In [82]:
# Read the results CSV and keep only rows whose metric is 'Accuracy' on the 'Validation' split.
df = pd.read_csv('results_1_pascal-0-1_2.csv')
filtered_df_my = df.loc[df['metric'].eq('Accuracy') & df['split'].eq('Validation')]

# One row per examples_per_class holding the largest 'value' seen in that group.
best_results_my = (
    filtered_df_my
    .groupby('examples_per_class')['value']
    .max()
    .reset_index()
)
best_results_my

Unnamed: 0,examples_per_class,value
0,1,0.659004


In [84]:
# Read the results CSV and keep only rows whose metric is 'Accuracy' on the 'Validation' split.
# NOTE(review): the frame is named their_df but is read from 'my_results/' — confirm the source.
their_df = pd.read_csv('my_results/results_0_1.csv')
filtered_df = their_df.loc[
    their_df['metric'].eq('Accuracy') & their_df['split'].eq('Validation')
]

# Maximum 'value' for each examples_per_class, flattened back into ordinary columns.
best_results_their = (
    filtered_df
    .groupby('examples_per_class')['value']
    .max()
    .reset_index()
)
best_results_their

Unnamed: 0,examples_per_class,value
0,1,0.662918


## With simple transform

In [83]:
# Read the results CSV and keep only rows whose metric is 'Accuracy' on the 'Validation' split.
df = pd.read_csv('results_1_pascal-0-1_3.csv')
filtered_df_my = df.loc[df['metric'].eq('Accuracy') & df['split'].eq('Validation')]

# One row per examples_per_class holding the largest 'value' seen in that group.
best_results_my = (
    filtered_df_my
    .groupby('examples_per_class')['value']
    .max()
    .reset_index()
)
best_results_my

Unnamed: 0,examples_per_class,value
0,1,0.659004


In [89]:
# Difference between two hard-coded numbers. 0.662918 matches the value displayed in this
# notebook for 'my_results/results_0_1.csv' and 0.659004 the value displayed for
# 'results_1_pascal-0-1_3.csv'; presumably copied by hand from those outputs — TODO confirm,
# and consider computing the gap from the summary DataFrames instead.
0.662918 - 0.659004 

0.003913999999999973

In [92]:
# Difference between two hard-coded numbers. 0.662918 matches the value displayed in this
# notebook for 'my_results/results_0_1.csv' and 0.661101 the value displayed for
# 'results_1_pascal-0-1_5.csv'; presumably copied by hand from those outputs — TODO confirm.
0.662918 - 0.661101

0.0018169999999999575

In [85]:
# Read the results CSV and keep only rows whose metric is 'Accuracy' on the 'Validation' split.
df = pd.read_csv('results_1_pascal-0-1_4.csv')
filtered_df_my = df.loc[df['metric'].eq('Accuracy') & df['split'].eq('Validation')]

# One row per examples_per_class holding the largest 'value' seen in that group.
best_results_my = (
    filtered_df_my
    .groupby('examples_per_class')['value']
    .max()
    .reset_index()
)
best_results_my

Unnamed: 0,examples_per_class,value
0,1,0.659004


## With flips and rotations transform

In [87]:
# Read the results CSV and keep only rows whose metric is 'Accuracy' on the 'Validation' split.
df = pd.read_csv('results_1_pascal-0-1_5.csv')
filtered_df_my = df.loc[df['metric'].eq('Accuracy') & df['split'].eq('Validation')]

# One row per examples_per_class holding the largest 'value' seen in that group.
best_results_my = (
    filtered_df_my
    .groupby('examples_per_class')['value']
    .max()
    .reset_index()
)
best_results_my

Unnamed: 0,examples_per_class,value
0,1,0.661101


In [91]:
# Read the results CSV and keep only rows whose metric is 'Accuracy' on the 'Validation' split.
their_df = pd.read_csv('results_0_1.csv')
filtered_df = their_df.loc[
    their_df['metric'].eq('Accuracy') & their_df['split'].eq('Validation')
]

# Maximum 'value' for each examples_per_class, flattened back into ordinary columns.
best_results_their = (
    filtered_df
    .groupby('examples_per_class')['value']
    .max()
    .reset_index()
)
best_results_their

Unnamed: 0,examples_per_class,value
0,1,0.649085


In [90]:
# Difference between two hard-coded numbers. 0.662918 matches the value displayed in this
# notebook for 'my_results/results_0_1.csv' and 0.661101 the value displayed for
# 'results_1_pascal-0-1_5.csv'; presumably copied by hand from those outputs — TODO confirm.
0.662918 - 0.661101

0.0018169999999999575

In [50]:
# Load the full results CSV (no filtering) and display the frame as the cell output.
df = pd.read_csv('results_1_pascal-0-1_aws.csv')
df

Unnamed: 0,seed,examples_per_class,epoch,value,metric,split
0,0,1,0,1.978202,Loss,Training
1,0,1,0,2.326135,Loss,Validation
2,0,1,0,0.850066,Accuracy,Training
3,0,1,0,0.500000,Accuracy,Validation
4,0,1,0,1.988846,Loss Airplane,Training
...,...,...,...,...,...,...
4195,0,1,49,1.000000,Accuracy Train,Validation
4196,0,1,49,0.000453,Loss Television,Training
4197,0,1,49,0.364092,Loss Television,Validation
4198,0,1,49,1.000000,Accuracy Television,Training


In [53]:
# Load the full results CSV (no filtering) and display the frame as the cell output.
# NOTE(review): the frame is named their_df but is read from 'my_results/' — confirm the source.
their_df = pd.read_csv('my_results/results_0_1.csv')
their_df

Unnamed: 0.1,Unnamed: 0,seed,examples_per_class,epoch,value,metric,split
0,0,0,1,0,2.044756,Loss,Training
1,1,0,1,0,2.356342,Loss,Validation
2,2,0,1,0,0.827288,Accuracy,Training
3,3,0,1,0,0.536613,Accuracy,Validation
4,4,0,1,0,1.903856,Loss Airplane,Training
...,...,...,...,...,...,...,...
4195,4195,0,1,49,0.852861,Accuracy Train,Validation
4196,4196,0,1,49,0.000490,Loss Television,Training
4197,4197,0,1,49,0.491666,Loss Television,Validation
4198,4198,0,1,49,1.000000,Accuracy Television,Training


In [24]:
# Read the results CSV and keep only rows whose metric is 'Accuracy' on the 'Validation' split.
their_df = pd.read_csv('results_0_2.csv')
filtered_df = their_df.loc[
    their_df['metric'].eq('Accuracy') & their_df['split'].eq('Validation')
]

# One row per examples_per_class holding the largest 'value' seen in that group.
best_results_their = (
    filtered_df
    .groupby('examples_per_class')['value']
    .max()
    .reset_index()
)
best_results_their

Unnamed: 0,examples_per_class,value
0,1,0.662918
1,2,0.711716


### Results from my implementation with top_k=10 detail=low

In [26]:
# Read the results CSV and keep only rows whose metric is 'Accuracy' on the 'Validation' split.
df = pd.read_csv('results_1_pascal-0-1.csv')
filtered_df = df.loc[df['metric'].eq('Accuracy') & df['split'].eq('Validation')]

# One row per examples_per_class holding the largest 'value' seen in that group.
best_results_my = (
    filtered_df
    .groupby('examples_per_class')['value']
    .max()
    .reset_index()
)
best_results_my

Unnamed: 0,examples_per_class,value
0,1,0.65


In [32]:
# Read the results CSV and keep only rows whose metric is 'Accuracy' on the 'Validation' split.
df = pd.read_csv('results_1_filtered-pascal-0-1-percentile-0_10.csv')
filtered_df = df.loc[df['metric'].eq('Accuracy') & df['split'].eq('Validation')]

# One row per examples_per_class holding the largest 'value' seen in that group.
best_results_my = (
    filtered_df
    .groupby('examples_per_class')['value']
    .max()
    .reset_index()
)
best_results_my

Unnamed: 0,examples_per_class,value
0,1,0.7


Unnamed: 0,seed,examples_per_class,epoch,value,metric,split
3,0,1,0,0.45,Accuracy,Validation
87,0,1,1,0.45,Accuracy,Validation
171,0,1,2,0.55,Accuracy,Validation
255,0,1,3,0.6,Accuracy,Validation
339,0,1,4,0.6,Accuracy,Validation
423,0,1,5,0.6,Accuracy,Validation
507,0,1,6,0.6,Accuracy,Validation
591,0,1,7,0.6,Accuracy,Validation
675,0,1,8,0.65,Accuracy,Validation
759,0,1,9,0.65,Accuracy,Validation


In [33]:
# Read the results CSV and keep only rows whose metric is 'Accuracy' on the 'Validation' split.
df = pd.read_csv('results_1_filtered-pascal-0-1-percentile-0_10_collab.csv')
filtered_df = df.loc[df['metric'].eq('Accuracy') & df['split'].eq('Validation')]

# One row per examples_per_class holding the largest 'value' seen in that group.
best_results_my = (
    filtered_df
    .groupby('examples_per_class')['value']
    .max()
    .reset_index()
)
best_results_my

Unnamed: 0,examples_per_class,value
0,1,0.7


### Results from my implementation with top_k=9 detail=low

In [16]:
# Read the results CSV and keep only rows whose metric is 'Accuracy' on the 'Validation' split.
df = pd.read_csv('my_results/results_1_percentile_0_1.csv')
filtered_df = df.loc[df['metric'].eq('Accuracy') & df['split'].eq('Validation')]

# One row per examples_per_class holding the largest 'value' seen in that group.
best_results_my = (
    filtered_df
    .groupby('examples_per_class')['value']
    .max()
    .reset_index()
)
best_results_my

Unnamed: 0,examples_per_class,value
0,1,0.75
