In [1]:
#Imports

import ast
import builtins
import os
import re
from pathlib import Path
from typing import Optional

# Kept ahead of the numpy/pandas imports, matching the original cell ordering.
os.environ["OMP_NUM_THREADS"] = "1"

import numpy as np
import pandas as pd

In [2]:
def calculate_elaboration_score(features: dict) -> float:
    """Combine five code-style features into one weighted elaboration score.

    Every feature is looked up in ``features`` (a missing key counts as
    zero), capped at a saturation point, rescaled so that the cap maps to
    1.0, and multiplied by its weight. The five weights add up to 1.0.

    Args:
        features: Mapping that may contain ``comment_density``,
            ``num_methods``, ``avg_identifier_length``,
            ``doc_comment_density`` and ``blank_line_ratio``.

    Returns:
        The weighted sum of the rescaled features, rounded to 5 decimals.
    """
    def capped(key, fallback, ceiling):
        # Value of one feature, limited from above by its saturation point.
        return min(features.get(key, fallback), ceiling)

    # (weight, rescaled feature) pairs, listed in the order they are added up.
    weighted_terms = [
        (0.25, capped("comment_density", 0.0, 1.0)),
        (0.20, capped("num_methods", 0, 10) / 10),
        (0.15, capped("avg_identifier_length", 0.0, 15) / 15),
        (0.25, capped("doc_comment_density", 0.0, 1.0)),
        (0.15, capped("blank_line_ratio", 0.0, 0.3) / 0.3),
    ]

    # Accumulate strictly left to right so rounding matches a plain a+b+c+d+e.
    products = [weight * value for weight, value in weighted_terms]
    total = products[0]
    for product in products[1:]:
        total += product

    return round(total, 5)

In [None]:
# Language-specific filters
java_keywords = {
    "abstract", "assert", "boolean", "break", "byte", "case", "catch", "char", "class",
    "const", "continue", "default", "do", "double", "else", "enum", "extends", "final",
    "finally", "float", "for", "goto", "if", "implements", "import", "instanceof", "int",
    "interface", "long", "native", "new", "null", "package", "private", "protected", "public",
    "return", "short", "static", "strictfp", "super", "switch", "synchronized", "this", "throw",
    "throws", "transient", "try", "void", "volatile", "while", "true", "false",
    "String", "System", "out", "in", "println", "args", "Exception", "java", "util", "nextInt", "Scanner"
}
standard_library_classes = {
    "Integer", "Double", "Float", "Long", "Short", "Byte", "Boolean", "Character",
    "StringBuilder", "StringBuffer", "Math", "System", "Object",
    "Arrays", "Collections", "Comparator", "Iterator", "Exception", "RuntimeException",
    "Scanner", "InputStream", "BufferedReader", "File", "IOException",
    "ArrayList", "LinkedList", "HashMap", "HashSet", "TreeMap", "TreeSet",
    "PriorityQueue", "Queue", "Stack", "Deque", "Map", "List", "Set",
}
java_keywords |= standard_library_classes
# Use the ``builtins`` module instead of ``__builtins__``: the latter is an
# implementation detail that is a plain dict outside ``__main__``, in which
# case ``dir()`` would list dict methods rather than builtin names.
python_builtins = set(dir(builtins))

# Words that can sit directly before "(...) {" without naming a method.
_JAVA_CONTROL_WORDS = frozenset({"if", "for", "while", "switch", "catch", "synchronized", "try"})
# Words the method regex can mistake for a return type (e.g. "else if (", "new Foo() {").
_JAVA_NON_TYPE_WORDS = frozenset({"else", "new", "return", "throw"})
_JAVA_METHOD_PATTERN = re.compile(
    r"(public|private|protected)?\s*(static\s+)?(?P<rtype>[\w<>\[\]]+)\s+(?P<name>\w+)\s*\([^)]*\)\s*\{"
)
_JAVA_IDENTIFIER_PATTERN = re.compile(r"\b([a-zA-Z_][a-zA-Z0-9_]*)\b")


def _scan_python(code: str, lines: list, debug: bool):
    """Collect raw counts for Python source using the ``ast`` module.

    Returns ``None`` when ``code`` cannot be parsed; otherwise the tuple
    ``(comment_lines, blank_lines, identifier_lengths, num_methods, num_doc_comments)``.
    """
    try:
        tree = ast.parse(code)
    except (SyntaxError, ValueError):
        # ValueError: older interpreters raise it for source containing null bytes.
        return None

    identifier_lengths = []
    num_methods = 0
    num_doc_comments = 0

    for node in ast.walk(tree):
        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            # ``async def`` is a function definition too and has its own node type.
            num_methods += 1
            if ast.get_docstring(node):
                num_doc_comments += 1
        elif isinstance(node, (ast.ClassDef, ast.Module)):
            if ast.get_docstring(node):
                num_doc_comments += 1
        elif isinstance(node, ast.Name):
            if node.id not in python_builtins:
                identifier_lengths.append(len(node.id))
                if debug:
                    print(f"Python identifier: {node.id} ({len(node.id)})")

    # Only whole-line comments are counted; trailing "code  # note" lines are not.
    comment_lines = sum(1 for line in lines if line.strip().startswith("#"))
    blank_lines = sum(1 for line in lines if line.strip() == "")

    return comment_lines, blank_lines, identifier_lengths, num_methods, num_doc_comments


def _scan_java(lines: list, debug: bool):
    """Collect raw counts for Java source with a line-based, regex-driven scan.

    Returns the tuple
    ``(comment_lines, blank_lines, identifier_lengths, num_methods, num_doc_comments)``.
    """
    comment_lines = 0
    blank_lines = 0
    identifier_lengths = []
    num_methods = 0
    num_doc_comments = 0
    in_doc_block = False      # inside a multi-line /** ... */ JavaDoc block
    in_block_comment = False  # inside a multi-line /* ... */ regular comment

    for line in lines:
        stripped = line.strip()

        # Rest of a JavaDoc block: skipped entirely (the block was counted once
        # when it opened and its lines are not added to comment_lines).
        if in_doc_block:
            if "*/" in stripped:
                in_doc_block = False
            continue

        # Rest of a regular block comment: these are comment lines, not code.
        if in_block_comment:
            comment_lines += 1
            if "*/" in stripped:
                in_block_comment = False
            continue

        # Start of a JavaDoc block.
        if stripped.startswith("/**"):
            num_doc_comments += 1  # Count block just once
            # A one-line "/** ... */" closes immediately; only stay in
            # doc-block mode when no terminator follows the opener.
            in_doc_block = "*/" not in stripped[2:]
            continue

        if stripped.startswith("//"):
            comment_lines += 1
            continue

        opener = stripped.find("/*")
        if opener != -1:
            comment_lines += 1
            # A "/*" that sits after "//" is just text inside a line comment.
            inside_line_comment = "//" in stripped[:opener]
            in_block_comment = (
                not inside_line_comment and "*/" not in stripped[opener + 2:]
            )
            continue

        if stripped == "":
            blank_lines += 1
            continue

        for match in _JAVA_METHOD_PATTERN.finditer(stripped):
            if (match.group("name") in _JAVA_CONTROL_WORDS
                    or match.group("rtype") in _JAVA_NON_TYPE_WORDS):
                continue  # e.g. "else if (...) {" or "new Runnable() {"
            num_methods += 1
            if debug:
                print(f"Found Java method: {stripped}")
            break  # at most one method per line

        # Every word-like token that is not a known keyword/library name
        # counts; this is a plain regex scan, so words inside string
        # literals are picked up as well.
        for token in _JAVA_IDENTIFIER_PATTERN.findall(stripped):
            if token not in java_keywords:
                identifier_lengths.append(len(token))
                if debug:
                    print(f"Java identifier: {token} ({len(token)})")

    return comment_lines, blank_lines, identifier_lengths, num_methods, num_doc_comments


def extract_features_from_codee(code: str, language: str, debug: bool=False) -> Optional[dict]:
    """Extract the style features consumed by the elaboration score.

    Args:
        code: Full source text of one submission.
        language: ``"python"`` or ``"java"`` (exact, lower-case match).
        debug: When true, print the intermediate counts and every
            identifier / Java method line that was found.

    Returns:
        ``None`` if ``language`` is not supported or Python ``code`` cannot
        be parsed. Otherwise a dict with:

        * ``comment_density`` - comment lines / total lines. Python: lines
          starting with ``#``. Java: ``//`` lines and lines containing or
          inside ``/* ... */``; JavaDoc lines are not included.
        * ``num_methods`` - Python ``def`` / ``async def`` nodes, or Java
          lines matching the method-declaration regex.
        * ``avg_identifier_length`` - mean length of identifiers that are
          not in the language filter set (0 when none were found).
        * ``doc_comment_density`` - docstrings / JavaDoc blocks divided by
          ``num_methods + 1``.
        * ``blank_line_ratio`` - blank lines / total lines.
    """
    lines = code.splitlines()
    total_lines = len(lines)

    if debug:
        print("----- Debug Info -----")
        print(f"Total lines: {total_lines}")

    if language == "python":
        counts = _scan_python(code, lines, debug)
    elif language == "java":
        counts = _scan_java(lines, debug)
    else:
        counts = None

    if counts is None:
        return None

    comment_lines, blank_lines, identifier_lengths, num_methods, num_doc_comments = counts

    # The +1 keeps the ratio defined when a file declares no methods.
    doc_comment_density = num_doc_comments / (num_methods + 1)

    if debug:
        print(f"Doc comments: {num_doc_comments}")
        print(f"Number of methods: {num_methods}")
        print(f"Identifiers: {identifier_lengths}")
        print(f"Blank lines: {blank_lines}")
        print(f"Comment lines: {comment_lines}")

    return {
        "comment_density": comment_lines / total_lines if total_lines > 0 else 0,
        "num_methods": num_methods,
        "avg_identifier_length": (sum(identifier_lengths) / len(identifier_lengths)) if identifier_lengths else 0,
        "doc_comment_density": doc_comment_density,
        "blank_line_ratio": blank_lines / total_lines if total_lines > 0 else 0
    }

In [4]:
if __name__ == "__main__":
    # Smoke test on a single Python file with debug output enabled.
    # NOTE(review): machine-specific absolute path; adjust before running elsewhere.
    test_file = Path("/Users/vidhimittal/Desktop/Honors/Python-Example.py")
    code_content = test_file.read_text(encoding="utf-8", errors="ignore")
    lang = "java" if test_file.suffix == ".java" else "python"
    features = extract_features_from_codee(code_content, lang, debug=True)
    print("Extracted Features:", features)
    if features is None:
        # The extractor returns None for unparseable code; scoring None would
        # raise AttributeError inside calculate_elaboration_score.
        print(f"[!] Skipped (invalid code): {test_file}")
    else:
        score = calculate_elaboration_score(features)
        print(score)

----- Debug Info -----
Total lines: 18
Python identifier: result (6)
Python identifier: a (1)
Python identifier: b (1)
Python identifier: a (1)
Python identifier: b (1)
Python identifier: add (3)
Python identifier: result (6)
Python identifier: main (4)
Doc comments: 1
Number of methods: 3
Identifiers: [6, 1, 1, 1, 1, 3, 6, 4]
Blank lines: 4
Comment lines: 2
Extracted Features: {'comment_density': 0.1111111111111111, 'num_methods': 3, 'avg_identifier_length': 2.875, 'doc_comment_density': 0.25, 'blank_line_ratio': 0.2222222222222222}
0.29014


In [5]:
if __name__ == "__main__":
    # Smoke test on a single Java file with debug output enabled.
    # NOTE(review): machine-specific absolute path; adjust before running elsewhere.
    test_file = Path("/Users/vidhimittal/Desktop/Honors/Java-Example.java")
    code_content = test_file.read_text(encoding="utf-8", errors="ignore")
    lang = "java" if test_file.suffix == ".java" else "python"
    features = extract_features_from_codee(code_content, lang, debug=True)
    print("Extracted Features:", features)
    if features is None:
        # The extractor returns None when it cannot process the input; scoring
        # None would raise AttributeError inside calculate_elaboration_score.
        print(f"[!] Skipped (invalid code): {test_file}")
    else:
        score = calculate_elaboration_score(features)
        print(score)

----- Debug Info -----
Total lines: 20
Java identifier: Example (7)
Found Java method: public static int add(int a, int b) {
Java identifier: add (3)
Java identifier: a (1)
Java identifier: b (1)
Java identifier: a (1)
Java identifier: b (1)
Found Java method: public static int subtract(int a, int b) {
Java identifier: subtract (8)
Java identifier: a (1)
Java identifier: b (1)
Java identifier: a (1)
Java identifier: b (1)
Found Java method: public static void main(String[] args) {
Java identifier: main (4)
Java identifier: result (6)
Java identifier: add (3)
Java identifier: Sum (3)
Java identifier: result (6)
Doc comments: 1
Number of methods: 3
Identifiers: [7, 3, 1, 1, 1, 1, 8, 1, 1, 1, 1, 4, 6, 3, 3, 6]
Blank lines: 3
Comment lines: 2
Extracted Features: {'comment_density': 0.1, 'num_methods': 3, 'avg_identifier_length': 3.0, 'doc_comment_density': 0.25, 'blank_line_ratio': 0.15}
0.2525


In [None]:
# Score every human WRONG_ANSWER submission of one problem and save the table.
problem_id = "00000056"
base_dir = Path(f"/Users/vidhimittal/Desktop/organized_progpedia_data/{problem_id}/human/WRONG_ANSWER")
results = []

# Loop through both Java and Python folders
for lang_folder in ["java", "python"]:
    lang_path = base_dir / lang_folder
    extension = ".java" if lang_folder == "java" else ".py"

    for filepath in lang_path.rglob(f"*{extension}"):
        # Deliberately broad: one unreadable or pathological file should be
        # reported and skipped, not abort the whole batch.
        try:
            code = filepath.read_text(encoding="utf-8", errors="ignore")
            features = extract_features_from_codee(code, language=lang_folder, debug=False)

            if features:
                score = calculate_elaboration_score(features)
                results.append({
                    "filename": filepath.name,
                    "path": str(filepath),
                    "language": lang_folder,
                    "elaboration_score": score,
                    **features
                })
            else:
                print(f"[!] Skipped (invalid code): {filepath}")
        except Exception as e:
            print(f"[ERROR] {filepath}: {e}")

# Convert to DataFrame
df = pd.DataFrame(results)

if df.empty:
    # Without rows the frame has no "elaboration_score" column, so sorting by
    # it would raise KeyError; report the situation instead of crashing.
    print(f"[!] No scorable submissions found under {base_dir}")
else:
    df = (
        df.sort_values(by="elaboration_score", ascending=False)
        .reset_index(drop=True)
    )

    # Preview
    print(df.head())

    # Save to CSV with problem number in name
    output_csv = f"wrong_answer_{problem_id}_elaboration_scores.csv"
    df.to_csv(output_csv, index=False)
    print(f"\n Saved to {output_csv}")

In [None]:
import pandas as pd
from pathlib import Path

# Gather the per-label score CSVs of every problem into one combined table.
# NOTE(review): machine-specific absolute path; adjust before running elsewhere.
base_path = Path("/Users/vidhimittal/Desktop/organized_progpedia_data")
labels = {
    "ACCEPTED": "Accepted",
    "WRONG_ANSWER": "Wrong Answer",
    "RUNTIME_ERROR": "Runtime Error"
}

all_data = []

# sorted(): directory listing order is arbitrary, so sort for a reproducible row order.
for problem_dir in sorted(base_path.iterdir()):
    if not problem_dir.is_dir() or not problem_dir.name.isdigit():
        continue  # skip non-problem directories

    problem_id = problem_dir.name
    human_dir = problem_dir / "human"

    for label_folder, label_name in labels.items():
        csv_files = sorted((human_dir / label_folder).glob("*.csv"))
        if not csv_files:
            continue  # skip if no CSV file found in this label

        # Only one CSV per label folder is loaded; sorting makes the pick
        # deterministic, and extra files are reported rather than silently ignored.
        csv_file = csv_files[0]
        if len(csv_files) > 1:
            print(f"[!] {len(csv_files)} CSV files in {csv_file.parent}; using {csv_file.name}")

        try:
            label_df = pd.read_csv(csv_file)
            label_df["problem_id"] = problem_id
            label_df["label"] = label_name
            all_data.append(label_df)
        except Exception as e:
            print(f"[ERROR] Could not load {csv_file}: {e}")

if not all_data:
    # pd.concat([]) fails with an unhelpful "No objects to concatenate".
    raise ValueError(f"No label CSV files could be loaded from {base_path}")

# Combine all into one DataFrame
df_all = pd.concat(all_data, ignore_index=True)
print(f"Loaded {len(df_all)} total rows across all problems.")


print(df_all.head())

df_all.to_csv("all_progpedia_elaboration_scores.csv", index=False)

In [None]:
# Reload the combined table. problem_id is read as text: ids are directory
# names such as "00000056", and integer inference would drop the leading zeros.
df = pd.read_csv("all_progpedia_elaboration_scores.csv", dtype={"problem_id": str})

# DataFrame.info() prints its own report and returns None, so it is called
# directly instead of being wrapped in print() (which appended a stray "None").
df.info()
print(df["label"].value_counts())
print(df["language"].value_counts())

In [None]:
# Descriptive stats for elaboration_score, grouped three ways:
# by outcome label, by language, and by the (label, language) pair.
for grouping in ("label", "language", ["label", "language"]):
    print(df.groupby(grouping)["elaboration_score"].describe())

In [None]:
from scipy.stats import f_oneway

# One-way ANOVA on elaboration_score across the three outcome labels.
scores_by_label = {
    outcome: df.loc[df["label"] == outcome, "elaboration_score"]
    for outcome in ("Accepted", "Wrong Answer", "Runtime Error")
}
accepted = scores_by_label["Accepted"]
wrong = scores_by_label["Wrong Answer"]
runtime = scores_by_label["Runtime Error"]

f_stat, p_value = f_oneway(accepted, wrong, runtime)
print(f"ANOVA F-stat: {f_stat:.4f}, p-value: {p_value:.4e}")

In [None]:
# Binary outcome column: 1 for "Accepted" rows, 0 for every other label.
df["is_correct"] = df["label"].eq("Accepted").astype(int)

# Columns whose correlation with the binary outcome is reported.
features = [
    "comment_density",
    "doc_comment_density",
    "num_methods",
    "avg_identifier_length",
    "blank_line_ratio",
    "elaboration_score",
]

# Correlation matrix over the selected columns plus the outcome; only the
# outcome column is printed, strongest positive correlation first.
corrs = df[[*features, "is_correct"]].corr()
print(corrs["is_correct"].sort_values(ascending=False))

In [None]:
# One-way ANOVA for each raw feature across the three outcome labels.
features_to_test = [
    "comment_density",
    "doc_comment_density",
    "num_methods",
    "avg_identifier_length",
    "blank_line_ratio",
]
outcome_labels = ("Accepted", "Wrong Answer", "Runtime Error")

for feat in features_to_test:
    # One sample per outcome label, in the order listed above.
    a, w, r = (df.loc[df["label"] == outcome, feat] for outcome in outcome_labels)
    f_stat, p = f_oneway(a, w, r)
    print(f"{feat}: F = {f_stat:.4f}, p = {p:.4e}")