In [1]:
import requests
import time
import os
import json
import pickle
from tqdm import tqdm,trange
from openai import OpenAI
import pandas as pd
from typing import List, Tuple, Optional, Dict, Any
from sklearn.metrics import accuracy_score

import re

In [3]:
# Load the raw issue table; the cells below read its 'sub-symptom',
# 'symptoms' and 'root causes' columns.
df = pd.read_csv(filepath_or_buffer='CollectedIssues.csv')

In [None]:
# Bug Symptom Taxonomy Mapping
#
# Maps a symptom label (exact, case-sensitive string) to its hierarchical
# taxonomy ID. IDs are dotted paths: a letter for the top-level category
# (e.g. "A"), then one number per nesting level (e.g. "A.1", "A.1.1").
# Top-level category names are included as keys too, so a lookup on a
# coarse label still resolves when no finer-grained label matches.
BUG_SYMPTOM_MAP = {
    # [A] Crash
    "Crash": "A",
    "Reference Error": "A.1",
    "DL Operator Exception": "A.1.1",
    "Function Inaccessible": "A.1.2",
    "Tensor Disposed": "A.1.3",
    "Attribute/Return Value Undefined": "A.1.4",
    "Training Argument Exception": "A.1.5",
    
    "Data & Model Error": "A.2",
    "Tensor Shape/Type/Value Error": "A.2.1",
    "JS Variable Shape/Type/Value Error": "A.2.2",
    "Model Usage/Design Error": "A.2.3",
    
    "Fetch Failure": "A.3",
    "Browser & Device Error": "A.4",
    "Others (Crash)": "A.5",
    
    # [B] Poor Performance
    "Poor Performance": "B",
    "Time": "B.1",
    "Slow Execution": "B.1.1",
    "Browser Hangs": "B.1.2",
    
    "Memory": "B.2",
    "Memory Leak": "B.2.1",
    "Out of Memory": "B.2.2",
    "Abnormal GPU Memory/Utilization": "B.2.3",
    
    "Others (Performance)": "B.3",
    "Regression": "B.3.1",
    "Unstable": "B.3.2",
    
    # [C] Build & Initialization Failure
    "Build & Initialization Failure": "C",
    "TF.js/JS Application Compile Failure": "C.1",
    "npm Package Installation Failure": "C.2",
    "Multi-backend Initialization Failure": "C.3",
    
    # [D] Incorrect Functionality
    "Incorrect Functionality": "D",
    "Inconsistency between Backends/Platforms/Devices": "D.1",
    "Poor Accuracy": "D.2",
    "Inf/None/Null Results": "D.3",
    "Others (Functionality)": "D.4",
    
    # [E] Document Error (no sub-categories)
    "Document Error": "E",
}

# Root Cause Taxonomy Mapping
#
# Maps a root-cause label (exact, case-sensitive string) to its taxonomy ID,
# using a letter for the top-level category and ".N" for each sub-category.
# Several IDs are reachable through more than one key: the extra keys are
# shortened or misspelled variants of the canonical label.
# NOTE(review): the variants presumably mirror spellings that occur in the
# input CSV -- confirm against the data before removing any of them.
ROOT_CAUSE_MAP = {
    # [A] Incorrect Programming
    "Incorrect Programming": "A",
    "Unimplemented Operator": "A.1",
    "Inconsistent Modules in TF.js": "A.2",
    "Inconsistent Modules": "A.2",  # shortened alias of the entry above
    "API Misuse": "A.3",
    "Incorrect Code Logic": "A.4",
    "Incompatibility between 3rd-party DL Library and TF.js": "A.5",
    # Deliberately misspelled ("Incompatibilitty") alias of the entry above.
    "Incompatibilitty between 3rd-party DL Library and TF.js": "A.5",
    "Import Error": "A.6",
    "Improper Exception Handling": "A.7",
    
    # [B] Configuration & Dependency Error
    "Configuration & Dependency Error": "B",
    "Multi-environment Misconfiguration": "B.1",
    "Misconfiguration": "B.1",  # shortened alias of the entry above
    "Dependency Error": "B.2",
    "Untimely Update": "B.3",
    "Confused Document": "B.4",
    
    # [C] Data/Model Error
    "Data/Model Error": "C",
    "Data/Model Inaccessibility": "C.1",
    "Improper Model/Tensor Attribute": "C.2",
    "Improper Model Attribute": "C.2",  # shortened alias of the entry above

    
    # [D] Execution Environment Error
    "Execution Environment Error": "D",
    "Device Incompatibility": "D.1",
    "Browser Incompatibility": "D.2",
    "Cross-platform App Framework Incompatibility": "D.3",
    "WebGL Limits": "D.4",
    
    # [E] Unknown (no sub-categories)
    "Unknown": "E",
}

In [None]:
# Attach a taxonomy ID ('symptom_id') to every issue.
#
# For each row the fine-grained 'sub-symptom' label is looked up first; if it
# is not in BUG_SYMPTOM_MAP the coarser 'symptoms' label is tried instead.
# Rows where neither label resolves are reported and removed from `df`.
not_found_list = []   # unmatched sub-symptom labels; defined here so the cell runs on a fresh kernel
sub_symptom_ids = []  # resolved IDs, in row order, for the rows that are kept
keep_mask = []        # one bool per row of `df`: True when the row resolved

for idx, row in df.iterrows():
    # str() turns a missing value (NaN) into 'nan', which simply fails the lookup.
    sub_symptom = str(row.get('sub-symptom', '')).strip()
    bug_id = BUG_SYMPTOM_MAP.get(sub_symptom)

    if bug_id is None:
        # Fall back to the coarser category label.
        symptom = str(row.get('symptoms', '')).strip()
        bug_id = BUG_SYMPTOM_MAP.get(symptom)

    if bug_id is None:
        not_found_list.append(sub_symptom)
        print(f"sub-symptom: {sub_symptom}")
        print(row)
        keep_mask.append(False)
        continue

    keep_mask.append(True)
    sub_symptom_ids.append(bug_id)

# Filter once, after the loop, instead of rebinding `df` via drop() while
# iterrows() is still running. The positional boolean mask drops exactly the
# reported rows even when index labels are not unique, so the remaining row
# count always equals len(sub_symptom_ids). copy() gives an independent frame
# so the column assignment below does not write into a slice of the original.
df = df.loc[keep_mask].copy()
df['symptom_id'] = sub_symptom_ids



In [None]:
# Attach a taxonomy ID ('root_causes_id') to every issue based on its
# 'root causes' label. Rows whose label is not in ROOT_CAUSE_MAP are kept,
# reported, and receive None as their ID.
root_causes_ids = []
for idx, row in df.iterrows():
    cause_label = str(row.get('root causes', '')).strip()
    cause_id = ROOT_CAUSE_MAP.get(cause_label)
    if cause_id is None:
        print(f"root causes: {cause_label}")
    root_causes_ids.append(cause_id)

df['root_causes_id'] = root_causes_ids

In [None]:
# Persist the annotated table; the positional index is not written out.
df.to_csv(path_or_buf='clean_CollectedIssues.csv', index=False)
