# Classify issues Pn-g4 -> Pn-g4-c

In [4]:
import sys
import os
import pandas as pd
import numpy as np

In [5]:
# Map every issue class label to its numeric code. The code of a label is
# simply its position in the ordered tuple below ('???' = unclassified = 0).
classes = {
    label: code
    for code, label in enumerate((
        '???', 'ADM', 'BLD', 'CFG', 'CUST',
        'DEV', 'DOC', 'EDU', 'MIGR', 'MIT',
        'PM', 'RQM', 'RVW', 'TST', 'BUG',
    ))
}


In [6]:
# Project key -> CSV base name for the six "Issues" projects (P1..P6),
# each stored in a file named '<project>-g4'.
Issues = {f'P{number}': f'P{number}-g4' for number in range(1, 7)}

# Project key -> CSV base name for the "Tawos" projects. Here the file base
# name is identical to the project key, so the mapping is built from one list.
Tawos = {
    name: name
    for name in (
        'XD', 'DATACASS', 'NEXUS', 'MESOS', 'USERGRID', 'MXNET',
        'ALOY', 'APSTUD', 'CLI', 'DAEMON', 'TIDOC', 'TIMOB', 'TISTUD',
        'BAM', 'CLOV', 'CWD', 'FE', 'JRASERVER', 'JSWCLOUD', 'JSWSERVER',
        'CONFCLOUD', 'CONFSERVER', 'BE', 'FAB', 'INDY', 'IS', 'STL',
        'DM', 'DURACLOUD', 'COMPASS', 'CXX', 'JAVA', 'SERVER', 'MDL',
        'APIKIT', 'MULE', 'DNN',
    )
}

# Dataset name -> {project key: CSV base name}; iterated in this order.
datasets = dict(Issues=Issues, Tawos=Tawos)

# Directory holding the per-project input CSV files.
datasets_path = "../py-ccflex/temp/processing"

In [7]:
import keywords_classify as kc

# Issue-type groups as '/'-separated, lower-cased lists of type names.
dev_tasks = 'Epic/Story/Improvement/New Feature/Product Improvement/Quick Task/Request/Sub-task/Task'.lower()
rqm_tasks = 'Question/Requirement/Wish/Suggestion'.lower()
tst_tasks = 'Sub-Test/Testing Issue/UAT Test case/Test'.lower()
doc_tasks = 'Documentation'.lower()
adm_tasks = 'Technical task'.lower()

# Exact-match lookup sets derived from the lists above. Testing
# `issue_type in dev_tasks` against the joined *string* is a substring test,
# so an empty type, 'b', 'est' or 'feature' would all match some group by
# accident; set membership only accepts the complete type names.
dev_types = frozenset(dev_tasks.split('/'))
rqm_types = frozenset(rqm_tasks.split('/'))
tst_types = frozenset(tst_tasks.split('/'))
doc_types = frozenset(doc_tasks.split('/'))
adm_types = frozenset(adm_tasks.split('/'))
bug_types = frozenset({'bug'})

# Type groups whose class follows directly from the issue type.
# 'question' and the development types are handled separately because they
# need the creator flag / the issue text respectively.
_TYPE_GROUP_CLASSES = (
    (rqm_types, 'RQM'),
    (bug_types, 'BUG'),
    (tst_types, 'TST'),
    (doc_types, 'DOC'),
    (adm_types, 'ADM'),
)


def _as_text(value):
    """Return *value* as a string, mapping a missing cell (NaN/None) to ''."""
    return '' if pd.isna(value) else str(value)


def _classify_issue(row):
    """Return the class label for one issue row.

    Reads the 'type_name', 'creator', 'title' and 'description' fields of
    *row*. Unrecognised (or missing) issue types yield '???'.
    """
    issue_type = _as_text(row['type_name']).lower()

    # A question asked by a developer (creator == 1) is a requirement;
    # a question asked by a customer is customer support.
    if issue_type == 'question':
        return 'RQM' if row['creator'] == 1 else 'CUST'

    # Development-like types are refined by keyword classification of the
    # issue text; plain 'DEV' is the fallback when no keyword matched.
    if issue_type in dev_types:
        issue_text = f"{_as_text(row['title'])} {_as_text(row['description'])}".lower()
        text_classes = kc.classify_text(issue_text)
        return kc.max_class(text_classes) if text_classes else 'DEV'

    for type_group, class_name in _TYPE_GROUP_CLASSES:
        if issue_type in type_group:
            return class_name
    return '???'


def _class_counts(frame):
    """Return the number of rows of *frame* per class, in `classes` order."""
    # One value_counts() pass per table instead of one per class.
    counts = frame['class_name'].value_counts()
    return [int(counts.get(class_key, 0)) for class_key in classes]


# Statistics table: one row per class, one "-old"/"-new" column per project.
res_df = pd.DataFrame()
res_df['classes'] = list(classes)

for dataset_name, dataset in datasets.items():
    for project, repo in dataset.items():
        data_path = f"{datasets_path}/{repo}.csv"
        print(f"- import project {project} from file {data_path}")

        data = pd.read_csv(data_path, sep='$')
        # astype() returns a new frame, so `data` itself is left untouched.
        df = data.astype({"type_value": "Int64", "state_changes": "Int64", "class_value": "Int64"})

        # The "-old" statistics are recorded for every dataset except 'Tawos'.
        # Whole-column assignment replaces the chained
        # `res_df[col].iloc[i] = ...` writes, which do not update res_df
        # under pandas Copy-on-Write.
        if dataset_name != 'Tawos':
            res_df[f"{project}-old"] = _class_counts(df)

        # Classify every issue, then replace both class columns in one step
        # (avoids per-cell writes of strings into a possibly non-string column).
        new_class_names = [_classify_issue(row) for _, row in df.iterrows()]
        df['class_name'] = new_class_names
        df['class_value'] = pd.array(
            [classes[name] if name else 0 for name in new_class_names],
            dtype="Int64",
        )

        res_df[f"{project}-new"] = _class_counts(df)

        out_data_path = f"{datasets_path}/{repo}-c.csv"
        print(f'- export classified issues to {out_data_path}')
        df.to_csv(out_data_path, sep='$', index=False)

out_data_path = f"{datasets_path}/classify_result.csv"
print(f'\nExport statistics to {out_data_path}')
res_df.to_csv(out_data_path, sep='\t', index=False)

- import project P1 from file ../py-ccflex/temp/processing/P1-g4.csv
- export classified issues to ../py-ccflex/temp/processing/P1-g4-c.csv
- import project P2 from file ../py-ccflex/temp/processing/P2-g4.csv
- export classified issues to ../py-ccflex/temp/processing/P2-g4-c.csv
- import project P3 from file ../py-ccflex/temp/processing/P3-g4.csv
- export classified issues to ../py-ccflex/temp/processing/P3-g4-c.csv
- import project P4 from file ../py-ccflex/temp/processing/P4-g4.csv
- export classified issues to ../py-ccflex/temp/processing/P4-g4-c.csv
- import project P5 from file ../py-ccflex/temp/processing/P5-g4.csv
- export classified issues to ../py-ccflex/temp/processing/P5-g4-c.csv
- import project P6 from file ../py-ccflex/temp/processing/P6-g4.csv
- export classified issues to ../py-ccflex/temp/processing/P6-g4-c.csv
- import project XD from file ../py-ccflex/temp/processing/XD.csv
- export classified issues to ../py-ccflex/temp/processing/XD-c.csv
- import project DATACASS 

In [None]:
# Show the per-class statistics table (res_df) as this cell's output.
res_df

In [3]:
import keywords_classify as kc

# Manual check of the keyword classifier on one sample issue
# (title and description separated by a tab).
text = (
    "Brainstorm session to agree ongoing way of working for support (2020-09-16)\t"
    "This meeting goal is to gree how the team organizes in situations when "
    "customer reports incident that needs to be resolved quickly. "
    "This is NOT ONLY production related incidents"
)
text_classes = kc.classify_text(text, debug=True)
print(text_classes)

# Fall back to 'DEV' when the classifier returned nothing.
if text_classes:
    new_class_name = kc.max_class(text_classes)
else:
    new_class_name = 'DEV'
print(new_class_name)

{'first': 'brainstorm', 'second': 'session', 'third': '', 'type': 'MIT'} == 2
{'MIT': 2}
MIT
