In [1]:
from loguru import logger
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler

import pandas as pd
import numpy as np
from src.config import BaseConfig
from src.modeling.metrics import weighted_accuracy
from tqdm import tqdm
import gc

from sentence_transformers.cross_encoder import CrossEncoder

config = BaseConfig()

In [77]:
path = config.processed_data_dir / "final_data_to_train.json"
test_keys_path = config.processed_data_dir / "test_tasks.csv"
df = pd.read_json(path)
test_keys = pd.read_csv(test_keys_path)

df["time_to_complete_hours_transformed"] = df["time_to_complete_hours"].clip(2, 12)  # clip between 2 and 24 hours
df["time_to_complete_hours_transformed"] = df["time_to_complete_hours_transformed"] // 2 * 2  # round to the nearest even number

y = df["time_to_complete_hours_transformed"]
x = df["task_text"]

y = y[~df.jira_key.isin(test_keys.jira_key)]
x = x[~df.jira_key.isin(test_keys.jira_key)]

y_test_from_manual = y[~df.jira_key.isin(test_keys.jira_key)]
x_test_from_manual = x[~df.jira_key.isin(test_keys.jira_key)]

y.value_counts().sort_index()
df.head(0)

Unnamed: 0,assignee_level_order,jira_key,weeks_since_member_join,time_to_complete_hours,task_text,time_to_complete_hours_transformed


# Simple architecture


1. Skip 15 first tasks – it will be used for example for future
2. For each task get 5 most relevant by Cross Encoder.
3. Update corpus for each task by time (jira_key is enough to order)
4. Make 5 more columns with their time_to_complete_hours
5. Use LLM to ask about estimated time to complete task

# Data preprocessing

In [46]:
path = config.processed_data_dir / "final_data_to_train_w_relevant_previous_tasks.json"

data_w_relevance = pd.read_json(path)
data_w_relevance.columns = ["jira_key", "relevant_tasks"]
data_w_relevance.head(1)

Unnamed: 0,jira_key,relevant_tasks
0,PRT-101,{}


In [47]:
def enrich_task(task: dict) -> dict:
    """
    By jira key add info about assignee

    :param task:
    :return:
    """
    try:
        task = task["corpus"]
        assignee_level_order = df[df["jira_key"] == task["jira_key"]]["assignee_level_order"].values[0]
        weeks_since_member_join = df[df["jira_key"] == task["jira_key"]]["weeks_since_member_join"].values[0]

        task["assignee_level_order"] = assignee_level_order
        task["weeks_since_member_join"] = weeks_since_member_join
    except Exception as e:
        logger.error(e)
        task["assignee_level_order"] = np.nan
        task["weeks_since_member_join"] = np.nan

    return task


def enrich_corpus(corpus: list) -> list:
    """
    Enrich each task in corpus

    :param corpus:
    :return:
    """
    return [enrich_task(task) for task in corpus]


data_w_relevance.relevant_tasks = data_w_relevance.relevant_tasks.apply(enrich_corpus)


In [67]:
data = df.merge(data_w_relevance, on="jira_key").copy()
data.sample(0)

Unnamed: 0,assignee_level_order,jira_key,weeks_since_member_join,time_to_complete_hours,task_text,relevant_tasks


In [6]:
data.to_json(config.processed_data_dir / "final_data_to_train_w_relevant_previous_tasks_enriched.json", orient="records")

In [7]:
from src.tools.ask_anthropic import QuerySchema, TaskEstimator

In [23]:
task_estimator = TaskEstimator()

queries = []
results = []

for task in tqdm(data.to_dict(orient="records")):
    q = QuerySchema(
        current_task=task["task_text"],
        related_tasks=task["relevant_tasks"],
        weeks_since_member_join=task["weeks_since_member_join"],
        assignee_level_order=task["assignee_level_order"]
    )
    r = task_estimator.estimate_task_time(q)

    queries.append(q)
    results.append(r)


100%|██████████| 487/487 [31:48<00:00,  3.92s/it]


In [49]:
result = pd.DataFrame({
    "query": queries,
    "result": results
})

result["query"] = result["query"].apply(lambda x: x.model_dump())
result.result = result["result"].apply(lambda x: x.model_dump())

result.to_json(
    config.processed_data_dir / "final_data_to_train_w_relevant_previous_tasks_enriched_estimated.json",
    orient="records",
    indent=4
)

In [68]:
y = data["time_to_complete_hours"]
y_pred = result["result"].apply(lambda x: x["estimated_time"])

weighted_accuracy(y, y_pred)

0.18462757527733753

In [69]:
pd.DataFrame(
    {
        "y": y,
        "y_pred": y_pred
    }
).corr()

Unnamed: 0,y,y_pred
y,1.0,0.315511
y_pred,0.315511,1.0


In [70]:
preds = pd.DataFrame(
    {
        "y_true": y,
        "y_pred": y_pred
    }
)

preds.groupby("y_true")["y_pred"].mean()


y_true
2     5.920000
4     6.679245
6     6.260870
8     7.780488
10    6.666667
12    8.602740
Name: y_pred, dtype: float64

In [74]:
preds["cnt"] = 1
preds.pivot_table(index="y_true", columns="y_pred", values="cnt", aggfunc="count")

y_pred,2,4,8,16
y_true,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2,8.0,96.0,94.0,2.0
4,2.0,42.0,57.0,5.0
6,,10.0,13.0,
8,1.0,19.0,54.0,8.0
10,,1.0,2.0,
12,,17.0,42.0,14.0


In [81]:
test = preds.loc[y_test_from_manual.index]

weighted_accuracy(test["y_true"], test["y_pred"])

0.19834024896265556

# Результаты
LLM standalone показала лучшие результаты. 18% Weighted accuracy

Unnamed: 0,query,result
0,{'current_task': '**Summary:** Verify and vali...,{'current_task': 'Verify and validate testpack...
1,{'current_task': '**Summary:** Create a new si...,{'current_task': 'Create a new simulation for ...
2,{'current_task': '**Summary:** Create a compre...,{'current_task': 'Create a comprehensive mind ...
3,{'current_task': '**Summary:** Optimize longli...,{'current_task': 'Optimize longlists to handle...
4,{'current_task': '**Summary:** Analyze the ris...,{'current_task': 'Analyze the risk impact of t...
...,...,...
482,{'current_task': '**Summary:** Lead Monnai int...,{'current_task': 'Lead Monnai integration to p...
483,{'current_task': '**Summary:** Create a simpli...,{'current_task': 'Create a simplified Risk Ove...
484,{'current_task': '**Summary:** Create and vali...,{'current_task': 'Create and validate a Proof ...
485,{'current_task': '**Summary:** Analyze and com...,{'current_task': 'Analyze and compare Accounts...
