In [3]:
import sys, os
project_root = os.path.abspath("..")  # adjust based on your directory structure
if project_root not in sys.path:
    sys.path.append(project_root)

import re
import pandas as pd
from FeatureCleaning.CleanDSDTale import export_clean_df  # Adjust import path as needed
import dtale

# Add to a cell in your notebook
import importlib

# Force reload the module
if 'FeatureCleaning.CleanDSDTale' in sys.modules:
    importlib.reload(sys.modules['FeatureCleaning.CleanDSDTale'])
    from FeatureCleaning.CleanDSDTale import export_clean_df  # Re-import after reload

def extract_sprint_name(sprint_str):
    """
    Extract the sprint name from a sprint string.
    For example, from a value like:
      "com.atlassian.greenhopper.service.sprint.Sprint@16353814[id=5599,...,name=Sprint 9,...]"
    this returns "Sprint 9".
    """
    if not sprint_str or not isinstance(sprint_str, str):
        return None
    match = re.search(r'name=([^,]+)', sprint_str)
    if match:
        return match.group(1).strip()
    return None

def extract_sprint_from_fixversions(fix_versions):
    """
    Given a list of fixVersions dictionaries, return the first version name containing 'Sprint'.
    """
    if isinstance(fix_versions, list):
        for version in fix_versions:
            name = version.get("name", "")
            if "Sprint" in name:
                return name
    return None

def add_sprint_field(df):
    """
    Add a standardized 'sprint' column to the DataFrame.
    First, try using customfield_10557; if not present, fall back to fixVersions.
    """
    if "fields.customfield_10557" in df.columns:
        df["sprint"] = df["fields.customfield_10557"].apply(extract_sprint_name)
    elif "fields.fixVersions" in df.columns:
        df["sprint"] = df["fields.fixVersions"].apply(extract_sprint_from_fixversions)
    else:
        df["sprint"] = None
    return df

def extract_planning_fields(df):
    """
    From the full cleaned DataFrame, extract only the planning-phase fields.
    Enhanced to include all critical planning-phase data for task-level and project-level estimation.
    """
    df = df.copy()
    df = add_sprint_field(df)
    
    planning_cols = [
        # Basic issue identification
        "key",                    # Issue key
        "fields.summary",         # Summary text
        
        # Issue classification
        "fields.issuetype.name",  # Type
        "fields.status.name",     # Status
        "fields.priority.name",   # Priority
        
        # People involved
        "fields.assignee.key",    # Assignee ID
        "fields.creator.key",     # Creator ID
        "fields.reporter.key",    # Reporter ID
        
        # Project context
        "fields.project.id",      # Project ID
        "fields.project.name",    # Project name
        
        # Time information
        "fields.created",         # Creation date
        
        # Relationships
        "sprint",                 # Extracted sprint info
        "issuelinks_total",       # Total link count
        "has_issuelinks_relates", # Has 'relates to' links
        "has_issuelinks_cloners", # Has 'cloners' links
        
        # Components and labels
        "fields.components",      # Components
        "fields.labels",          # Labels
        
        # For training purposes
        "fields.resolutiondate"   # Used to calculate resolution time
    ]
    
    # Get all description embedding columns (they start with "desc_emb_")
    embedding_cols = [col for col in df.columns if col.startswith("desc_emb_")]
    
    # Combine all desired columns
    all_cols = planning_cols + embedding_cols
    
    # Filter to only include columns that exist in the DataFrame
    available_cols = [col for col in all_cols if col in df.columns]
    
    # Create the base planning DataFrame
    planning_df = df[available_cols].copy()
    
    # Add derived features
    
    # Component and label counts
    if "fields.components" in planning_df.columns:
        planning_df["component_count"] = planning_df["fields.components"].apply(
            lambda x: len(x) if isinstance(x, list) else 0
        )
    
    if "fields.labels" in planning_df.columns:
        planning_df["label_count"] = planning_df["fields.labels"].apply(
            lambda x: len(x) if isinstance(x, list) else 0
        )
    
    # Creation date features
    if "fields.created" in planning_df.columns:
        planning_df["created_date"] = pd.to_datetime(planning_df["fields.created"])
        planning_df["created_day_of_week"] = planning_df["created_date"].dt.dayofweek
        planning_df["created_month"] = planning_df["created_date"].dt.month
        planning_df["created_year"] = planning_df["created_date"].dt.year
    
    # Calculate resolution time (if available - for training only)
    if "fields.resolutiondate" in planning_df.columns and "fields.created" in planning_df.columns:
        planning_df["resolution_time"] = (
            pd.to_datetime(planning_df["fields.resolutiondate"]) - 
            pd.to_datetime(planning_df["fields.created"])
        ).dt.total_seconds() / 3600  # Convert to hours
        
        # Drop the raw resolution date as we now have the derived feature
        planning_df = planning_df.drop(columns=["fields.resolutiondate"])
    
    return planning_df

def create_project_features(df):
    """
    Aggregate task-level features to create project-level features.
    Enhanced with project duration metrics.
    """
    # Group by project ID and name
    agg_dict = {
        # Issue counts
        'key': 'count',  # Total number of issues
    }
    
    # Add issue type distribution if available
    if 'fields.issuetype.name' in df.columns:
        agg_dict['fields.issuetype.name'] = lambda x: x.value_counts().to_dict()
    
    # Add priority distribution if available  
    if 'fields.priority.name' in df.columns:
        agg_dict['fields.priority.name'] = lambda x: x.value_counts().to_dict()
    
    # Add status distribution if available
    if 'fields.status.name' in df.columns:
        agg_dict['fields.status.name'] = lambda x: x.value_counts().to_dict()
    
    # Team size metrics if available
    if 'fields.assignee.key' in df.columns:
        agg_dict['fields.assignee.key'] = lambda x: x.nunique()
    
    if 'fields.creator.key' in df.columns:
        agg_dict['fields.creator.key'] = lambda x: x.nunique()
    
    # Connectivity metrics if available
    if 'issuelinks_total' in df.columns:
        agg_dict['issuelinks_total'] = ['sum', 'mean']
    
    if 'has_issuelinks_relates' in df.columns:
        agg_dict['has_issuelinks_relates'] = 'mean'
    
    # Component usage if available
    if 'fields.components' in df.columns:
        agg_dict['fields.components'] = lambda x: set(item for sublist in x if isinstance(sublist, list) for item in sublist)
    
    # Add component and label counts if available
    if 'component_count' in df.columns:
        agg_dict['component_count'] = ['sum', 'mean']
    
    if 'label_count' in df.columns:
        agg_dict['label_count'] = ['sum', 'mean']
    
    # Add sprint information if available
    if 'sprint' in df.columns:
        agg_dict['sprint'] = lambda x: x.value_counts().to_dict()
    
    # Add creation time features if available
    if 'created_date' in df.columns:
        # Min and max dates to calculate project duration
        agg_dict['created_date'] = ['min', 'max']
    
    if 'created_month' in df.columns:
        agg_dict['created_month'] = lambda x: x.value_counts().to_dict()
    
    if 'created_day_of_week' in df.columns:
        agg_dict['created_day_of_week'] = lambda x: x.value_counts().to_dict()
    
    # Add resolution time statistics if available (for model training)
    if 'resolution_time' in df.columns:
        agg_dict['resolution_time'] = ['mean', 'median', 'min', 'max', 'std', 'sum']
    
    # Add description embedding averages if available
    embedding_cols = [col for col in df.columns if col.startswith('desc_emb_')]
    for col in embedding_cols:
        agg_dict[col] = 'mean'
    
    # Perform the groupby aggregation
    project_features = df.groupby(['fields.project.id', 'fields.project.name']).agg(agg_dict)
    
    # Flatten the multi-level columns
    project_features.columns = ['_'.join(col) if isinstance(col, tuple) else col 
                               for col in project_features.columns]
    
    # Reset index to convert groupby result to regular DataFrame
    project_features = project_features.reset_index()
    
    # Expand the categorical distributions
    project_features = expand_categorical_features(project_features)
    
    # Add derived metrics
    
    # Compute team-related metrics if available
    if 'fields.assignee.key' in project_features.columns and 'key_count' in project_features.columns:
        project_features['issue_to_assignee_ratio'] = (
            project_features['key_count'] / project_features['fields.assignee.key'].apply(lambda x: max(1, x))
        )
    
    # Calculate link density if available
    if 'issuelinks_total_sum' in project_features.columns and 'key_count' in project_features.columns:
        project_features['link_density'] = (
            project_features['issuelinks_total_sum'] / project_features['key_count']
        )
    
    # Calculate temporal metrics (if date information is available)
    if 'created_date_min' in project_features.columns and 'created_date_max' in project_features.columns:
        # Project duration in days
        project_features['project_duration_days'] = (
            (pd.to_datetime(project_features['created_date_max']) - 
             pd.to_datetime(project_features['created_date_min'])).dt.total_seconds() / (24 * 3600)
        )
        
        # Issue creation rate (issues per day) - a proxy for velocity
        project_features['issue_creation_rate'] = (
            project_features['key_count'] / 
            project_features['project_duration_days'].replace(0, 1)  # Avoid division by zero
        )
    
    return project_features

def export_clean_planningphase_df(open_dtale=True):
    """
    Run the full cleaning pipeline to obtain a cleaned task-level DataFrame,
    then extract only the planning-phase fields to create a planning-phase dataset.
    If open_dtale is True, launch a D-Tale session for interactive exploration.
    
    Returns:
      planning_df (pd.DataFrame): The planning-phase DataFrame with the selected fields.
    """
    full_df = export_clean_df()  # This returns your fully cleaned task-level DataFrame.
    planning_df = extract_planning_fields(full_df)
    project_features_df = create_project_features(planning_df)

    
    if open_dtale:
        print("Launching D-Tale session for planning-phase DataFrame...")
        d_pf = dtale.show(project_features_df, ignore_duplicate=True, allow_cell_edits=False)

        d_pf.open_browser()
        
    return project_features_df

def expand_categorical_features(df):
    """
    Expand categorical distributions stored as dictionaries.
    """
    # Identify columns that contain dictionaries
    dict_cols = [col for col in df.columns 
                if isinstance(df[col].iloc[0], dict) if len(df) > 0]
    
    result_df = df.copy()
    
    for col in dict_cols:
        # Find all unique categories across all projects
        all_categories = set()
        for d in df[col]:
            if isinstance(d, dict):
                all_categories.update(d.keys())
        
        # Create a column for each category
        for category in all_categories:
            new_col_name = f"{col}_{category}"
            result_df[new_col_name] = df[col].apply(
                lambda x: x.get(category, 0) if isinstance(x, dict) else 0
            )
        
        # Drop the original dictionary column
        result_df = result_df.drop(columns=[col])
    
    return result_df

# def create_project_features(df):
#     """
#     Aggregate task-level features to create project-level features.
#     Enhanced with project duration metrics and fixed unhashable dict issue.
#     """
#     # Group by project ID and name
#     agg_dict = {
#         # Issue counts
#         'key': 'count',  # Total number of issues
#     }
    
#     # Add issue type distribution if available
#     if 'fields.issuetype.name' in df.columns:
#         agg_dict['fields.issuetype.name'] = lambda x: x.value_counts().to_dict()
    
#     # Add priority distribution if available  
#     if 'fields.priority.name' in df.columns:
#         agg_dict['fields.priority.name'] = lambda x: x.value_counts().to_dict()
    
#     # Add status distribution if available
#     if 'fields.status.name' in df.columns:
#         agg_dict['fields.status.name'] = lambda x: x.value_counts().to_dict()
    
#     # Team size metrics if available
#     if 'fields.assignee.key' in df.columns:
#         agg_dict['fields.assignee.key'] = lambda x: x.nunique()
    
#     if 'fields.creator.key' in df.columns:
#         agg_dict['fields.creator.key'] = lambda x: x.nunique()
    
#     # Connectivity metrics if available
#     if 'issuelinks_total' in df.columns:
#         agg_dict['issuelinks_total'] = ['sum', 'mean']
    
#     if 'has_issuelinks_relates' in df.columns:
#         agg_dict['has_issuelinks_relates'] = 'mean'
    
#     # Component usage if available - FIX for unhashable dict error
#     if 'fields.components' in df.columns:
#         # Extract component names safely, handling different data structures
#         agg_dict['fields.components'] = lambda x: set([
#             str(comp_name) for row in x 
#             for comp_name in (row if isinstance(row, list) else []) 
#         ])
    
#     # Add component and label counts if available
#     if 'component_count' in df.columns:
#         agg_dict['component_count'] = ['sum', 'mean']
    
#     if 'label_count' in df.columns:
#         agg_dict['label_count'] = ['sum', 'mean']
    
#     # Add sprint information if available
#     if 'sprint' in df.columns:
#         agg_dict['sprint'] = lambda x: x.value_counts().to_dict()
    
#     # Add creation time features if available
#     if 'created_date' in df.columns:
#         # Min and max dates to calculate project duration
#         agg_dict['created_date'] = ['min', 'max']
    
#     if 'created_month' in df.columns:
#         agg_dict['created_month'] = lambda x: x.value_counts().to_dict()
    
#     if 'created_day_of_week' in df.columns:
#         agg_dict['created_day_of_week'] = lambda x: x.value_counts().to_dict()
    
#     # Add resolution time statistics if available (for model training)
#     if 'resolution_time' in df.columns:
#         agg_dict['resolution_time'] = ['mean', 'median', 'min', 'max', 'std', 'sum']
    
#     # Add description embedding averages if available
#     embedding_cols = [col for col in df.columns if col.startswith('desc_emb_')]
#     for col in embedding_cols:
#         agg_dict[col] = 'mean'
    
#     # Perform the groupby aggregation
#     project_features = df.groupby(['fields.project.id', 'fields.project.name']).agg(agg_dict)
    
#     # Flatten the multi-level columns
#     project_features.columns = ['_'.join(col) if isinstance(col, tuple) else col 
#                                for col in project_features.columns]
    
#     # Reset index to convert groupby result to regular DataFrame
#     project_features = project_features.reset_index()
    
#     # Expand the categorical distributions
#     project_features = expand_categorical_features(project_features)
    
#     # Add derived metrics
    
#     # Compute team-related metrics if available
#     if 'fields.assignee.key' in project_features.columns and 'key_count' in project_features.columns:
#         project_features['issue_to_assignee_ratio'] = (
#             project_features['key_count'] / project_features['fields.assignee.key'].apply(lambda x: max(1, x))
#         )
    
#     # Calculate link density if available
#     if 'issuelinks_total_sum' in project_features.columns and 'key_count' in project_features.columns:
#         project_features['link_density'] = (
#             project_features['issuelinks_total_sum'] / project_features['key_count']
#         )
    
#     # Calculate temporal metrics (if date information is available)
#     if 'created_date_min' in project_features.columns and 'created_date_max' in project_features.columns:
#         # Project duration in days
#         project_features['project_duration_days'] = (
#             (pd.to_datetime(project_features['created_date_max']) - 
#              pd.to_datetime(project_features['created_date_min'])).dt.total_seconds() / (24 * 3600)
#         )
        
#         # Issue creation rate (issues per day) - a proxy for velocity
#         project_features['issue_creation_rate'] = (
#             project_features['key_count'] / 
#             project_features['project_duration_days'].replace(0, 1)  # Avoid division by zero
#         )
    
#     return project_features
def create_project_features(df):
    """
    Aggregate task-level features to create project-level features.
    Returns a DataFrame with clean, readable column names.
    
    Parameters:
        df (pd.DataFrame): DataFrame with task-level features
        
    Returns:
        pd.DataFrame: DataFrame with project-level features and clean field names
    """
    # Define the aggregation dictionary
    agg_dict = {}
    
    # Basic counts
    agg_dict['key'] = 'count'  # This will create 'key_count' in the result
    
    # Add issue type distribution if available
    if 'fields.issuetype.name' in df.columns:
        agg_dict['fields.issuetype.name'] = lambda x: x.value_counts().to_dict()
    
    # Add priority distribution if available  
    if 'fields.priority.name' in df.columns:
        agg_dict['fields.priority.name'] = lambda x: x.value_counts().to_dict()
    
    # Add status distribution if available
    if 'fields.status.name' in df.columns:
        agg_dict['fields.status.name'] = lambda x: x.value_counts().to_dict()
    
    # Team size metrics if available
    if 'fields.assignee.key' in df.columns:
        agg_dict['fields.assignee.key'] = lambda x: x.nunique()
    
    if 'fields.creator.key' in df.columns:
        agg_dict['fields.creator.key'] = lambda x: x.nunique()
        
    if 'fields.reporter.key' in df.columns:
        agg_dict['fields.reporter.key'] = lambda x: x.nunique()
    
    # Connectivity metrics if available
    if 'issuelinks_total' in df.columns:
        agg_dict['issuelinks_total'] = ['sum', 'mean']
    
    if 'has_issuelinks_relates' in df.columns:
        agg_dict['has_issuelinks_relates'] = 'mean'
        
    if 'has_issuelinks_cloners' in df.columns:
        agg_dict['has_issuelinks_cloners'] = 'mean'
    
    # Component usage if available - Fix for unhashable dict error
    if 'fields.components' in df.columns:
        # Extract component names safely, handling different data structures
        agg_dict['fields.components'] = lambda x: set([
            str(comp_name) for row in x 
            for comp_name in (row if isinstance(row, list) else []) 
        ])
    
    # Add component and label counts
    if 'component_count' in df.columns:
        agg_dict['component_count'] = ['sum', 'mean']
    
    if 'label_count' in df.columns:
        agg_dict['label_count'] = ['sum', 'mean']
    
    # Add sprint information if available
    if 'sprint' in df.columns:
        agg_dict['sprint'] = lambda x: x.value_counts().to_dict()
    
    # Add creation time features 
    if 'created_date' in df.columns:
        agg_dict['created_date'] = ['min', 'max']
    
    if 'created_month' in df.columns:
        agg_dict['created_month'] = lambda x: x.value_counts().to_dict()
    
    if 'created_day_of_week' in df.columns:
        agg_dict['created_day_of_week'] = lambda x: x.value_counts().to_dict()
    
    # Add resolution time statistics
    if 'resolution_time' in df.columns:
        agg_dict['resolution_time'] = ['mean', 'median', 'min', 'max', 'std', 'sum']
    
    # Add description embedding averages
    embedding_cols = [col for col in df.columns if col.startswith('desc_emb_')]
    for col in embedding_cols:
        agg_dict[col] = 'mean'
    
    # Perform the groupby aggregation
    project_features = df.groupby(['fields.project.id', 'fields.project.name']).agg(agg_dict)
    
    # Flatten the multi-level columns
    project_features.columns = ['_'.join(col) if isinstance(col, tuple) else col 
                               for col in project_features.columns]
    
    # Reset index to convert groupby result to regular DataFrame
    project_features = project_features.reset_index()
    
    # Expand the categorical distributions
    project_features = expand_categorical_features(project_features)
    
    # Create a dictionary of old to new column names
    rename_mapping = {
        'fields.project.id': 'project_id',
        'fields.project.name': 'project_name',
        'key_count': 'issue_count',
        'fields.assignee.key': 'assignee_count',
        'fields.creator.key': 'creator_count',
        'fields.reporter.key': 'reporter_count',
        'fields.issuetype.name': 'issue_types',
        'fields.priority.name': 'priorities',
        'fields.status.name': 'statuses',
        'issuelinks_total_sum': 'total_links',
        'issuelinks_total_mean': 'avg_links_per_issue',
        'has_issuelinks_relates_mean': 'pct_related_issues',
        'has_issuelinks_cloners_mean': 'pct_cloned_issues',
        'fields.components': 'component_names',
        'component_count_sum': 'total_components',
        'component_count_mean': 'avg_components_per_issue',
        'label_count_sum': 'total_labels',
        'label_count_mean': 'avg_labels_per_issue',
        'sprint': 'sprints',
        'created_date_min': 'project_start_date',
        'created_date_max': 'project_latest_date',
        'created_month': 'creation_months',
        'created_day_of_week': 'creation_weekdays',
        'resolution_time_mean': 'avg_resolution_hours',
        'resolution_time_median': 'median_resolution_hours',
        'resolution_time_min': 'min_resolution_hours',
        'resolution_time_max': 'max_resolution_hours',
        'resolution_time_std': 'resolution_hours_std',
        'resolution_time_sum': 'total_resolution_hours'
    }
    
    # Only rename columns that exist in the DataFrame
    columns_to_rename = {k: v for k, v in rename_mapping.items() if k in project_features.columns}
    project_features = project_features.rename(columns=columns_to_rename)
    
    # Add derived metrics with clean names
    
    # Project duration in days - using the original column names first, then renaming
    if 'created_date_min' in project_features.columns and 'created_date_max' in project_features.columns:
        project_features['project_duration_days'] = (
            (pd.to_datetime(project_features['created_date_max']) - 
             pd.to_datetime(project_features['created_date_min'])).dt.total_seconds() / (24 * 3600)
        )
        
        # Issue creation rate (issues per day) - a proxy for velocity
        if 'key_count' in project_features.columns:  # Use original column name
            project_features['issues_per_day'] = (
                project_features['key_count'] / 
                project_features['project_duration_days'].replace(0, 1)  # Avoid division by zero
            )
    elif 'project_start_date' in project_features.columns and 'project_latest_date' in project_features.columns:
        project_features['project_duration_days'] = (
            (pd.to_datetime(project_features['project_latest_date']) - 
             pd.to_datetime(project_features['project_start_date'])).dt.total_seconds() / (24 * 3600)
        )
        
        # Issue creation rate (issues per day) - a proxy for velocity
        if 'issue_count' in project_features.columns:  # Use renamed column
            project_features['issues_per_day'] = (
                project_features['issue_count'] / 
                project_features['project_duration_days'].replace(0, 1)  # Avoid division by zero
            )
    
    # Compute team-related metrics - check both original and renamed columns
    if 'fields.assignee.key' in project_features.columns and 'key_count' in project_features.columns:
        project_features['issues_per_assignee'] = (
            project_features['key_count'] / 
            project_features['fields.assignee.key'].apply(lambda x: max(1, x))
        )
    elif 'assignee_count' in project_features.columns and 'issue_count' in project_features.columns:
        project_features['issues_per_assignee'] = (
            project_features['issue_count'] / 
            project_features['assignee_count'].apply(lambda x: max(1, x))
        )
    
    # Calculate network metrics - check both original and renamed columns
    if 'issuelinks_total_sum' in project_features.columns and 'key_count' in project_features.columns:
        project_features['link_density'] = (
            project_features['issuelinks_total_sum'] / project_features['key_count']
        )
    elif 'total_links' in project_features.columns and 'issue_count' in project_features.columns:
        project_features['link_density'] = (
            project_features['total_links'] / project_features['issue_count']
        )
    
    return project_features

# Example usage:
if __name__ == "__main__":
    df_planning = export_clean_planningphase_df(open_dtale=True)
    print("Planning-phase DataFrame columns:")
    print(df_planning.columns.tolist())
    print("Sample rows:")
    print(df_planning.head(5))


Processing repository: MongoDB ...
Found 137172 total issues in 'MongoDB'. Processing in batches of 500...
Using fixed maximum of 100 records. Will retrieve 100 issues.
Final sample for 'MongoDB': 100 issues (out of 137172 total).
  - Processing changelog history batch 1/1
Data processed. Launching D-Tale session...
✅ D-Tale session launched successfully.
Launching D-Tale session for planning-phase DataFrame...



Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '<DatetimeArray>
['2016-05-31 03:37:02', '2015-04-09 01:57:36', '2016-06-02 19:34:23',
 '2016-08-30 18:49:27', '2021-11-11 18:46:00', '2021-10-14 01:55:17',
 '2016-08-26 11:59:57', '2021-11-17 18:36:08', '2021-12-09 17:18:04',
 '2021-06-14 08:18:38', '2021-05-11 17:40:37', '2020-12-10 15:57:59',
 '2020-04-27 22:15:42', '2020-04-07 18:45:20', '2020-01-15 16:48:20',
 '2021-12-22 20:20:31', '2020-06-08 18:34:38', '2019-04-30 15:29:29',
 '2018-12-14 02:39:58', '2018-08-16 15:19:04', '2018-06-11 19:27:35',
 '2018-04-23 17:53:21', '2018-03-06 23:25:03', '2018-02-05 21:41:41',
 '2017-04-18 17:41:48', '2017-10-18 16:13:51', '2016-12-05 16:06:58',
 '2016-10-04 15:48:29', '2016-05-23 17:45:00', '2015-12-23 15:41:37',
 '2015-09-24 22:02:51', '2015-09-15 18:16:27', '2015-08-14 21:23:05',
 '2015-08-06 17:29:43', '2015-01-22 17:57:26', '2014-12-12 17:25:31',
 '2019-06-11 18:56:00', '2014-05-21 17:4

Planning-phase DataFrame columns:
['project_id', 'project_name', 'issue_count', 'fields.assignee.key_<lambda>', 'fields.creator.key_<lambda>', 'fields.reporter.key_<lambda>', 'total_links', 'avg_links_per_issue', 'pct_cloned_issues', 'fields.components_<lambda>', 'total_components', 'avg_components_per_issue', 'total_labels', 'avg_labels_per_issue', 'project_start_date', 'project_latest_date', 'avg_resolution_hours', 'median_resolution_hours', 'min_resolution_hours', 'max_resolution_hours', 'resolution_hours_std', 'total_resolution_hours', 'fields.issuetype.name_<lambda>_Task', 'fields.issuetype.name_<lambda>_Bug', 'fields.issuetype.name_<lambda>_Problem Ticket', 'fields.issuetype.name_<lambda>_New Feature', 'fields.issuetype.name_<lambda>_Improvement', 'fields.priority.name_<lambda>_Major - P3', 'fields.priority.name_<lambda>_Missing', 'fields.priority.name_<lambda>_Critical - P2', 'fields.priority.name_<lambda>_Unknown', 'fields.priority.name_<lambda>_Trivial - P5', 'fields.priority.

Address already in use


Port 40000 is in use by another program. Either identify and stop that program, or start the server with a different port.
2025-03-03 09:22:02,433 - ERROR    - 1
Traceback (most recent call last):
  File "/opt/anaconda3/envs/my_mongo_env/lib/python3.9/site-packages/werkzeug/serving.py", line 759, in __init__
    self.server_bind()
  File "/opt/anaconda3/envs/my_mongo_env/lib/python3.9/http/server.py", line 137, in server_bind
    socketserver.TCPServer.server_bind(self)
  File "/opt/anaconda3/envs/my_mongo_env/lib/python3.9/socketserver.py", line 466, in server_bind
    self.socket.bind(self.server_address)
OSError: [Errno 48] Address already in use

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/opt/anaconda3/envs/my_mongo_env/lib/python3.9/site-packages/dtale/app.py", line 884, in _start
    app.run(
  File "/opt/anaconda3/envs/my_mongo_env/lib/python3.9/site-packages/dtale/app.py", line 225, in run
    super(DtaleFla