In [3]:
import sys, os
project_root = os.path.abspath("..")  # adjust based on your directory structure
if project_root not in sys.path:
    sys.path.append(project_root)

import re
import pandas as pd
from FeatureCleaning.CleanDSDTale import export_clean_df  # Adjust import path as needed
import dtale
import tempfile
import gc

# Add to a cell in your notebook
import importlib


# Force reload the module
if 'FeatureCleaning.CleanDSDTale' in sys.modules:
    importlib.reload(sys.modules['FeatureCleaning.CleanDSDTale'])
    from FeatureCleaning.CleanDSDTale import export_clean_df  # Re-import after reload

def extract_sprint_name(sprint_str):
    """
    Extract the sprint name from a sprint string.
    For example, from a value like:
      "com.atlassian.greenhopper.service.sprint.Sprint@16353814[id=5599,...,name=Sprint 9,...]"
    this returns "Sprint 9".
    """
    if not sprint_str or not isinstance(sprint_str, str):
        return None
    match = re.search(r'name=([^,]+)', sprint_str)
    if match:
        return match.group(1).strip()
    return None

def extract_sprint_from_fixversions(fix_versions):
    """
    Given a list of fixVersions dictionaries, return the first version name containing 'Sprint'.
    """
    if isinstance(fix_versions, list):
        for version in fix_versions:
            name = version.get("name", "")
            if "Sprint" in name:
                return name
    return None

def add_sprint_field(df):
    """
    Add a standardized 'sprint' column to the DataFrame.
    First, try using customfield_10557; if not present, fall back to fixVersions.
    """
    if "fields.customfield_10557" in df.columns:
        df["sprint"] = df["fields.customfield_10557"].apply(extract_sprint_name)
    elif "fields.fixVersions" in df.columns:
        df["sprint"] = df["fields.fixVersions"].apply(extract_sprint_from_fixversions)
    else:
        df["sprint"] = None
    return df

def extract_planning_fields(df):
    """
    From the full cleaned DataFrame, extract only the planning-phase fields.
    Enhanced to include all critical planning-phase data for task-level and project-level estimation.
    """
    df = df.copy()
    df = add_sprint_field(df)
    
    planning_cols = [
        # Basic issue identification
        "key",                    # Issue key
        "fields.summary",         # Summary text
        
        # Issue classification
        "fields.issuetype.name",  # Type
        "fields.status.name",     # Status
        "fields.priority.name",   # Priority
        
        # People involved
        "fields.assignee.key",    # Assignee ID
        "fields.creator.key",     # Creator ID
        "fields.reporter.key",    # Reporter ID
        
        # Project context
        "fields.project.id",      # Project ID
        "fields.project.name",    # Project name
        
        # Time information
        "fields.created",         # Creation date
        
        # Relationships
        "sprint",                 # Extracted sprint info
        "issuelinks_total",       # Total link count
        "has_issuelinks_relates", # Has 'relates to' links
        "has_issuelinks_cloners", # Has 'cloners' links
        
        # Components and labels
        "fields.components",      # Components
        "fields.labels",          # Labels
        
        # For training purposes
        "fields.resolutiondate"   # Used to calculate resolution time
    ]
    
    # Get all description embedding columns (they start with "desc_emb_")
    embedding_cols = [col for col in df.columns if col.startswith("desc_emb_")]
    
    # Combine all desired columns
    all_cols = planning_cols + embedding_cols
    
    # Filter to only include columns that exist in the DataFrame
    available_cols = [col for col in all_cols if col in df.columns]
    
    # Create the base planning DataFrame
    planning_df = df[available_cols].copy()
    
    # Add derived features
    
    # Component and label counts
    if "fields.components" in planning_df.columns:
        planning_df["component_count"] = planning_df["fields.components"].apply(
            lambda x: len(x) if isinstance(x, list) else 0
        )
    
    if "fields.labels" in planning_df.columns:
        planning_df["label_count"] = planning_df["fields.labels"].apply(
            lambda x: len(x) if isinstance(x, list) else 0
        )
    
    # Creation date features
    if "fields.created" in planning_df.columns:
        planning_df["created_date"] = pd.to_datetime(planning_df["fields.created"])
        planning_df["created_day_of_week"] = planning_df["created_date"].dt.dayofweek
        planning_df["created_month"] = planning_df["created_date"].dt.month
        planning_df["created_year"] = planning_df["created_date"].dt.year
    
    # Calculate resolution time (if available - for training only)
    if "fields.resolutiondate" in planning_df.columns and "fields.created" in planning_df.columns:
        planning_df["resolution_time"] = (
            pd.to_datetime(planning_df["fields.resolutiondate"]) - 
            pd.to_datetime(planning_df["fields.created"])
        ).dt.total_seconds() / 3600  # Convert to hours
        
        # Drop the raw resolution date as we now have the derived feature
        planning_df = planning_df.drop(columns=["fields.resolutiondate"])
    
    return planning_df

def create_project_features(df):
    """
    Aggregate task-level features to create project-level features.
    Enhanced with project duration metrics.
    """
    # Group by project ID and name
    agg_dict = {
        # Issue counts
        'key': 'count',  # Total number of issues
    }
    
    # Add issue type distribution if available
    if 'fields.issuetype.name' in df.columns:
        agg_dict['fields.issuetype.name'] = lambda x: x.value_counts().to_dict()
    
    # Add priority distribution if available  
    if 'fields.priority.name' in df.columns:
        agg_dict['fields.priority.name'] = lambda x: x.value_counts().to_dict()
    
    # Add status distribution if available
    if 'fields.status.name' in df.columns:
        agg_dict['fields.status.name'] = lambda x: x.value_counts().to_dict()
    
    # Team size metrics if available
    if 'fields.assignee.key' in df.columns:
        agg_dict['fields.assignee.key'] = lambda x: x.nunique()
    
    if 'fields.creator.key' in df.columns:
        agg_dict['fields.creator.key'] = lambda x: x.nunique()
    
    # Connectivity metrics if available
    if 'issuelinks_total' in df.columns:
        agg_dict['issuelinks_total'] = ['sum', 'mean']
    
    if 'has_issuelinks_relates' in df.columns:
        agg_dict['has_issuelinks_relates'] = 'mean'
    
    # Component usage if available
    if 'fields.components' in df.columns:
        agg_dict['fields.components'] = lambda x: set(item for sublist in x if isinstance(sublist, list) for item in sublist)
    
    # Add component and label counts if available
    if 'component_count' in df.columns:
        agg_dict['component_count'] = ['sum', 'mean']
    
    if 'label_count' in df.columns:
        agg_dict['label_count'] = ['sum', 'mean']
    
    # Add sprint information if available
    if 'sprint' in df.columns:
        agg_dict['sprint'] = lambda x: x.value_counts().to_dict()
    
    # Add creation time features if available
    if 'created_date' in df.columns:
        # Min and max dates to calculate project duration
        agg_dict['created_date'] = ['min', 'max']
    
    if 'created_month' in df.columns:
        agg_dict['created_month'] = lambda x: x.value_counts().to_dict()
    
    if 'created_day_of_week' in df.columns:
        agg_dict['created_day_of_week'] = lambda x: x.value_counts().to_dict()
    
    # Add resolution time statistics if available (for model training)
    if 'resolution_time' in df.columns:
        agg_dict['resolution_time'] = ['mean', 'median', 'min', 'max', 'std', 'sum']
    
    # Add description embedding averages if available
    embedding_cols = [col for col in df.columns if col.startswith('desc_emb_')]
    for col in embedding_cols:
        agg_dict[col] = 'mean'
    
    # Perform the groupby aggregation
    project_features = df.groupby(['fields.project.id', 'fields.project.name']).agg(agg_dict)
    
    # Flatten the multi-level columns
    project_features.columns = ['_'.join(col) if isinstance(col, tuple) else col 
                               for col in project_features.columns]
    
    # Reset index to convert groupby result to regular DataFrame
    project_features = project_features.reset_index()
    
    # Expand the categorical distributions
    project_features = expand_categorical_features(project_features)
    
    # Add derived metrics
    
    # Compute team-related metrics if available
    if 'fields.assignee.key' in project_features.columns and 'key_count' in project_features.columns:
        project_features['issue_to_assignee_ratio'] = (
            project_features['key_count'] / project_features['fields.assignee.key'].apply(lambda x: max(1, x))
        )
    
    # Calculate link density if available
    if 'issuelinks_total_sum' in project_features.columns and 'key_count' in project_features.columns:
        project_features['link_density'] = (
            project_features['issuelinks_total_sum'] / project_features['key_count']
        )
    
    # Calculate temporal metrics (if date information is available)
    if 'created_date_min' in project_features.columns and 'created_date_max' in project_features.columns:
        # Project duration in days
        project_features['project_duration_days'] = (
            (pd.to_datetime(project_features['created_date_max']) - 
             pd.to_datetime(project_features['created_date_min'])).dt.total_seconds() / (24 * 3600)
        )
        
        # Issue creation rate (issues per day) - a proxy for velocity
        project_features['issue_creation_rate'] = (
            project_features['key_count'] / 
            project_features['project_duration_days'].replace(0, 1)  # Avoid division by zero
        )
    
    return project_features

def export_clean_planningphase_df(open_dtale=True):
    """
    Run the full cleaning pipeline to obtain a cleaned task-level DataFrame,
    then extract only the planning-phase fields to create a planning-phase dataset.
    If open_dtale is True, launch a D-Tale session for interactive exploration.
    
    Returns:
      planning_df (pd.DataFrame): The planning-phase DataFrame with the selected fields.
    """
    full_df = export_clean_df()  # This returns your fully cleaned task-level DataFrame.
    planning_df = extract_planning_fields(full_df)
    project_features_df = create_project_features(planning_df)

    
    if open_dtale:
        print("Launching D-Tale session for planning-phase DataFrame...")
        d_pf = dtale.show(project_features_df, ignore_duplicate=True, allow_cell_edits=False)

        d_pf.open_browser()
        
    return project_features_df

def expand_categorical_features(df):
    """
    Expand categorical distributions stored as dictionaries.
    """
    # Identify columns that contain dictionaries
    dict_cols = [col for col in df.columns 
                if isinstance(df[col].iloc[0], dict) if len(df) > 0]
    
    result_df = df.copy()
    
    for col in dict_cols:
        # Find all unique categories across all projects
        all_categories = set()
        for d in df[col]:
            if isinstance(d, dict):
                all_categories.update(d.keys())
        
        # Create a column for each category
        for category in all_categories:
            new_col_name = f"{col}_{category}"
            result_df[new_col_name] = df[col].apply(
                lambda x: x.get(category, 0) if isinstance(x, dict) else 0
            )
        
        # Drop the original dictionary column
        result_df = result_df.drop(columns=[col])
    
    return result_df

def create_project_features(df):
    """
    Aggregate task-level features to create project-level features.
    Returns a DataFrame with clean, readable column names.
    
    Parameters:
        df (pd.DataFrame): DataFrame with task-level features
        
    Returns:
        pd.DataFrame: DataFrame with project-level features and clean field names
    """
    # Define the aggregation dictionary
    agg_dict = {}
    
    # Basic counts
    agg_dict['key'] = 'count'  # This will create 'key_count' in the result
    
    # Add issue type distribution if available
    if 'fields.issuetype.name' in df.columns:
        agg_dict['fields.issuetype.name'] = lambda x: x.value_counts().to_dict()
    
    # Add priority distribution if available  
    if 'fields.priority.name' in df.columns:
        agg_dict['fields.priority.name'] = lambda x: x.value_counts().to_dict()
    
    # Add status distribution if available
    if 'fields.status.name' in df.columns:
        agg_dict['fields.status.name'] = lambda x: x.value_counts().to_dict()
    
    # Team size metrics if available
    if 'fields.assignee.key' in df.columns:
        agg_dict['fields.assignee.key'] = lambda x: x.nunique()
    
    if 'fields.creator.key' in df.columns:
        agg_dict['fields.creator.key'] = lambda x: x.nunique()
        
    if 'fields.reporter.key' in df.columns:
        agg_dict['fields.reporter.key'] = lambda x: x.nunique()
    
    # Connectivity metrics if available
    if 'issuelinks_total' in df.columns:
        agg_dict['issuelinks_total'] = ['sum', 'mean']
    
    if 'has_issuelinks_relates' in df.columns:
        agg_dict['has_issuelinks_relates'] = 'mean'
        
    if 'has_issuelinks_cloners' in df.columns:
        agg_dict['has_issuelinks_cloners'] = 'mean'
    
    # Component usage if available - Fix for unhashable dict error
    if 'fields.components' in df.columns:
        # Extract component names safely, handling different data structures
        agg_dict['fields.components'] = lambda x: set([
            str(comp_name) for row in x 
            for comp_name in (row if isinstance(row, list) else []) 
        ])
    
    # Add component and label counts
    if 'component_count' in df.columns:
        agg_dict['component_count'] = ['sum', 'mean']
    
    if 'label_count' in df.columns:
        agg_dict['label_count'] = ['sum', 'mean']
    
    # Add sprint information if available
    if 'sprint' in df.columns:
        agg_dict['sprint'] = lambda x: x.value_counts().to_dict()
    
    # Add creation time features 
    if 'created_date' in df.columns:
        agg_dict['created_date'] = ['min', 'max']
    
    if 'created_month' in df.columns:
        agg_dict['created_month'] = lambda x: x.value_counts().to_dict()
    
    if 'created_day_of_week' in df.columns:
        agg_dict['created_day_of_week'] = lambda x: x.value_counts().to_dict()
    
    # Add resolution time statistics
    if 'resolution_time' in df.columns:
        agg_dict['resolution_time'] = ['mean', 'median', 'min', 'max', 'std', 'sum']
    
    # Add description embedding averages
    embedding_cols = [col for col in df.columns if col.startswith('desc_emb_')]
    for col in embedding_cols:
        agg_dict[col] = 'mean'
    
    # Perform the groupby aggregation
    project_features = df.groupby(['fields.project.id', 'fields.project.name']).agg(agg_dict)
    
    # Flatten the multi-level columns
    project_features.columns = ['_'.join(col) if isinstance(col, tuple) else col 
                               for col in project_features.columns]
    
    # Reset index to convert groupby result to regular DataFrame
    project_features = project_features.reset_index()
    
    # Expand the categorical distributions
    project_features = expand_categorical_features(project_features)
    
    # Create a dictionary of old to new column names
    rename_mapping = {
        'fields.project.id': 'project_id',
        'fields.project.name': 'project_name',
        'key_count': 'issue_count',
        'fields.assignee.key': 'assignee_count',
        'fields.creator.key': 'creator_count',
        'fields.reporter.key': 'reporter_count',
        'fields.issuetype.name': 'issue_types',
        'fields.priority.name': 'priorities',
        'fields.status.name': 'statuses',
        'issuelinks_total_sum': 'total_links',
        'issuelinks_total_mean': 'avg_links_per_issue',
        'has_issuelinks_relates_mean': 'pct_related_issues',
        'has_issuelinks_cloners_mean': 'pct_cloned_issues',
        'fields.components': 'component_names',
        'component_count_sum': 'total_components',
        'component_count_mean': 'avg_components_per_issue',
        'label_count_sum': 'total_labels',
        'label_count_mean': 'avg_labels_per_issue',
        'sprint': 'sprints',
        'created_date_min': 'project_start_date',
        'created_date_max': 'project_latest_date',
        'created_month': 'creation_months',
        'created_day_of_week': 'creation_weekdays',
        'resolution_time_mean': 'avg_resolution_hours',
        'resolution_time_median': 'median_resolution_hours',
        'resolution_time_min': 'min_resolution_hours',
        'resolution_time_max': 'max_resolution_hours',
        'resolution_time_std': 'resolution_hours_std',
        'resolution_time_sum': 'total_resolution_hours'
    }
    
    # Only rename columns that exist in the DataFrame
    columns_to_rename = {k: v for k, v in rename_mapping.items() if k in project_features.columns}
    project_features = project_features.rename(columns=columns_to_rename)
    
    # Add derived metrics with clean names
    
    # Project duration in days - using the original column names first, then renaming
    if 'created_date_min' in project_features.columns and 'created_date_max' in project_features.columns:
        project_features['project_duration_days'] = (
            (pd.to_datetime(project_features['created_date_max']) - 
             pd.to_datetime(project_features['created_date_min'])).dt.total_seconds() / (24 * 3600)
        )
        
        # Issue creation rate (issues per day) - a proxy for velocity
        if 'key_count' in project_features.columns:  # Use original column name
            project_features['issues_per_day'] = (
                project_features['key_count'] / 
                project_features['project_duration_days'].replace(0, 1)  # Avoid division by zero
            )
    elif 'project_start_date' in project_features.columns and 'project_latest_date' in project_features.columns:
        project_features['project_duration_days'] = (
            (pd.to_datetime(project_features['project_latest_date']) - 
             pd.to_datetime(project_features['project_start_date'])).dt.total_seconds() / (24 * 3600)
        )
        
        # Issue creation rate (issues per day) - a proxy for velocity
        if 'issue_count' in project_features.columns:  # Use renamed column
            project_features['issues_per_day'] = (
                project_features['issue_count'] / 
                project_features['project_duration_days'].replace(0, 1)  # Avoid division by zero
            )
    
    # Compute team-related metrics - check both original and renamed columns
    if 'fields.assignee.key' in project_features.columns and 'key_count' in project_features.columns:
        project_features['issues_per_assignee'] = (
            project_features['key_count'] / 
            project_features['fields.assignee.key'].apply(lambda x: max(1, x))
        )
    elif 'assignee_count' in project_features.columns and 'issue_count' in project_features.columns:
        project_features['issues_per_assignee'] = (
            project_features['issue_count'] / 
            project_features['assignee_count'].apply(lambda x: max(1, x))
        )
    
    # Calculate network metrics - check both original and renamed columns
    if 'issuelinks_total_sum' in project_features.columns and 'key_count' in project_features.columns:
        project_features['link_density'] = (
            project_features['issuelinks_total_sum'] / project_features['key_count']
        )
    elif 'total_links' in project_features.columns and 'issue_count' in project_features.columns:
        project_features['link_density'] = (
            project_features['total_links'] / project_features['issue_count']
        )
    
    return project_features

# Example usage:
if __name__ == "__main__":
    df_planning = export_clean_planningphase_df(open_dtale=True)
    print("Planning-phase DataFrame columns:")
    print(df_planning.columns.tolist())
    print("Sample rows:")
    print(df_planning.head(5))


Processing repository: Apache ...
Found 1014926 total issues in 'Apache'. Analyzing projects...
Found 646 projects in repository.
Project distribution: 646 small, 0 medium, 0 large
Selected small project: Spark with 37443 issues
Selected small project: Apache Flex with 35390 issues
Selected small project: HBase with 26421 issues
Selected small project: Hive with 25731 issues
Selected small project: Flink with 25492 issues
Selected small project: Ambari with 25384 issues
Selected small project: Camel with 17391 issues
Selected small project: Cassandra with 17115 issues
Selected small project: Ignite with 16194 issues
Selected small project: Hadoop Common with 15797 issues
Selected small project: Infrastructure with 15777 issues
Selected small project: Solr with 15704 issues
Selected small project: Hadoop HDFS with 15588 issues
Selected small project: Apache Arrow with 15128 issues
Selected small project: Apache Cordova with 14007 issues
Selected small project: Beam with 13597 issues
Se

2025-03-11 11:29:38,363 - INFO     - Executing shutdown due to inactivity...
2025-03-11 11:29:38,431 - INFO     - Executing shutdown...
2025-03-11 11:29:38,432 - INFO     - Not running with the Werkzeug Server, exiting by searching gc for BaseWSGIServer

`torch.distributed.reduce_op` is deprecated, please use `torch.distributed.ReduceOp` instead



  - Processed batch 84 (300 issues, 25200/35390)
  - Processed batch 85 (300 issues, 25500/35390)
  - Processed batch 86 (300 issues, 25800/35390)
  - Processed batch 87 (300 issues, 26100/35390)
  - Processed batch 88 (300 issues, 26400/35390)
  - Processed batch 89 (300 issues, 26700/35390)
  - Processed batch 90 (300 issues, 27000/35390)
  - Processed batch 91 (300 issues, 27300/35390)
  - Processed batch 92 (300 issues, 27600/35390)
  - Processed batch 93 (300 issues, 27900/35390)
  - Processed batch 94 (300 issues, 28200/35390)
  - Processed batch 95 (300 issues, 28500/35390)
  - Processed batch 96 (300 issues, 28800/35390)
  - Processed batch 97 (300 issues, 29100/35390)
  - Processed batch 98 (300 issues, 29400/35390)
  - Processed batch 99 (300 issues, 29700/35390)
  - Processed batch 100 (300 issues, 30000/35390)
  - Processed batch 101 (300 issues, 30300/35390)
  - Processed batch 102 (300 issues, 30600/35390)
  - Processed batch 103 (300 issues, 30900/35390)
  - Processed ba


Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '<DatetimeArray>
['2022-01-05 12:07:04', '2022-01-05 09:57:23', '2022-01-05 09:27:31',
 '2022-01-05 04:33:04', '2022-01-05 06:29:53', '2022-01-04 19:26:58',
 '2022-01-05 04:53:53', '2022-01-05 07:32:08', '2022-01-04 19:20:49',
 '2022-01-05 08:13:01',
 ...
 '2005-09-13 12:40:26', '2007-03-08 00:09:18', '2007-03-08 00:16:47',
 '2007-03-08 00:09:18', '2011-02-11 11:11:09', '2007-03-08 00:16:47',
 '2007-03-08 00:09:18', '2007-03-08 00:16:46', '2007-03-08 00:16:46',
 '2007-03-08 00:09:17']
Length: 752179, dtype: datetime64[ns]' has dtype incompatible with datetime64[ns, UTC], please explicitly cast to a compatible dtype first.


Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '<DatetimeArray>
['2022-01-05 09:48:48', '2022-01-05 09:46:33', '2022-01-05 08:19:29',
 '2022-01-05 04:22:29', '2022-01-05 01:10:51', '2022-01-04 19:22:40',
 '2022

✅ D-Tale session launched successfully.
Launching D-Tale session for planning-phase DataFrame...



Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '<DatetimeArray>
['2004-01-17 22:54:01', '2003-09-04 02:42:25', '2001-06-03 11:06:50',
 '2001-07-18 16:25:14', '2003-09-24 19:29:38', '2004-03-26 01:19:44',
 '2001-02-22 15:17:34', '2001-02-09 08:44:45', '2004-08-20 21:25:44',
 '2003-03-12 11:15:58',
 ...
 '2010-11-17 16:18:33', '2017-11-02 15:55:20', '2017-12-18 01:48:14',
 '2017-04-20 18:44:44', '2018-11-05 17:42:37', '2019-01-14 07:42:14',
 '2019-03-04 20:26:45', '2018-04-09 21:37:09', '2020-02-11 23:51:44',
 '2020-09-28 13:43:54']
Length: 190, dtype: datetime64[ns]' has dtype incompatible with datetime64[ns, UTC], please explicitly cast to a compatible dtype first.


Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '<DatetimeArray>
['2021-06-09 22:21:53', '2022-01-04 04:37:10', '2021-12-15 03:17:17',
 '2020-09-01 18:31:12', '2013-03-24 19:55:52', '2021-03-10 01:56:44',
 '2021-12

Planning-phase DataFrame columns:
['project_id', 'project_name', 'issue_count', 'fields.creator.key_<lambda>', 'fields.reporter.key_<lambda>', 'total_links', 'avg_links_per_issue', 'fields.components_<lambda>', 'total_components', 'avg_components_per_issue', 'total_labels', 'avg_labels_per_issue', 'project_start_date', 'project_latest_date', 'avg_resolution_hours', 'median_resolution_hours', 'min_resolution_hours', 'max_resolution_hours', 'resolution_hours_std', 'total_resolution_hours', 'fields.issuetype.name_<lambda>_RTC', 'fields.issuetype.name_<lambda>_Outage', 'fields.issuetype.name_<lambda>_Task', 'fields.issuetype.name_<lambda>_Sub-task', 'fields.issuetype.name_<lambda>_SVN->GIT Mirroring', 'fields.issuetype.name_<lambda>_Umbrella', 'fields.issuetype.name_<lambda>_Wish', 'fields.issuetype.name_<lambda>_Planned Work', 'fields.issuetype.name_<lambda>_New JIRA Project', 'fields.issuetype.name_<lambda>_New Git Repo', 'fields.issuetype.name_<lambda>_New Confluence Wiki', 'fields.issu

2025-03-12 07:50:31,997 - INFO     - Executing shutdown due to inactivity...
2025-03-12 07:50:32,143 - INFO     - Executing shutdown...
2025-03-12 07:50:32,144 - INFO     - Not running with the Werkzeug Server, exiting by searching gc for BaseWSGIServer

`torch.distributed.reduce_op` is deprecated, please use `torch.distributed.ReduceOp` instead

