In [None]:
import sys, os
project_root = os.path.abspath("..")  # adjust based on your directory structure
if project_root not in sys.path:
    sys.path.append(project_root)
    
import re
import pandas as pd
from FeatureCleaning.CleanDSDTale import export_clean_df  # Adjust import path as needed
import dtale

def extract_sprint_name(sprint_str):
    """
    Extract the sprint name from a sprint string.
    For example, from a value like:
      "com.atlassian.greenhopper.service.sprint.Sprint@16353814[id=5599,...,name=Sprint 9,...]"
    this returns "Sprint 9".

    Parameters:
        sprint_str: The raw sprint value. Anything that is not a non-empty
            string (None, NaN, lists, arrays, ...) yields None.

    Returns:
        The text following "name=" up to the next comma, stripped of
        surrounding whitespace, or None when there is no such text.
    """
    # Test the type before the truthiness: array-like values raise ValueError
    # when evaluated as a boolean, and they could never match anyway.
    if not isinstance(sprint_str, str) or not sprint_str:
        return None
    match = re.search(r'name=([^,]+)', sprint_str)
    return match.group(1).strip() if match else None

def extract_sprint_from_fixversions(fix_versions):
    """
    Given a list of fixVersions dictionaries, return the first version name containing 'Sprint'.

    Parameters:
        fix_versions: The fixVersions cell value. Anything that is not a list
            yields None.

    Returns:
        The first "name" value that is a string containing 'Sprint', or None
        when no entry qualifies.
    """
    if not isinstance(fix_versions, list):
        return None
    for version in fix_versions:
        # Entries that are not dictionaries have no "name" to inspect.
        if not isinstance(version, dict):
            continue
        name = version.get("name")
        # A missing or null name must not break the substring test.
        if isinstance(name, str) and "Sprint" in name:
            return name
    return None

def add_sprint_field(df):
    """
    Write a standardized 'sprint' column into the given DataFrame and return it.

    The column is derived from "fields.customfield_10557" when that column
    exists, otherwise from "fields.fixVersions"; when neither column is
    present every row gets None. The DataFrame is modified in place.
    """
    columns = df.columns
    if "fields.customfield_10557" in columns:
        # Preferred source: the raw sprint string
        sprint_values = df["fields.customfield_10557"].apply(extract_sprint_name)
    elif "fields.fixVersions" in columns:
        # Fallback source: version names that mention a sprint
        sprint_values = df["fields.fixVersions"].apply(extract_sprint_from_fixversions)
    else:
        sprint_values = None
    df["sprint"] = sprint_values
    return df

def extract_planning_fields(df):
    """
    Build the planning-phase DataFrame from the full cleaned DataFrame.

    A copy of the input receives a 'sprint' column, then only the
    planning-phase columns that actually exist (plus every "desc_emb_*"
    column) are kept. Derived columns are added on top: component/label
    counts, creation-date parts, and the resolution time in hours (the raw
    "fields.resolutiondate" column is dropped once that is computed).
    """
    source = add_sprint_field(df.copy())

    planning_cols = [
        # Basic issue identification
        "key",
        "fields.summary",
        # Issue classification
        "fields.issuetype.name",
        "fields.status.name",
        "fields.priority.name",
        # People involved
        "fields.assignee.key",
        "fields.creator.key",
        "fields.reporter.key",
        # Project context
        "fields.project.id",
        "fields.project.name",
        # Time information
        "fields.created",
        # Relationships
        "sprint",
        "issuelinks_total",
        "has_issuelinks_relates",
        "has_issuelinks_cloners",
        # Components and labels
        "fields.components",
        "fields.labels",
        # For training purposes: used to calculate resolution time
        "fields.resolutiondate",
    ]

    # Description embedding columns all share the "desc_emb_" prefix
    embedding_cols = [name for name in source.columns if name.startswith("desc_emb_")]

    # Keep only the requested columns that the DataFrame really has
    available_cols = [
        name for name in planning_cols + embedding_cols if name in source.columns
    ]
    planning_df = source[available_cols].copy()

    def list_length(value):
        # Non-list cells (e.g. missing values) count as zero entries
        return len(value) if isinstance(value, list) else 0

    # Component and label counts
    for list_col, count_col in (
        ("fields.components", "component_count"),
        ("fields.labels", "label_count"),
    ):
        if list_col in planning_df.columns:
            planning_df[count_col] = planning_df[list_col].apply(list_length)

    # Creation date features
    has_created = "fields.created" in planning_df.columns
    if has_created:
        created = pd.to_datetime(planning_df["fields.created"])
        planning_df["created_date"] = created
        planning_df["created_day_of_week"] = planning_df["created_date"].dt.dayofweek
        planning_df["created_month"] = planning_df["created_date"].dt.month
        planning_df["created_year"] = planning_df["created_date"].dt.year

    # Resolution time in hours (training target only)
    if "fields.resolutiondate" in planning_df.columns and has_created:
        elapsed = (
            pd.to_datetime(planning_df["fields.resolutiondate"])
            - pd.to_datetime(planning_df["fields.created"])
        )
        planning_df["resolution_time"] = elapsed.dt.total_seconds() / 3600

        # The raw resolution date is replaced by the derived feature
        planning_df = planning_df.drop(columns=["fields.resolutiondate"])

    return planning_df

def export_clean_planningphase_df(open_dtale=True):
    """
    Produce the planning-phase dataset from the cleaned task-level data.

    The cleaned DataFrame comes from export_clean_df() and is reduced with
    extract_planning_fields(). When open_dtale is True, a D-Tale session is
    started on the result and opened in the browser.

    Returns:
      planning_df (pd.DataFrame): The planning-phase DataFrame with the selected fields.
    """
    planning_df = extract_planning_fields(export_clean_df())

    if not open_dtale:
        return planning_df

    print("Launching D-Tale session for planning-phase DataFrame...")
    session = dtale.show(planning_df, ignore_duplicate=True, allow_cell_edits=False)
    session.open_browser()

    return planning_df

def expand_categorical_features(df):
    """
    Expand categorical distributions stored as dictionaries.

    A column is treated as a distribution when its first non-null value is a
    dict. Each such column is replaced by one column per dictionary key,
    named "<column>_<key>", holding that key's value (0 where the key or the
    whole dictionary is absent). New columns appear in first-seen key order.

    Parameters:
        df (pd.DataFrame): Input frame; it is not modified.

    Returns:
        pd.DataFrame: A copy of df with the dictionary columns expanded.
    """
    result_df = df.copy()

    # Nothing to inspect in an empty frame (avoids indexing row 0)
    if len(df) == 0:
        return result_df

    # Identify dictionary columns from the first value that is not null
    dict_cols = []
    for col in df.columns:
        present = df[col].dropna()
        if not present.empty and isinstance(present.iloc[0], dict):
            dict_cols.append(col)

    for col in dict_cols:
        cells = df[col]

        # Collect categories in first-seen order so the output is stable
        categories = {}
        for cell in cells:
            if isinstance(cell, dict):
                categories.update(dict.fromkeys(cell))

        # Create a column for each category
        for category in categories:
            result_df[f"{col}_{category}"] = [
                cell.get(category, 0) if isinstance(cell, dict) else 0
                for cell in cells
            ]

        # Drop the original dictionary column
        result_df = result_df.drop(columns=[col])

    return result_df

def create_project_features(df):
    """
    Aggregate task-level features to create project-level features.

    Rows are grouped by "fields.project.id" and "fields.project.name"; the
    result stays indexed by those two keys. Only aggregations whose source
    column exists in df are computed. Output columns:
      - key_count: number of issues
      - fields.issuetype.name / fields.priority.name / fields.status.name:
        value-count dictionaries, expanded by expand_categorical_features()
      - fields.assignee.key_nunique, fields.creator.key_nunique
      - issuelinks_total_sum, issuelinks_total_mean, has_issuelinks_relates_mean
      - fields.components: set of component names, plus component_count
      - <desc_emb_*>_mean for every embedding column present
      - issue_to_assignee_ratio and link_density when their inputs exist

    Parameters:
        df (pd.DataFrame): Task-level DataFrame.

    Returns:
        pd.DataFrame: One row per project.
    """
    group_keys = ['fields.project.id', 'fields.project.name']

    def value_distribution(values):
        # {category: number of occurrences} within one project
        return values.value_counts().to_dict()

    def component_names(values):
        # Component entries may be dictionaries, which cannot be set members,
        # so collect their names (or a string form) instead.
        return {
            item.get('name', str(item)) if isinstance(item, dict) else str(item)
            for cell in values if isinstance(cell, list)
            for item in cell
        }

    # Output column -> (source column, aggregation). Named aggregation keeps
    # the output names explicit instead of relying on flattened
    # "<column>_<lambda>" labels.
    aggregations = {
        'key_count': ('key', 'count'),
        'fields.issuetype.name': ('fields.issuetype.name', value_distribution),
        'fields.priority.name': ('fields.priority.name', value_distribution),
        'fields.status.name': ('fields.status.name', value_distribution),
        'fields.assignee.key_nunique': ('fields.assignee.key', 'nunique'),
        'fields.creator.key_nunique': ('fields.creator.key', 'nunique'),
        'issuelinks_total_sum': ('issuelinks_total', 'sum'),
        'issuelinks_total_mean': ('issuelinks_total', 'mean'),
        'has_issuelinks_relates_mean': ('has_issuelinks_relates', 'mean'),
        'fields.components': ('fields.components', component_names),
    }

    # Average every embedding column that is actually present
    for col in df.columns:
        if isinstance(col, str) and col.startswith('desc_emb_'):
            aggregations[f'{col}_mean'] = (col, 'mean')

    # Skip aggregations whose source column is missing
    aggregations = {
        out_name: spec
        for out_name, spec in aggregations.items()
        if spec[0] in df.columns
    }

    project_features = df.groupby(group_keys).agg(**aggregations)

    # Expand the categorical distributions
    project_features = expand_categorical_features(project_features)

    available = set(project_features.columns)

    # Add component count
    if 'fields.components' in available:
        project_features['component_count'] = (
            project_features['fields.components'].apply(len)
        )

    # Issues per assignee; projects without assignees divide by 1, not 0
    if {'key_count', 'fields.assignee.key_nunique'} <= available:
        project_features['issue_to_assignee_ratio'] = (
            project_features['key_count']
            / project_features['fields.assignee.key_nunique'].clip(lower=1)
        )

    # Link density (links per issue)
    if {'key_count', 'issuelinks_total_sum'} <= available:
        project_features['link_density'] = (
            project_features['issuelinks_total_sum'] / project_features['key_count']
        )

    return project_features

# Example usage: build the planning-phase dataset and show a short preview.
if __name__ == "__main__":
    df_planning = export_clean_planningphase_df(open_dtale=True)
    # Heading and payload are printed on consecutive lines
    print("Planning-phase DataFrame columns:", df_planning.columns.tolist(), sep="\n")
    print("Sample rows:", df_planning.head(5), sep="\n")

In [None]:
import sys, os
project_root = os.path.abspath("..")  # adjust based on your directory structure
if project_root not in sys.path:
    sys.path.append(project_root)
    
import re
import pandas as pd
import numpy as np
from FeatureCleaning.CleanDSDTale import export_clean_df  # Adjust import path as needed
import dtale

def extract_sprint_name(sprint_str):
    """
    Extract the sprint name from a sprint string.
    For example, from a value like:
      "com.atlassian.greenhopper.service.sprint.Sprint@16353814[id=5599,...,name=Sprint 9,...]"
    this returns "Sprint 9".

    Parameters:
        sprint_str: The raw sprint value. Anything that is not a non-empty
            string (None, NaN, lists, arrays, ...) yields None.

    Returns:
        The text following "name=" up to the next comma, stripped of
        surrounding whitespace, or None when there is no such text.
    """
    # Test the type before the truthiness: array-like values raise ValueError
    # when evaluated as a boolean, and they could never match anyway.
    if not isinstance(sprint_str, str) or not sprint_str:
        return None
    match = re.search(r'name=([^,]+)', sprint_str)
    return match.group(1).strip() if match else None

def extract_sprint_from_fixversions(fix_versions):
    """
    Given a list of fixVersions dictionaries, return the first version name containing 'Sprint'.

    Parameters:
        fix_versions: The fixVersions cell value. Anything that is not a list
            yields None.

    Returns:
        The first "name" value that is a string containing 'Sprint', or None
        when no entry qualifies.
    """
    if not isinstance(fix_versions, list):
        return None
    for version in fix_versions:
        # Entries that are not dictionaries have no "name" to inspect.
        if not isinstance(version, dict):
            continue
        name = version.get("name")
        # A missing or null name must not break the substring test.
        if isinstance(name, str) and "Sprint" in name:
            return name
    return None

def add_sprint_field(df):
    """
    Write a standardized 'sprint' column into the given DataFrame and return it.

    The column is derived from "fields.customfield_10557" when that column
    exists, otherwise from "fields.fixVersions"; when neither column is
    present every row gets None. The DataFrame is modified in place.
    """
    columns = df.columns
    if "fields.customfield_10557" in columns:
        # Preferred source: the raw sprint string
        sprint_values = df["fields.customfield_10557"].apply(extract_sprint_name)
    elif "fields.fixVersions" in columns:
        # Fallback source: version names that mention a sprint
        sprint_values = df["fields.fixVersions"].apply(extract_sprint_from_fixversions)
    else:
        sprint_values = None
    df["sprint"] = sprint_values
    return df

def extract_planning_fields(df):
    """
    Build the planning-phase DataFrame from the full cleaned DataFrame.

    A copy of the input receives a 'sprint' column, then only the
    planning-phase columns that actually exist (plus every "desc_emb_*"
    column) are kept. Derived columns are added on top: component/label
    counts, creation-date parts, and the resolution time in hours (the raw
    "fields.resolutiondate" column is dropped once that is computed).
    """
    source = add_sprint_field(df.copy())

    planning_cols = [
        # Basic issue identification
        "key",
        "fields.summary",
        # Issue classification
        "fields.issuetype.name",
        "fields.status.name",
        "fields.priority.name",
        # People involved
        "fields.assignee.key",
        "fields.creator.key",
        "fields.reporter.key",
        # Project context
        "fields.project.id",
        "fields.project.name",
        # Time information
        "fields.created",
        # Relationships
        "sprint",
        "issuelinks_total",
        "has_issuelinks_relates",
        "has_issuelinks_cloners",
        # Components and labels
        "fields.components",
        "fields.labels",
        # For training purposes: used to calculate resolution time
        "fields.resolutiondate",
    ]

    # Description embedding columns all share the "desc_emb_" prefix
    embedding_cols = [name for name in source.columns if name.startswith("desc_emb_")]

    # Keep only the requested columns that the DataFrame really has
    available_cols = [
        name for name in planning_cols + embedding_cols if name in source.columns
    ]
    planning_df = source[available_cols].copy()

    def list_length(value):
        # Non-list cells (e.g. missing values) count as zero entries
        return len(value) if isinstance(value, list) else 0

    # Component and label counts
    for list_col, count_col in (
        ("fields.components", "component_count"),
        ("fields.labels", "label_count"),
    ):
        if list_col in planning_df.columns:
            planning_df[count_col] = planning_df[list_col].apply(list_length)

    # Creation date features
    has_created = "fields.created" in planning_df.columns
    if has_created:
        created = pd.to_datetime(planning_df["fields.created"])
        planning_df["created_date"] = created
        planning_df["created_day_of_week"] = planning_df["created_date"].dt.dayofweek
        planning_df["created_month"] = planning_df["created_date"].dt.month
        planning_df["created_year"] = planning_df["created_date"].dt.year

    # Resolution time in hours (training target only)
    if "fields.resolutiondate" in planning_df.columns and has_created:
        elapsed = (
            pd.to_datetime(planning_df["fields.resolutiondate"])
            - pd.to_datetime(planning_df["fields.created"])
        )
        planning_df["resolution_time"] = elapsed.dt.total_seconds() / 3600

        # The raw resolution date is replaced by the derived feature
        planning_df = planning_df.drop(columns=["fields.resolutiondate"])

    return planning_df

def expand_categorical_features(df):
    """
    Expand categorical distributions stored as dictionaries.

    An object-dtype column is treated as a distribution when its first
    non-null value is a dict. Each such column is replaced by one column per
    dictionary key, named "<column>_<key>", holding that key's value (0 where
    the key or the whole dictionary is absent). New columns appear in
    first-seen key order, so the layout is the same on every run.

    Parameters:
        df (pd.DataFrame): Input frame; it is not modified.

    Returns:
        pd.DataFrame: df itself when it has no rows, otherwise a copy with
        the dictionary columns expanded.
    """
    # Check if DataFrame is empty
    if len(df) == 0:
        return df

    # Identify columns that contain dictionaries
    dict_cols = []
    for col in df.columns:
        series = df[col]
        if series.dtype != object:
            continue
        present = series.dropna()
        if not present.empty and isinstance(present.iloc[0], dict):
            dict_cols.append(col)

    result_df = df.copy()

    for col in dict_cols:
        cells = df[col]

        # Collect categories in first-seen order; iterating a set here would
        # make the column order depend on string hashing.
        categories = {}
        for cell in cells:
            if isinstance(cell, dict):
                categories.update(dict.fromkeys(cell))

        # Create a column for each category
        for category in categories:
            result_df[f"{col}_{category}"] = [
                cell.get(category, 0) if isinstance(cell, dict) else 0
                for cell in cells
            ]

        # Drop the original dictionary column
        result_df = result_df.drop(columns=[col])

    return result_df

def create_project_features(df):
    """
    Aggregate task-level features to create project-level features.

    Rows are grouped by "fields.project.id" and "fields.project.name", which
    become regular columns of the result. Only aggregations whose source
    column exists in df are computed. Output column names are fixed:
      - key_count: number of issues
      - fields.assignee.key, fields.creator.key: number of distinct values
      - fields.issuetype.name, fields.priority.name, fields.status.name,
        sprint, created_month, created_day_of_week: value-count dictionaries,
        expanded by expand_categorical_features()
      - fields.components: set of component names
      - <column>_<stat> for issuelinks_total, has_issuelinks_relates,
        component_count, label_count, resolution_time and every "desc_emb_*"
        column
    Derived metrics (issue_to_assignee_ratio, link_density,
    semantic_complexity, total_effort, effort_per_issue,
    effort_per_team_member) are added when their inputs exist.

    Parameters:
        df (pd.DataFrame): Task-level DataFrame.

    Returns:
        pd.DataFrame: One row per project.
    """
    group_keys = ['fields.project.id', 'fields.project.name']

    def value_distribution(values):
        # {category: number of occurrences} within one project
        return values.value_counts().to_dict()

    def component_names(values):
        # Use component names rather than the component objects themselves
        return set(
            item.get('name', str(item)) if isinstance(item, dict) else str(item)
            for sublist in values if isinstance(sublist, list)
            for item in sublist
        )

    # (source column, aggregation, output column)
    specs = [('key', 'count', 'key_count')]
    specs += [
        (col, value_distribution, col)
        for col in ('fields.issuetype.name', 'fields.priority.name', 'fields.status.name')
    ]
    # Team size metrics
    specs += [(col, 'nunique', col) for col in ('fields.assignee.key', 'fields.creator.key')]
    # Connectivity metrics
    specs += [('issuelinks_total', stat, f'issuelinks_total_{stat}') for stat in ('sum', 'mean')]
    specs.append(('has_issuelinks_relates', 'mean', 'has_issuelinks_relates_mean'))
    # Component usage
    specs.append(('fields.components', component_names, 'fields.components'))
    specs += [
        (col, stat, f'{col}_{stat}')
        for col in ('component_count', 'label_count')
        for stat in ('sum', 'mean')
    ]
    # Sprint and creation-time distributions
    specs += [
        (col, value_distribution, col)
        for col in ('sprint', 'created_month', 'created_day_of_week')
    ]
    # Resolution time statistics (for model training)
    specs += [
        ('resolution_time', stat, f'resolution_time_{stat}')
        for stat in ('mean', 'median', 'min', 'max', 'std', 'sum')
    ]
    # Description embedding averages
    specs += [
        (col, 'mean', f'{col}_mean')
        for col in df.columns
        if isinstance(col, str) and col.startswith('desc_emb_')
    ]

    # Named aggregation pins every output name. A plain dict that mixes
    # lambdas with list-valued entries would label the lambda results
    # "<column>_<lambda>", so the lookups below would never match.
    named_aggs = {
        out_name: (source, how)
        for source, how, out_name in specs
        if source in df.columns
    }

    # Perform the groupby aggregation and turn the group keys into columns
    project_features = df.groupby(group_keys).agg(**named_aggs).reset_index()

    # Expand the categorical distributions
    project_features = expand_categorical_features(project_features)

    # Add derived metrics

    # Compute team-related metrics if available
    if 'fields.assignee.key' in project_features.columns and 'key_count' in project_features.columns:
        # Treat a project without assignees as a team of one to avoid dividing by zero
        project_features['issue_to_assignee_ratio'] = (
            project_features['key_count'] / project_features['fields.assignee.key'].apply(lambda x: max(1, x))
        )

    # Calculate link density if available
    if 'issuelinks_total_sum' in project_features.columns and 'key_count' in project_features.columns:
        project_features['link_density'] = (
            project_features['issuelinks_total_sum'] / project_features['key_count']
        )

    # Calculate semantic complexity if embedding columns exist
    emb_cols = [col for col in project_features.columns if col.startswith('desc_emb_')]
    if emb_cols:
        # Variance across embedding dimensions as a complexity measure
        project_features['semantic_complexity'] = project_features[emb_cols].var(axis=1)

    # Calculate effort metrics if resolution time and team size are available
    if (
        'resolution_time_mean' in project_features.columns
        and 'fields.assignee.key' in project_features.columns
    ):
        # Total effort (team size x avg resolution time x issue count)
        project_features['total_effort'] = (
            project_features['fields.assignee.key'] *
            project_features['resolution_time_mean'] *
            project_features['key_count']
        )

        # Effort per issue
        project_features['effort_per_issue'] = (
            project_features['total_effort'] / project_features['key_count']
        )

        # Effort per team member
        project_features['effort_per_team_member'] = (
            project_features['total_effort'] / project_features['fields.assignee.key']
        )

    return project_features

def calculate_time_to_resolution(df):
    """
    Calculate time-to-resolution metrics for each project.

    Resolution time is "fields.resolutiondate" minus "fields.created",
    expressed in hours. The input DataFrame is not modified.

    Parameters:
        df (pd.DataFrame): The full task-level DataFrame with timestamps

    Returns:
        resolution_metrics (pd.DataFrame): One row per project with the
        columns "fields.project.id", "fields.project.name",
        time_to_resolution_{mean,median,min,max,std,sum}, completion_rate
        (share of rows with a resolution date) and, when
        "changelog_count_status" exists, changelog_count_status_{sum,mean}.
        An empty DataFrame is returned when either date column is missing.
    """
    group_keys = ['fields.project.id', 'fields.project.name']

    # Check if required columns exist
    if 'fields.resolutiondate' not in df.columns or 'fields.created' not in df.columns:
        print("Warning: Resolution date or creation date columns missing. Cannot calculate resolution metrics.")
        return pd.DataFrame()

    # Resolution time for each task, in hours
    hours = (
        pd.to_datetime(df['fields.resolutiondate']) -
        pd.to_datetime(df['fields.created'])
    ).dt.total_seconds() / 3600

    has_status_changes = 'changelog_count_status' in df.columns
    wanted = group_keys + ['fields.resolutiondate']
    if has_status_changes:
        wanted.append('changelog_count_status')

    # Work on a separate frame so the caller's DataFrame gains no column
    work = df[wanted].assign(time_to_resolution=hours)

    # Named aggregation yields flat, explicit column names and leaves the
    # group keys unrenamed, so the result can be merged on them directly.
    named_aggs = {
        f'time_to_resolution_{stat}': ('time_to_resolution', stat)
        for stat in ('mean', 'median', 'min', 'max', 'std', 'sum')
    }
    # Completion rate: fraction of tasks that have a resolution date
    named_aggs['completion_rate'] = (
        'fields.resolutiondate', lambda dates: dates.notna().mean()
    )

    # Status transition counts if available
    if has_status_changes:
        named_aggs['changelog_count_status_sum'] = ('changelog_count_status', 'sum')
        named_aggs['changelog_count_status_mean'] = ('changelog_count_status', 'mean')

    resolution_metrics = work.groupby(group_keys).agg(**named_aggs).reset_index()

    return resolution_metrics

def create_project_dataset(full_df):
    """
    Assemble the project-level dataset used for analysis and modeling.

    Project features are built from the planning-phase view of full_df and
    then left-joined with the resolution metrics computed from full_df on
    "fields.project.id" and "fields.project.name". When no resolution
    metrics are available, the project features are returned as they are.

    Parameters:
        full_df (pd.DataFrame): The full task-level DataFrame

    Returns:
        project_dataset (pd.DataFrame): Combined project-level dataset
    """
    # Planning-phase fields -> project-level features
    project_features = create_project_features(extract_planning_fields(full_df))

    # Resolution metrics come from the full data
    resolution_metrics = calculate_time_to_resolution(full_df)

    # Nothing to join: the features alone form the dataset
    if resolution_metrics.empty:
        return project_features

    return pd.merge(
        project_features,
        resolution_metrics,
        on=['fields.project.id', 'fields.project.name'],
        how='left',
    )

def export_clean_planningphase_df(open_dtale=True):
    """
    Build the planning-phase, project-feature and project-level datasets
    from the cleaned task-level DataFrame returned by export_clean_df().
    If open_dtale is True, a D-Tale session is launched for each of the
    three DataFrames.

    Returns:
      planning_df (pd.DataFrame): The planning-phase DataFrame with the selected fields.
      project_features (pd.DataFrame): Project-level features derived from planning data.
      project_dataset (pd.DataFrame): Complete project dataset with resolution metrics.
    """
    # Fully cleaned task-level data (export_clean_df is called without arguments)
    full_df = export_clean_df()

    planning_df = extract_planning_fields(full_df)
    project_features = create_project_features(planning_df)
    project_dataset = create_project_dataset(full_df)

    if open_dtale:
        # (announcement, DataFrame to show, D-Tale instance name)
        sessions = (
            ("Launching D-Tale session for planning-phase DataFrame...", planning_df, "Planning DataFrame"),
            ("Launching D-Tale session for project-level features...", project_features, "Project Features"),
            ("Launching D-Tale session for complete project dataset...", project_dataset, "Project Dataset"),
        )
        for announcement, frame, session_name in sessions:
            print(announcement)
            dtale.show(frame, ignore_duplicate=True, allow_cell_edits=False, name=session_name)

    return planning_df, project_features, project_dataset

# Example usage: build all three datasets and print a short summary.
if __name__ == "__main__":
    planning_df, project_features, project_dataset = export_clean_planningphase_df(open_dtale=True)

    print("\nPlanning-phase DataFrame shape:", planning_df.shape)
    print("Project features DataFrame shape:", project_features.shape)
    print("Project dataset shape:", project_dataset.shape)

    # Summarise project metrics only when at least one project exists
    if len(project_dataset) > 0:
        print("\nProject-Level Feature Summary:")
        print(f"Number of projects: {len(project_dataset)}")

        key_metrics = [
            'key_count', 'fields.assignee.key', 'issue_to_assignee_ratio',
            'resolution_time_mean', 'total_effort', 'semantic_complexity'
        ]
        # Metrics that are actually present in the dataset
        present_metrics = [name for name in key_metrics if name in project_dataset.columns]

        print("\nAvailable Metrics:")
        for name in present_metrics:
            print(f"- {name}")

        # Sample of project data: name and issue count first, then the other metrics
        print("\nSample Project Data:")
        display_cols = ['fields.project.name', 'key_count'] + [
            name for name in present_metrics if name != 'key_count'
        ]

        # Only include columns that exist
        valid_cols = [name for name in display_cols if name in project_dataset.columns]
        if valid_cols:
            print(project_dataset[valid_cols].head())


Processing repository: Hyperledger ...

Processing repository: SecondLife ...



The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.


The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.


The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.



Data processed. Launching D-Tale session...



Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '<DatetimeArray>
['2011-04-11 10:43:13', '2012-12-17 15:41:37', '2012-01-25 15:00:30',
 '2011-01-16 17:32:25', '2011-01-14 09:54:49', '2011-08-04 19:01:01',
 '2010-11-01 09:38:06', '2011-05-26 00:08:40', '2016-06-20 18:54:08',
 '2010-11-12 22:44:10',
 ...
 '2017-02-20 15:49:37', '2019-10-01 20:39:14', '2018-12-01 11:31:04',
 '2018-03-09 15:37:50', '2018-07-19 22:16:42', '2020-03-16 14:48:39',
 '2019-09-25 14:24:16', '2018-04-25 20:53:29', '2017-09-01 02:53:49',
 '2018-04-12 16:35:20']
Length: 267, dtype: datetime64[ns]' has dtype incompatible with datetime64[ns, UTC], please explicitly cast to a compatible dtype first.


Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '<DatetimeArray>
['2011-02-16 01:02:24', '2012-12-15 03:01:55', '2011-06-15 19:35:48',
 '2010-10-18 12:37:15', '2010-11-16 11:55:02', '2011-07-15 17:25:12',
 '2010-07

✅ D-Tale session launched successfully.


TypeError: unhashable type: 'dict'