In [1]:
import numpy as np
import pandas as pd
import torch
import pickle
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
#from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
import pandas.api.types
from typing import Dict, List, Tuple

In [2]:
individual_data = pd.read_csv('individual_data.csv')

In [3]:
series_id_column_name = "series_id"
time_column_name = "time"
event_column_name = "event"
score_column_name = "score"
use_scoring_intervals = False #adjust as necessary 

In [4]:
solution = pd.DataFrame({
    series_id_column_name:individual_data["series_id"],
    time_column_name: individual_data["timestamp"],
    event_column_name: individual_data["event"]
})

In [5]:
from evd.model import LSTMModel

In [6]:
input_dim = 4
hidden_dim = 64
num_layers = 3
num_classes = 3

In [8]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [9]:
model = LSTMModel(input_dim=input_dim, hidden_dim=hidden_dim, num_layers=num_layers, num_classes=num_classes)
model.load_state_dict(torch.load("train_model.pth"))
model.to(device)
model.eval()

  model.load_state_dict(torch.load("train_model.pth"))


LSTMModel(
  (lstm): LSTM(4, 64, num_layers=3)
  (fc): Linear(in_features=64, out_features=3, bias=True)
)

In [10]:
with open('valid_loader.pkl', 'rb') as f:
    valid_loader = pickle.load(f)

In [11]:
from evd.utils import calculate_confidence_score

In [12]:
#Old version
all_series_id = []
all_timestamp = []
all_confidence_scores = []
all_predictions = []

with torch.no_grad():
    start_idx = 0  # Initial index for batch start
    
    for X_batch, y_batch in valid_loader:
        X_batch = X_batch.to(device)
        batch_size = X_batch.size(0)
        
        # Extract corresponding slice of series_id and timestamp
        series_id_batch = individual_data["series_id"].iloc[start_idx:start_idx + batch_size].values
        timestamp_batch = individual_data["timestamp"].iloc[start_idx:start_idx + batch_size].values
        
        # Process the model outputs
        outputs = model(X_batch)
        logits = outputs[2]

        probabilities = torch.softmax(logits, dim=1)
        confidence_scores = torch.max(probabilities, dim=1).values
        predictions = torch.argmax(logits, dim=1)

        # Append batch data to lists
        all_series_id.extend(series_id_batch)
        all_timestamp.extend(timestamp_batch)
        all_confidence_scores.extend(confidence_scores.cpu().numpy())
        all_predictions.extend(predictions.cpu().numpy())

        # Update start index for next batch
        start_idx += batch_size

print(len(all_series_id), len(all_timestamp), len(all_confidence_scores), len(all_predictions))

89728 89728 89728 89728


In [13]:
submission = pd.DataFrame({
    "series_id": all_series_id,
    "time": all_timestamp,
    "event": all_predictions,
    "score": all_confidence_scores  
})


In [14]:
individual_data = individual_data.merge(
    submission[["series_id", "time", "score"]],  
    how="left",
    left_on=["series_id", "timestamp"], 
    right_on=["series_id", "time"]
)

individual_data.drop(columns=["time"], inplace=True)


print(individual_data.head())

      series_id  step_x                 timestamp    anglez      enmo  event  \
0  038441c925bb       0  2018-08-14T15:30:00-0400  0.322257 -0.192628      0   
1  038441c925bb       1  2018-08-14T15:30:05-0400  0.322260 -0.194592      0   
2  038441c925bb       2  2018-08-14T15:30:10-0400  0.322266 -0.193610      0   
3  038441c925bb       3  2018-08-14T15:30:15-0400  0.322260 -0.196556      0   
4  038441c925bb       4  2018-08-14T15:30:20-0400  0.322260 -0.194592      0   

      _merge  anglez_change  enmo_change     score  
0  left_only       0.000000     0.000000  0.999876  
1  left_only       0.000003    -0.001964  0.999876  
2  left_only       0.000006     0.000982  0.999876  
3  left_only      -0.000006    -0.002946  0.999876  
4  left_only       0.000000     0.001964  0.999876  


In [15]:
print("all_series_id:", type(all_series_id), len(all_series_id))
print("all_timestamp:", type(all_timestamp), len(all_timestamp))
print("all_predictions:", type(all_predictions), len(all_predictions))
print("all_confidence_scores:", type(all_confidence_scores), len(all_confidence_scores))

all_series_id: <class 'list'> 89728
all_timestamp: <class 'list'> 89728
all_predictions: <class 'list'> 89728
all_confidence_scores: <class 'list'> 89728


In [16]:
print(submission["score"].describe())

count    8.972800e+04
mean     9.998760e-01
std      4.359525e-09
min      9.998758e-01
25%      9.998760e-01
50%      9.998760e-01
75%      9.998760e-01
max      9.998760e-01
Name: score, dtype: float64


In [17]:
submission["time"] = pd.to_datetime(submission["time"], errors = "coerce")

submission["time"] = submission["time"].astype("int64") // 10**9

print(submission["time"].dtype)                  

int64


In [18]:
solution["time"] = pd.to_datetime(solution["time"])
solution["time"] = solution["time"].view('int64') / 10**9
print(solution["time"].dtype)  

float64


In [19]:
class ParticipantVisibleError(Exception):
    pass

def score(
    solution: pd.DataFrame,
    submission: pd.DataFrame,
    tolerances: Dict[str, List[float]],
    series_id_column_name: str = "series_id",
    time_column_name: str = "time",
    event_column_name: str = "event",
    score_column_name: str = "score",
    use_scoring_intervals: bool = False,
) -> float:

    # Validate metric parameters
    assert len(tolerances) > 0, "Events must have defined tolerances."
    assert set(tolerances.keys()) == set(solution[event_column_name]).difference({'start', 'end'}),\
        (f"Solution column {event_column_name} must contain the same events "
         "as defined in tolerances.")
    assert pd.api.types.is_numeric_dtype(solution[time_column_name]),\
        f"Solution column {time_column_name} must be of numeric type."
    
    # Validate submission format
    for column_name in [
        series_id_column_name,
        time_column_name,
        event_column_name,
        score_column_name,
    ]:
        if column_name not in submission.columns:
            raise ParticipantVisibleError(f"Submission must have column '{column_name}'.")

    if not pd.api.types.is_numeric_dtype(submission[time_column_name]):
        raise ParticipantVisibleError(
            f"Submission column '{time_column_name}' must be of numeric type."
        )
    if not pd.api.types.is_numeric_dtype(submission[score_column_name]):
        raise ParticipantVisibleError(
            f"Submission column '{score_column_name}' must be of numeric type."
        )

  # Set these globally to avoid passing around a bunch of arguments
    globals()['series_id_column_name'] = series_id_column_name
    globals()['time_column_name'] = time_column_name
    globals()['event_column_name'] = event_column_name
    globals()['score_column_name'] = score_column_name
    globals()['use_scoring_intervals'] = use_scoring_intervals

    return event_detection_ap(solution, submission, tolerances)




In [20]:
tolerances = {
    0: [0.1, 0.5],
    1: [0.5, 1.0],
    2: [0.5, 1.0],
}


In [21]:
def filter_detections(
        detections: pd.DataFrame, intervals: pd.DataFrame
) -> pd.DataFrame:
    """Drop detections not inside a scoring interval."""
    detection_time = detections.loc[:, time_column_name].sort_values().to_numpy()
    intervals = intervals.to_numpy()
    is_scored = np.full_like(detection_time, False, dtype=bool)

    i, j = 0, 0
    while i < len(detection_time) and j < len(intervals):
        time = detection_time[i]
        int_ = intervals[j]

        # If the detection is prior in time to the interval, go to the next detection.
        if time < int_.left:
            i += 1
        # If the detection is inside the interval, keep it and go to the next detection.
        elif time in int_:
            is_scored[i] = True
            i += 1
        # If the detection is later in time, go to the next interval.
        else:
            j += 1

    return detections.loc[is_scored].reset_index(drop=True)


def match_detections(
        tolerance: float, ground_truths: pd.DataFrame, detections: pd.DataFrame
) -> pd.DataFrame:
    """Match detections to ground truth events. Arguments are taken from a common event x tolerance x series_id evaluation group."""
    detections_sorted = detections.sort_values(score_column_name, ascending=False).dropna()
    is_matched = np.full_like(detections_sorted[event_column_name], False, dtype=bool)
    gts_matched = set()
    for i, det in enumerate(detections_sorted.itertuples(index=False)):
        best_error = tolerance
        best_gt = None

        for gt in ground_truths.itertuples(index=False):
            error = abs(getattr(det, time_column_name) - getattr(gt, time_column_name))
            if error < best_error and gt not in gts_matched:
                best_gt = gt
                best_error = error

        if best_gt is not None:
            is_matched[i] = True
            gts_matched.add(best_gt)

    detections_sorted['matched'] = is_matched

    return detections_sorted


In [22]:
def precision_recall_curve(
        matches: np.ndarray, scores: np.ndarray, p: int
) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    if len(matches) == 0:
        return [1], [0], []

    # Sort matches by decreasing confidence
    idxs = np.argsort(scores, kind='stable')[::-1]
    scores = scores[idxs]
    matches = matches[idxs]

    distinct_value_indices = np.where(np.diff(scores))[0]
    threshold_idxs = np.r_[distinct_value_indices, matches.size - 1]
    thresholds = scores[threshold_idxs]

    # Matches become TPs and non-matches FPs as confidence threshold decreases
    tps = np.cumsum(matches)[threshold_idxs]
    fps = np.cumsum(~matches)[threshold_idxs]

    precision = tps / (tps + fps)
    precision[np.isnan(precision)] = 0
    recall = tps / p  # total number of ground truths might be different than total number of matches

    # Stop when full recall attained and reverse the outputs so recall is non-increasing.
    last_ind = tps.searchsorted(tps[-1])
    sl = slice(last_ind, None, -1)

    # Final precision is 1 and final recall is 0
    return np.r_[precision[sl], 1], np.r_[recall[sl], 0], thresholds[sl]


def average_precision_score(matches: np.ndarray, scores: np.ndarray, p: int) -> float:
    precision, recall, _ = precision_recall_curve(matches, scores, p)
    # Compute step integral
    return -np.sum(np.diff(recall) * np.array(precision)[:-1])


In [23]:
def event_detection_ap(
        solution: pd.DataFrame,
        submission: pd.DataFrame,
        tolerances: Dict[str, List[float]],
) -> float:

    # Ensure solution and submission are sorted properly
    solution = solution.sort_values([series_id_column_name, time_column_name])
    submission = submission.sort_values([series_id_column_name, time_column_name])

    # Extract scoring intervals.
    if use_scoring_intervals:
        intervals = (
            solution
            .query("event in ['start', 'end']")
            .assign(interval=lambda x: x.groupby([series_id_column_name, event_column_name]).cumcount())
            .pivot(
                index='interval',
                columns=[series_id_column_name, event_column_name],
                values=time_column_name,
            )
            .stack(series_id_column_name)
            .swaplevel()
            .sort_index()
            .loc[:, ['start', 'end']]
            .apply(lambda x: pd.Interval(*x, closed='both'), axis=1)
        )

    # Extract ground-truth events.
    ground_truths = (
        solution
        .query("event not in ['start', 'end']")
        .reset_index(drop=True)
    )

    # Map each event class to its prevalence (needed for recall calculation)
    class_counts = ground_truths.value_counts(event_column_name).to_dict()

    # Create table for detections with a column indicating a match to a ground-truth event
    detections = submission.assign(matched = False)

    # Remove detections outside of scoring intervals
    if use_scoring_intervals:
        detections_filtered = []
        for (det_group, dets), (int_group, ints) in zip(
            detections.groupby(series_id_column_name), intervals.groupby(series_id_column_name)
        ):
            assert det_group == int_group
            detections_filtered.append(filter_detections(dets, ints))
        detections_filtered = pd.concat(detections_filtered, ignore_index=True)
    else:
        detections_filtered = detections

    # Create table of event-class x tolerance x series_id values
    aggregation_keys = pd.DataFrame(
        [(ev, tol, vid)
         for ev in tolerances.keys()
         for tol in tolerances[ev]
         for vid in ground_truths[series_id_column_name].unique()],
        columns=[event_column_name, 'tolerance', series_id_column_name],
    )

    # Create match evaluation groups: event-class x tolerance x series_id
    detections_grouped = (
        aggregation_keys
        .merge(detections_filtered, on=[event_column_name, series_id_column_name], how='left')
        .groupby([event_column_name, 'tolerance', series_id_column_name])
    )
    ground_truths_grouped = (
        aggregation_keys
        .merge(ground_truths, on=[event_column_name, series_id_column_name], how='left')
        .groupby([event_column_name, 'tolerance', series_id_column_name])
    )
    # Match detections to ground truth events by evaluation group
    detections_matched = []
    for key in aggregation_keys.itertuples(index=False):
        dets = detections_grouped.get_group(key)
        gts = ground_truths_grouped.get_group(key)
        detections_matched.append(
            match_detections(dets['tolerance'].iloc[0], gts, dets)
        )
    detections_matched = pd.concat(detections_matched)

    # Compute AP per event x tolerance group
    event_classes = ground_truths[event_column_name].unique()
    ap_table = (
        detections_matched
        .query("event in @event_classes")
        .groupby([event_column_name, 'tolerance']).apply(
            lambda group: average_precision_score(
                group['matched'].to_numpy(),
                group[score_column_name].to_numpy(),
                class_counts[group[event_column_name].iat[0]],
            )
        )
    )
    # Average over tolerances, then over event classes
    mean_ap = ap_table.groupby(event_column_name).mean().sum() / len(event_classes)

    return mean_ap

In [24]:
import torch

if torch.cuda.is_available():
    print(f"PyTorch use GPU: {torch.cuda.get_device_name(0)}")
else:
    print("PyTorch use CPU")

PyTorch use GPU: NVIDIA GeForce RTX 4080 Laptop GPU


In [25]:
import torch
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Move your model to the GPU
model = model.to(device)

In [27]:
def score_with_progress(solution, submission, tolerances, **kwargs):
    total = len(solution)
    for i in tqdm(range(total), desc="計算 Mean AP"):
        time.sleep(0.05)  # 模擬運算

In [28]:
mean_ap = score_with_progress(
    solution=solution,
    submission=submission,
    tolerances=tolerances
)

print(f"Mean AP: {mean_ap}")

計算 Mean AP: 100%|██████████| 389880/389880 [6:31:05<00:00, 16.62it/s]  

Mean AP: None



