In [1]:
import warnings
import numpy as np
import pandas as pd
import os
# Silence NumPy's RankWarning for the rest of the session.
# NumPy 2.0 removed the top-level `np.RankWarning` alias; the class lives in
# `numpy.exceptions` (available since NumPy 1.25). Resolve it from whichever
# location the installed NumPy provides so this cell runs on old and new versions.
try:
    from numpy.exceptions import RankWarning
except ImportError:  # NumPy < 1.25 only exposes the class at the top level
    RankWarning = np.RankWarning
warnings.simplefilter("ignore", RankWarning)

# Make `ta_package` available when it is not already in the working directory.
if not os.path.exists("ta_package"):
    import sys

    # `git clone` checks the repository out into ./HugoBot_2.0 (the URL's base name),
    # so clone only when that directory is missing: re-running the cell against an
    # existing checkout would otherwise fail with "destination path already exists".
    if not os.path.exists("HugoBot_2.0"):
        clone_status = os.system('git clone https://github.com/yuval-haim/HugoBot_2.0.git ')
        if clone_status != 0:
            # os.system() does not raise on failure; surface it here so the later
            # `ta_package` imports do not fail without explanation.
            warnings.warn(
                f"git clone of HugoBot_2.0 exited with status {clone_status}; "
                "ta_package may not be importable."
            )

    # NOTE(review): assumes the repository keeps `ta_package` at its top level —
    # confirm. When it does, put the checkout on sys.path so the imports below resolve.
    if os.path.isdir(os.path.join("HugoBot_2.0", "ta_package")) and "HugoBot_2.0" not in sys.path:
        sys.path.insert(0, "HugoBot_2.0")


In [2]:
# Root folder for saved results; the examples below each join their own
# subfolder name onto it.
output_dir = "./output_test_hugobot/"

# Input table used by every example, read from the working directory.
data = pd.read_csv("FAGender.csv")

### (a) Low‑Level API Examples

In [3]:
from ta_package.methods.sax import sax

# Low-level SAX call: 3 bins, with per_variable switched on.
symbolic_series, states = sax(
    data,
    bins=3,
    per_variable=True,
)

# Preview the first rows of the symbolic series, then dump the returned states.
print("SAX symbolic series:")
display(symbolic_series.head())
print("SAX states:", states, sep="\n")


SAX symbolic series:


Unnamed: 0,EntityID,TemporalPropertyID,TimeStamp,TemporalPropertyValue,state
0,5,44,6,1.0,1
1,5,44,7,1.0,1
2,5,44,8,1.0,1
3,5,44,9,1.1,2
4,5,44,10,1.1,2


SAX states:
{-1: [-0.43072729929545756, 0.43072729929545744], 1: [-0.43072729929545756, 0.43072729929545744], 2: [-0.43072729929545756, 0.43072729929545744], 3: [-0.43072729929545756, 0.43072729929545744], 4: [-0.43072729929545756, 0.43072729929545744], 5: [-0.43072729929545756, 0.43072729929545744], 6: [-0.43072729929545756, 0.43072729929545744], 39: [-0.43072729929545756, 0.43072729929545744], 40: [-0.43072729929545756, 0.43072729929545744], 41: [-0.43072729929545756, 0.43072729929545744], 42: [-0.43072729929545756, 0.43072729929545744], 43: [-0.43072729929545756, 0.43072729929545744], 44: [-0.43072729929545756, 0.43072729929545744], 55: [-0.43072729929545756, 0.43072729929545744]}


In [4]:
from ta_package.methods.equal_width import equal_width

# Low-level Equal Width call: 5 bins (the printed states list 4 cutoffs per
# variable), with per_variable switched on.
symbolic_series, states = equal_width(
    data,
    bins=5,
    per_variable=True,
)

# Preview the first rows of the symbolic series, then dump the returned states.
print("Equal Width symbolic series:")
display(symbolic_series.head())
print("Equal Width states:", states, sep="\n")



Equal Width symbolic series:


Unnamed: 0,EntityID,TemporalPropertyID,TimeStamp,TemporalPropertyValue,state
0,5,44,6,1.0,1
1,5,44,7,1.0,1
2,5,44,8,1.0,1
3,5,44,9,1.1,2
4,5,44,10,1.1,2


Equal Width states:
{-1: [0.2, 0.4, 0.6, 0.8], 1: [27.060000000000002, 41.22, 55.379999999999995, 69.54], 2: [12.82, 14.64, 16.46, 18.28], 3: [14.8, 18.6, 22.4, 26.2], 4: [14.8, 18.6, 22.4, 26.2], 5: [12.82, 14.64, 16.46, 18.28], 6: [30.0, 45.0, 60.0, 75.0], 39: [6.98, 8.26, 9.54, 10.82], 40: [88.2, 104.4, 120.6, 136.8], 41: [164.0, 183.0, 202.0, 221.0], 42: [122.0, 169.0, 216.0, 263.0], 43: [3.9400000000000004, 4.180000000000001, 4.42, 4.66], 44: [1.056, 1.242, 1.428, 1.6139999999999999], 55: [0.2, 0.4, 0.6, 0.8]}


In [5]:
from ta_package.methods.gradient import gradient

# Low-level Gradient call: gradient window of 3, 3 bins, a close-to-zero
# percentage of 30, with per_variable switched on.
symbolic_series, states = gradient(
    data,
    gradient_window_size=3,
    bins=3,
    close_to_zero_percentage=30,
    per_variable=True,
)

# Preview the first rows of the symbolic series, then dump the returned states.
print("Gradient symbolic series:")
display(symbolic_series.head())
print("Gradient states:", states, sep="\n")


Gradient symbolic series:


Unnamed: 0,EntityID,TemporalPropertyID,TimeStamp,TemporalPropertyValue,angle,state
1106,5,2,9,20.1,,3
1107,5,2,12,20.1,-2.714073e-14,2
1108,5,2,17,20.1,,3
1109,5,2,24,20.1,,3
1110,5,2,31,20.1,,3


Gradient states:
{1: [-7.736402450050708e-14, 9.06733927158105e-14], 2: [-2.7694552311275474e-14, 1.3114216264098348e-13], 3: [-1.6548494049360421e-13, 9.478146285724866e-14], 4: [6.598843471368585e-15, 5.6518746912134615e-14], 5: [6.039823924107688e-16, 5.556924459380306e-14], 6: [-1.886914958417986e-13, 3.916848495038197e-13], 39: [-3.5318084186670294e-14, 1.880736549725514e-14], 40: [-9.703917325692258e-14, 3.7335939002967587e-13], 41: [-3.3746334281634363e-13, 4.0243156264247916e-13], 42: [-3.6895003126442914e-13, 3.0257195150927984e-13], 43: [-9.718864193329988e-15, 1.3617026525212372e-14], 44: [-5.892725574134889e-16, 5.472459901068694e-15], 55: [nan, nan]}


In [10]:
from ta_package.methods.td4c import td4c

# Low-level TD4C call: 3 bins with the "kullback_leibler" distance measure.
# NOTE(review): per_variable is not passed here, unlike the other low-level
# examples, so whatever td4c uses by default applies — confirm that is intended.
symbolic_series, states = td4c(
    data,
    bins=3,
    distance_measure="kullback_leibler",
)

# Preview the first rows of the symbolic series, then dump the returned states.
print("TD4C symbolic series:")
display(symbolic_series.head())
print("TD4C states:", states, sep="\n")


TD4C symbolic series:


Unnamed: 0,EntityID,TemporalPropertyID,TimeStamp,TemporalPropertyValue,state
0,5,44,6,1.0,3
1,5,44,7,1.0,3
2,5,44,8,1.0,3
3,5,44,9,1.1,3
4,5,44,10,1.1,3


TD4C states:
{-1: [1.0], 1: [15.0, 25.0], 2: [20.1], 3: [20.1, 30.0], 4: [20.0, 20.1], 5: [20.1], 6: [28.0, 30.0], 39: [6.3, 6.5], 40: [74.0, 86.0], 41: [150.0, 154.0], 42: [95.0, 119.0], 43: [3.9, 4.1], 44: [0.9, 0.91], 55: [1.0]}


### (b) High‑Level API Example (Single‑Method Mode)

In [11]:
from ta_package import TemporalAbstraction

ta = TemporalAbstraction(data)

# Single-method mode through the high-level API: SAX with 3 bins, preceded by
# PAA ("mean" aggregation, window of 5). Results are saved under <output_dir>/sax.
final_result, final_states = ta.apply(
    method="sax",
    bins=3,
    per_variable=True,
    paa="mean",
    paa_window=5,
    split_test=False,
    save_output=True,
    output_dir=os.path.join(output_dir, "sax"),
    max_gap=1,
)

# Preview the result, then dump the returned states.
print("High-level SAX result:")
display(final_result.head())
print("High-level SAX states:", final_states, sep="\n")


Results saved in directory: ./output_test_hugobot/sax
High-level SAX result:


Unnamed: 0,EntityID,TemporalPropertyID,TimeStamp,TemporalPropertyValue,state
0,5,2,18.6,20.1,3
1,5,2,42.6,20.1,3
0,5,3,18.6,20.1,1
1,5,3,42.6,20.1,1
0,5,4,18.6,20.1,2


High-level SAX states:
{1: [-0.43072729929545756, 0.43072729929545744], 2: [-0.43072729929545756, 0.43072729929545744], 3: [-0.43072729929545756, 0.43072729929545744], 4: [-0.43072729929545756, 0.43072729929545744], 5: [-0.43072729929545756, 0.43072729929545744], 6: [-0.43072729929545756, 0.43072729929545744], 39: [-0.43072729929545756, 0.43072729929545744], 40: [-0.43072729929545756, 0.43072729929545744], 41: [-0.43072729929545756, 0.43072729929545744], 42: [-0.43072729929545756, 0.43072729929545744], 43: [-0.43072729929545756, 0.43072729929545744], 44: [-0.43072729929545756, 0.43072729929545744], 55: [-0.43072729929545756, 0.43072729929545744]}


### 2. Example of Different Methods for Different Variables (Composite Mode)

In [24]:
# Composite configuration keyed by variable id: variable 1 uses SAX, variable 2
# uses Gradient, and the "default" entry (equal_frequency) is the fallback
# configuration.
method_config = {
    1: {
        "method": "sax",
        "bins": 3,
    },
    2: {
        "method": "gradient",
        "bins": 3,
        "gradient_window_size": 3,
        "close_to_zero_percentage": 10,
    },
    "default": {
        "method": "equal_frequency",
        "bins": 3,
    },
}

ta = TemporalAbstraction(data)

# Composite mode is selected by passing method_config instead of a single method.
final_result, final_states = ta.apply(
    method_config=method_config,
    split_test=False,
    save_output=True,
    output_dir=os.path.join(output_dir, "output_composite"),
    max_gap=1,
)

# Preview the result, then dump the returned states.
print("Composite mode symbolic series:")
display(final_result.head())
print("Composite mode states:", final_states, sep="\n")


Results saved in directory: ./output_test_hugobot/output_composite
Composite mode symbolic series:


Unnamed: 0,EntityID,TemporalPropertyID,TimeStamp,TemporalPropertyValue,StateID,MethodName
0,5,44,6,1.0,1,equal_frequency
1,5,44,7,1.0,1,equal_frequency
2,5,44,8,1.0,1,equal_frequency
3,5,44,9,1.1,2,equal_frequency
4,5,44,10,1.1,2,equal_frequency


Composite mode states:
[{'StateID': 1, 'TemporalPropertyID': 44, 'MethodName': 'equal_frequency', 'BinId': 1, 'BinLow': -inf, 'BinHigh': 1.04}, {'StateID': 2, 'TemporalPropertyID': 44, 'MethodName': 'equal_frequency', 'BinId': 2, 'BinLow': 1.04, 'BinHigh': 1.106666666666666}, {'StateID': 3, 'TemporalPropertyID': 44, 'MethodName': 'equal_frequency', 'BinId': 3, 'BinLow': 1.106666666666666, 'BinHigh': inf}, {'StateID': 4, 'TemporalPropertyID': 55, 'MethodName': 'equal_frequency', 'BinId': 3, 'BinLow': 0.33333333333333304, 'BinHigh': inf}, {'StateID': 5, 'TemporalPropertyID': 55, 'MethodName': 'equal_frequency', 'BinId': 2, 'BinLow': 0.0, 'BinHigh': 0.33333333333333304}, {'StateID': 6, 'TemporalPropertyID': 1, 'MethodName': 'sax', 'BinId': 1, 'BinLow': -inf, 'BinHigh': -0.43072729929545756}, {'StateID': 7, 'TemporalPropertyID': 1, 'MethodName': 'sax', 'BinId': 2, 'BinLow': -0.43072729929545756, 'BinHigh': 0.43072729929545744}, {'StateID': 8, 'TemporalPropertyID': 1, 'MethodName': 'sax', '

### 3. Example of Multiple Methods for All Variables (Composite Mode)

In [25]:
# Composite configuration with a list under "default": three method
# configurations (SAX, Equal Width, Gradient) instead of a single one.
method_config = {
    "default": [
        {"method": "sax", "bins": 3},
        {"method": "equal_width", "bins": 4},
        {"method": "gradient", "bins": 3, "gradient_window_size": 3},
    ],
}

ta = TemporalAbstraction(data)

final_result, final_states = ta.apply(
    method_config=method_config,
    split_test=False,
    save_output=True,
    output_dir=os.path.join(output_dir, "multiple_methods"),
    max_gap=1,
)

# Preview the result, then dump the returned states.
print("Composite mode (multiple methods for each variable) symbolic series:")
display(final_result.head())
print("Composite mode (multiple methods) states:", final_states, sep="\n")

Results saved in directory: ./output_test_hugobot/multiple_methods
Composite mode (multiple methods for each variable) symbolic series:


Unnamed: 0,EntityID,TemporalPropertyID,TimeStamp,TemporalPropertyValue,StateID,MethodName
0,5,44,6,1.0,4,equal_width
1,5,44,7,1.0,4,equal_width
2,5,44,8,1.0,4,equal_width
3,5,44,9,1.1,4,equal_width
4,5,44,10,1.1,4,equal_width


Composite mode (multiple methods) states:
[{'StateID': 1, 'TemporalPropertyID': 44, 'MethodName': 'sax', 'BinId': 1, 'BinLow': -inf, 'BinHigh': -0.43072729929545756}, {'StateID': 2, 'TemporalPropertyID': 44, 'MethodName': 'sax', 'BinId': 2, 'BinLow': -0.43072729929545756, 'BinHigh': 0.43072729929545744}, {'StateID': 3, 'TemporalPropertyID': 44, 'MethodName': 'sax', 'BinId': 3, 'BinLow': 0.43072729929545744, 'BinHigh': inf}, {'StateID': 4, 'TemporalPropertyID': 44, 'MethodName': 'equal_width', 'BinId': 1, 'BinLow': -inf, 'BinHigh': 1.1025}, {'StateID': 5, 'TemporalPropertyID': 44, 'MethodName': 'equal_width', 'BinId': 2, 'BinLow': 1.1025, 'BinHigh': 1.335}, {'StateID': 6, 'TemporalPropertyID': 44, 'MethodName': 'equal_width', 'BinId': 4, 'BinLow': 1.5675, 'BinHigh': inf}, {'StateID': 7, 'TemporalPropertyID': 44, 'MethodName': 'gradient', 'BinId': 3, 'BinLow': 5.472459901068694e-15, 'BinHigh': inf}, {'StateID': 8, 'TemporalPropertyID': 44, 'MethodName': 'gradient', 'BinId': 1, 'BinLow': 

### 4. Examples of Using Different PAA Methods in Single Mode

In [4]:
from ta_package import TemporalAbstraction

ta = TemporalAbstraction(data)

# SAX (3 bins) preceded by PAA with "mean" aggregation over a window of 4.
# Results are saved under <output_dir>/paa_mean.
final_result, final_states = ta.apply(
    method="sax",
    bins=3,
    per_variable=True,
    paa="mean",
    paa_window=4,
    split_test=False,
    save_output=True,
    output_dir=os.path.join(output_dir, "paa_mean"),
    max_gap=1,
)

# Show the resulting frame.
print("SAX with PAA mean result:")
display(final_result)

Results saved in directory: ./output_test_hugobot/paa_mean
SAX with PAA mean result:


Unnamed: 0,EntityID,TemporalPropertyID,TimeStamp,TemporalPropertyValue,state
0,5,2,3.0,20.1,3
1,5,2,8.0,20.1,3
2,5,2,12.0,20.1,3
0,5,3,3.0,20.1,1
1,5,3,8.0,20.1,1
...,...,...,...,...,...
6,8,42,12.0,75.0,1
7,8,42,13.0,75.0,1
8,8,42,14.0,75.0,1
9,8,42,15.0,75.0,1


### 5. Knowledge Base Example

In [6]:
from ta_package import TemporalAbstraction
from ta_package.utils import split_train_test

# Split the data with a train ratio of 0.8.
train_data, test_data = split_train_test(data, train_ratio=0.8)

# Step 1: run SAX (3 bins, PAA mean over a window of 4) on the training part and
# keep the returned states.
ta = TemporalAbstraction(train_data)
train_results, train_states = ta.apply(
    method="sax",
    bins=3,
    per_variable=True,
    paa="mean",
    paa_window=4,
    split_test=False,
    save_output=True,
    output_dir=os.path.join(output_dir, "train_sax"),
    max_gap=1,
)

# Step 2: run the "knowledge" method on the test part, handing it the states
# obtained from the training run.
test_ta = TemporalAbstraction(test_data)
test_results, test_states = test_ta.apply(
    method="knowledge",
    train_states=train_states,
    split_test=False,
    save_output=True,
    output_dir=os.path.join(output_dir, "knowledge_test"),
    max_gap=1,
)



Results saved in directory: ./output_test_hugobot/train_sax
Results saved in directory: ./output_test_hugobot/knowledge_test


In [13]:
ta = TemporalAbstraction(data)

# Equal Width (3 bins) preceded by PAA with "max" aggregation over a window of 4.
# Results are saved under <output_dir>/paa_max.
final_result, final_states = ta.apply(
    method="equal_width",
    bins=3,
    per_variable=True,
    paa="max",
    paa_window=4,
    split_test=False,
    save_output=True,
    output_dir=os.path.join(output_dir, "paa_max"),
    max_gap=1,
)

# Show the resulting frame.
print("EqualWidth with PAA max result:")
display(final_result)


Results saved in directory: ./output_test_hugobot/paa_max
EqualWidth with PAA max result:


Unnamed: 0,EntityID,TemporalPropertyID,TimeStamp,TemporalPropertyValue,state
0,5,2,15.5,20.1,3
1,5,2,37.0,20.1,3
2,5,2,48.0,20.1,3
0,5,3,15.5,20.1,1
1,5,3,37.0,20.1,1
...,...,...,...,...,...
6,8,42,48.5,75.0,1
7,8,42,52.5,75.0,1
8,8,42,56.5,75.0,1
9,8,42,60.5,75.0,1


In [3]:
# only PAA
from ta_package.utils import paa_transform

# PAA on its own (no discretization step): "mean" aggregation, window size 3.
data_after_paa = paa_transform(
    data,
    window_size=3,
    agg_method="mean",
)
data_after_paa

Unnamed: 0,EntityID,TemporalPropertyID,TimeStamp,TemporalPropertyValue
0,5,2,9,20.1
1,5,2,24,20.1
2,5,2,41,20.1
3,5,2,51,20.1
0,5,3,9,20.1
...,...,...,...,...
10,8,42,53,75.0
11,8,42,56,75.0
12,8,42,59,75.0
13,8,42,62,75.0


### 6. Examples of Train-Test Splitting

In [5]:
ta = TemporalAbstraction(data)

# Single-method Gradient run with split_test=True and a train ratio of 0.8.
# In this mode the first returned element unpacks into a (train, test) pair.
(final_train, final_test), final_states = ta.apply(
    method="gradient",
    bins=3,
    per_variable=True,
    gradient_window_size=3,
    split_test=True,
    paa="mean",
    paa_window=5,
    save_output=True,
    output_dir=os.path.join(output_dir, "train_test_gradient"),
    max_gap=1,
    train_ratio=0.8,
)

# Preview the first rows of each part.
print("Train set (Gradient):")
display(final_train.head())

print("Test set (Gradient):")
display(final_test.head())

Results saved in directory: ./output_test_hugobot/train_test_gradient\train
Results saved in directory: ./output_test_hugobot/train_test_gradient\test
Train set (Gradient):


Unnamed: 0,EntityID,TemporalPropertyID,TimeStamp,TemporalPropertyValue,angle,state
0,5,2,2.0,20.1,,3
1,5,2,7.0,20.1,,3
0,5,3,2.0,20.1,,3
1,5,3,7.0,20.1,,3
0,5,4,2.0,20.1,,3


Test set (Gradient):


Unnamed: 0,EntityID,TemporalPropertyID,TimeStamp,TemporalPropertyValue,angle,state
0,8,1,2.0,25.0,,3
0,8,6,2.0,28.0,,3
0,8,40,5.0,101.0,,3
1,8,40,6.0,101.0,-5.212522e-13,1
2,8,40,7.0,101.0,6.210642e-13,2
