# Anomaly Detection <a id="top"></a>

Find all time intervals `[time1, time2]` where `time2-time1 < time_thres` and not nested.

**Testcases**

- [Testcase 1](#testcase1): 5 distinct adid within `time_thres`
- [Testcase 2](#testcase2): 2 sets of 5 distinct adid, each within `time_thres`, but more than `time_thres` apart
- [Testcase 3](#testcase3): 5 non-distinct adid within `time_thres`
- [Testcase 4](#testcase4): 2 sets of 5 non-distinct adid, each within `time_thres`, but more than `time_thres` apart
- [Testcase 5](#testcase5): 2 sets of 5 distinct adid, each within `time_thres`, but at most `time_thres` apart
- [Testcase 6](#testcase6): 2 sets of 2 non-distinct adid, each within `time_thres`, but at most `time_thres` apart
- [Testcase 7](#testcase7): More than 2 sets of overlapping adid, each within `time_thres`

In [53]:
from datetime import datetime, timedelta
import pandas as pd
import numpy as np

In [54]:
def detect_weird_timeint(data, time_window):
    '''
    '''
    def check_multiple_adid(data, anomalies):
        adids = data.adid.unique()
        num_adid = len(adids)
        token = False

        if num_adid > 1:
            print("anomaly found")
            anomalies.loc[len(anomalies)] = [start_index, end_index, data.time[start_index],
                                            data.time[end_index], num_adid, list(adids)]
        return anomalies
    
    start_index = 0
    end_index = 1
    
    anomalies = pd.DataFrame(columns=["start_index", "end_index", "start_time", "end_time", "num_adid", "adids"])
    anomalies.dtypes
    
    while (start_index < end_index and end_index+1 < len(data)):
        print("-"*50)
        print(start_index, end_index)
        
        if end_index == len(data)-1:
            break
        
        if (data.time[end_index] - data.time[start_index] <= time_window):
            print("current time window not exceeded", str(data.time[end_index] - data.time[start_index]))
            if (data.time[end_index+1] - data.time[start_index] > time_window):
                print("next time window exceeded", str(data.time[end_index] - data.time[start_index]))
                anomalies = check_multiple_adid(data.loc[start_index:end_index], anomalies)

                start_index += 1
                
            end_index += 1
            
        else:
            print("current time window exceeded", str(data.time[end_index] - data.time[start_index]))
            if start_index + 1 == end_index:
                print("timestep = 1")
                start_index += 1
                end_index += 1
                continue
            else:
                print("timestep > 1")
                start_index += 1
    
    print(start_index, end_index)
    
    if end_index == len(data)-1:
        print("end of data")
        if end_index in set(anomalies.end_index):
            return anomalies
        
        while (start_index < end_index):
            print("-"*50)
            print(start_index, end_index)
            if (data.time[end_index] - data.time[start_index] <= time_window):
                print("current time window not exceeded", str(data.time[end_index] - data.time[start_index]))
                anomalies = check_multiple_adid(data.loc[start_index:end_index], anomalies)
                
                return anomalies
            start_index += 1
    
    return anomalies

In [55]:
START_TIME = datetime(2020,1,1)
time_thres = timedelta(minutes=10)
ADIDS = ["A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M"]

## Testcase 1 <a id="testcase1"></a>

5 distinct adid within `time_thres`

[Back to top](#top)

In [75]:
testcase1_df = pd.DataFrame(columns=["time", "adid"])

for i in range(5):
    testcase1_df.loc[i,:] = [START_TIME + i*time_thres/10, ADIDS[i]]
    
testcase1_df = testcase1_df.sort_values(by="time").reset_index(drop=True)
testcase1_df

Unnamed: 0,time,adid
0,2020-01-01 00:00:00,A
1,2020-01-01 00:01:00,B
2,2020-01-01 00:02:00,C
3,2020-01-01 00:03:00,D
4,2020-01-01 00:04:00,E


In [76]:
correct1_df = pd.DataFrame(columns=['start_index', 'end_index', 'start_time', 'end_time', 'num_adid', 'adids'])
correct1_df.loc[0,:] = [0, 4, testcase1_df.time[0], testcase1_df.time[4], 5, list(testcase1_df.loc[0:4, "adid"].unique())]
correct1_df = correct1_df.astype({'start_time': 'datetime64[ns]', 'end_time': 'datetime64[ns]'})
correct1_df

  arr_value = np.array(value)


Unnamed: 0,start_index,end_index,start_time,end_time,num_adid,adids
0,0,4,2020-01-01,2020-01-01 00:04:00,5,"[A, B, C, D, E]"


In [77]:
output1_df = detect_weird_timeint(testcase1_df, timedelta(minutes=10))
output1_df.astype({'start_time': 'datetime64[ns]', 'end_time': 'datetime64[ns]'})
output1_df

--------------------------------------------------
0 1
current time window not exceeded 0:01:00
--------------------------------------------------
0 2
current time window not exceeded 0:02:00
--------------------------------------------------
0 3
current time window not exceeded 0:03:00
0 4
end of data
--------------------------------------------------
0 4
current time window not exceeded 0:04:00
anomaly found


Unnamed: 0,start_index,end_index,start_time,end_time,num_adid,adids
0,0,4,2020-01-01,2020-01-01 00:04:00,5,"[A, B, C, D, E]"


In [78]:
pd.testing.assert_frame_equal(correct1_df, output1_df)

## Testcase 2 <a id="testcase2"></a>

2 sets of 5 distinct adid, each within `time_thres`, but more than `time_thres` apart

[Back to top](#top)

In [79]:
testcase2_df = pd.DataFrame(columns=["time", "adid"])

for i in range(5):
    testcase2_df.loc[i,:] = [START_TIME + i*time_thres/10, ADIDS[i]]
    
for i in range(5):
    testcase2_df.loc[5+i,:] = [START_TIME + timedelta(minutes=31) + i*time_thres/10, ADIDS[i]]
    
testcase2_df = testcase2_df.sort_values(by="time").reset_index(drop=True)
testcase2_df

Unnamed: 0,time,adid
0,2020-01-01 00:00:00,A
1,2020-01-01 00:01:00,B
2,2020-01-01 00:02:00,C
3,2020-01-01 00:03:00,D
4,2020-01-01 00:04:00,E
5,2020-01-01 00:31:00,A
6,2020-01-01 00:32:00,B
7,2020-01-01 00:33:00,C
8,2020-01-01 00:34:00,D
9,2020-01-01 00:35:00,E


In [87]:
correct2_df = pd.DataFrame(columns=['start_index', 'end_index', 'start_time', 'end_time', 'num_adid', 'adids'])
correct2_df.loc[0,:] = [0, 4, testcase2_df.time[0], testcase2_df.time[4], 5, list(testcase2_df.loc[0:4, "adid"].unique())]
correct2_df.loc[1,:] = [5, 9, testcase2_df.time[5], testcase2_df.time[9], 5, list(testcase2_df.loc[5:9, "adid"].unique())]
correct2_df = correct2_df.astype({'start_time': 'datetime64[ns]', 'end_time': 'datetime64[ns]'})
correct2_df

  arr_value = np.array(value)


Unnamed: 0,start_index,end_index,start_time,end_time,num_adid,adids
0,0,4,2020-01-01 00:00:00,2020-01-01 00:04:00,5,"[A, B, C, D, E]"
1,5,9,2020-01-01 00:31:00,2020-01-01 00:35:00,5,"[A, B, C, D, E]"


In [85]:
output2_df = detect_weird_timeint(testcase2_df, timedelta(minutes=10))
output2_df

--------------------------------------------------
0 1
current time window not exceeded 0:01:00
--------------------------------------------------
0 2
current time window not exceeded 0:02:00
--------------------------------------------------
0 3
current time window not exceeded 0:03:00
--------------------------------------------------
0 4
current time window not exceeded 0:04:00
next time window exceeded 0:04:00
anomaly found
--------------------------------------------------
1 5
current time window exceeded 0:30:00
timestep > 1
--------------------------------------------------
2 5
current time window exceeded 0:29:00
timestep > 1
--------------------------------------------------
3 5
current time window exceeded 0:28:00
timestep > 1
--------------------------------------------------
4 5
current time window exceeded 0:27:00
timestep = 1
--------------------------------------------------
5 6
current time window not exceeded 0:01:00
--------------------------------------------------
5

Unnamed: 0,start_index,end_index,start_time,end_time,num_adid,adids
0,0,4,2020-01-01 00:00:00,2020-01-01 00:04:00,5,"[A, B, C, D, E]"
1,5,9,2020-01-01 00:31:00,2020-01-01 00:35:00,5,"[A, B, C, D, E]"


In [88]:
pd.testing.assert_frame_equal(correct2_df, output2_df)

## Testcase 3 <a id="testcase3"></a>

5 non-distinct adid within `time_thres`

[Back to top](#top)

In [89]:
testcase3_df = pd.DataFrame(columns=["time", "adid"])

for i in range(5):
    testcase3_df.loc[i,:] = [START_TIME + i*time_thres/10, ADIDS[i%3]]
    
testcase3_df = testcase3_df.sort_values(by="time").reset_index(drop=True)
testcase3_df

Unnamed: 0,time,adid
0,2020-01-01 00:00:00,A
1,2020-01-01 00:01:00,B
2,2020-01-01 00:02:00,C
3,2020-01-01 00:03:00,A
4,2020-01-01 00:04:00,B


In [90]:
correct3_df = pd.DataFrame(columns=['start_index', 'end_index', 'start_time', 'end_time', 'num_adid', 'adids'])
correct3_df.loc[0,:] = [0, 4, testcase3_df.time[0], testcase3_df.time[4], 3, list(testcase3_df.loc[0:4, "adid"].unique())]
correct3_df = correct3_df.astype({'start_time': 'datetime64[ns]', 'end_time': 'datetime64[ns]'})
correct3_df

  arr_value = np.array(value)


Unnamed: 0,start_index,end_index,start_time,end_time,num_adid,adids
0,0,4,2020-01-01,2020-01-01 00:04:00,3,"[A, B, C]"


In [91]:
output3_df = detect_weird_timeint(testcase3_df, timedelta(minutes=10))
output3_df

--------------------------------------------------
0 1
current time window not exceeded 0:01:00
--------------------------------------------------
0 2
current time window not exceeded 0:02:00
--------------------------------------------------
0 3
current time window not exceeded 0:03:00
0 4
end of data
--------------------------------------------------
0 4
current time window not exceeded 0:04:00
anomaly found


Unnamed: 0,start_index,end_index,start_time,end_time,num_adid,adids
0,0,4,2020-01-01,2020-01-01 00:04:00,3,"[A, B, C]"


In [92]:
pd.testing.assert_frame_equal(correct3_df, output3_df)

## Testcase 4 <a id="testcase4"></a>

2 sets of 5 non-distinct adid, each within `time_thres`, but more than `time_thres` apart

[Back to top](#top)

In [94]:
testcase4_df = pd.DataFrame(columns=["time", "adid"])

for i in range(5):
    testcase4_df.loc[i,:] = [START_TIME + i*time_thres/10, ADIDS[i%3]]
    
for i in range(5):
    testcase4_df.loc[5+i,:] = [START_TIME + timedelta(minutes=31) + i*time_thres/10, ADIDS[i%3]]
    
testcase4_df = testcase4_df.sort_values(by="time").reset_index(drop=True)
testcase4_df

Unnamed: 0,time,adid
0,2020-01-01 00:00:00,A
1,2020-01-01 00:01:00,B
2,2020-01-01 00:02:00,C
3,2020-01-01 00:03:00,A
4,2020-01-01 00:04:00,B
5,2020-01-01 00:31:00,A
6,2020-01-01 00:32:00,B
7,2020-01-01 00:33:00,C
8,2020-01-01 00:34:00,A
9,2020-01-01 00:35:00,B


In [97]:
correct4_df = pd.DataFrame(columns=['start_index', 'end_index', 'start_time', 'end_time', 'num_adid', 'adids'])
correct4_df.loc[0,:] = [0, 4, testcase4_df.time[0], testcase4_df.time[4], 3, list(testcase4_df.loc[0:4, "adid"].unique())]
correct4_df.loc[1,:] = [5, 9, testcase4_df.time[5], testcase4_df.time[9], 3, list(testcase4_df.loc[5:9, "adid"].unique())]
correct4_df = correct4_df.astype({'start_time': 'datetime64[ns]', 'end_time': 'datetime64[ns]'})
correct4_df

  arr_value = np.array(value)


Unnamed: 0,start_index,end_index,start_time,end_time,num_adid,adids
0,0,4,2020-01-01 00:00:00,2020-01-01 00:04:00,3,"[A, B, C]"
1,5,9,2020-01-01 00:31:00,2020-01-01 00:35:00,3,"[A, B, C]"


In [98]:
output4_df = detect_weird_timeint(testcase4_df, timedelta(minutes=10))
output4_df

--------------------------------------------------
0 1
current time window not exceeded 0:01:00
--------------------------------------------------
0 2
current time window not exceeded 0:02:00
--------------------------------------------------
0 3
current time window not exceeded 0:03:00
--------------------------------------------------
0 4
current time window not exceeded 0:04:00
next time window exceeded 0:04:00
anomaly found
--------------------------------------------------
1 5
current time window exceeded 0:30:00
timestep > 1
--------------------------------------------------
2 5
current time window exceeded 0:29:00
timestep > 1
--------------------------------------------------
3 5
current time window exceeded 0:28:00
timestep > 1
--------------------------------------------------
4 5
current time window exceeded 0:27:00
timestep = 1
--------------------------------------------------
5 6
current time window not exceeded 0:01:00
--------------------------------------------------
5

Unnamed: 0,start_index,end_index,start_time,end_time,num_adid,adids
0,0,4,2020-01-01 00:00:00,2020-01-01 00:04:00,3,"[A, B, C]"
1,5,9,2020-01-01 00:31:00,2020-01-01 00:35:00,3,"[A, B, C]"


In [99]:
pd.testing.assert_frame_equal(correct4_df, output4_df)

## Testcase 5 <a id="testcase5"></a>

2 sets of 5 distinct adid, each within `time_thres`, but at most `time_thres` apart

[Back to top](#top)

In [101]:
testcase5_df = pd.DataFrame(columns=["time", "adid"])

for i in range(5):
    testcase5_df.loc[i,:] = [START_TIME + i*time_thres/10, ADIDS[i]]
    
for i in range(5):
    testcase5_df.loc[5+i,:] = [testcase5_df.time[2] + timedelta(seconds=5) + i*timedelta(seconds=177), ADIDS[i+5]]
    
testcase5_df = testcase5_df.sort_values(by="time").reset_index(drop=True)
testcase5_df

Unnamed: 0,time,adid
0,2020-01-01 00:00:00,A
1,2020-01-01 00:01:00,B
2,2020-01-01 00:02:00,C
3,2020-01-01 00:02:05,F
4,2020-01-01 00:03:00,D
5,2020-01-01 00:04:00,E
6,2020-01-01 00:05:02,G
7,2020-01-01 00:07:59,H
8,2020-01-01 00:10:56,I
9,2020-01-01 00:13:53,J


In [102]:
correct5_df = pd.DataFrame(columns=['start_index', 'end_index', 'start_time', 'end_time', 'num_adid', 'adids'])
correct5_df.loc[0,:] = [0, 7, testcase5_df.time[0], testcase5_df.time[7], 8, list(testcase5_df.loc[0:7, "adid"].unique())]
correct5_df.loc[1,:] = [1, 8, testcase5_df.time[1], testcase5_df.time[8], 8, list(testcase5_df.loc[1:8, "adid"].unique())]
correct5_df.loc[2,:] = [5, 9, testcase5_df.time[5], testcase5_df.time[9], 5, list(testcase5_df.loc[5:9, "adid"].unique())]
correct5_df = correct5_df.astype({'start_time': 'datetime64[ns]', 'end_time': 'datetime64[ns]'})
correct5_df

  arr_value = np.array(value)


Unnamed: 0,start_index,end_index,start_time,end_time,num_adid,adids
0,0,7,2020-01-01 00:00:00,2020-01-01 00:07:59,8,"[A, B, C, F, D, E, G, H]"
1,1,8,2020-01-01 00:01:00,2020-01-01 00:10:56,8,"[B, C, F, D, E, G, H, I]"
2,5,9,2020-01-01 00:04:00,2020-01-01 00:13:53,5,"[E, G, H, I, J]"


In [103]:
output5_df = detect_weird_timeint(testcase5_df, timedelta(minutes=10))
output5_df

--------------------------------------------------
0 1
current time window not exceeded 0:01:00
--------------------------------------------------
0 2
current time window not exceeded 0:02:00
--------------------------------------------------
0 3
current time window not exceeded 0:02:05
--------------------------------------------------
0 4
current time window not exceeded 0:03:00
--------------------------------------------------
0 5
current time window not exceeded 0:04:00
--------------------------------------------------
0 6
current time window not exceeded 0:05:02
--------------------------------------------------
0 7
current time window not exceeded 0:07:59
next time window exceeded 0:07:59
anomaly found
--------------------------------------------------
1 8
current time window not exceeded 0:09:56
next time window exceeded 0:09:56
anomaly found
2 9
end of data
--------------------------------------------------
2 9
--------------------------------------------------
3 9
----------

Unnamed: 0,start_index,end_index,start_time,end_time,num_adid,adids
0,0,7,2020-01-01 00:00:00,2020-01-01 00:07:59,8,"[A, B, C, F, D, E, G, H]"
1,1,8,2020-01-01 00:01:00,2020-01-01 00:10:56,8,"[B, C, F, D, E, G, H, I]"
2,5,9,2020-01-01 00:04:00,2020-01-01 00:13:53,5,"[E, G, H, I, J]"


In [104]:
pd.testing.assert_frame_equal(correct5_df, output5_df)

## Testcase 6 <a id="testcase6"></a>

2 sets of 2 non-distinct adid, each within `time_thres`, but at most `time_thres` apart

[Back to top](#top)

In [106]:
testcase6_df = pd.DataFrame(columns=["time", "adid"])

testcase6_df.loc[0,:] = [START_TIME, ADIDS[0]]

for i in range(1, 5):
    testcase6_df.loc[i,:] = [START_TIME + i*time_thres/10, ADIDS[1]]
    
for i in range(4):
    testcase6_df.loc[5+i,:] = [testcase6_df.time[2] + timedelta(seconds=5) + i*timedelta(seconds=207), ADIDS[1]]

testcase6_df.loc[9,:] = [testcase6_df.time[2] + timedelta(seconds=5) + 4*timedelta(seconds=207), ADIDS[0]]
    
testcase6_df = testcase6_df.sort_values(by="time").reset_index(drop=True)
testcase6_df

Unnamed: 0,time,adid
0,2020-01-01 00:00:00,A
1,2020-01-01 00:01:00,B
2,2020-01-01 00:02:00,B
3,2020-01-01 00:02:05,B
4,2020-01-01 00:03:00,B
5,2020-01-01 00:04:00,B
6,2020-01-01 00:05:32,B
7,2020-01-01 00:08:59,B
8,2020-01-01 00:12:26,B
9,2020-01-01 00:15:53,A


In [112]:
correct6_df = pd.DataFrame(columns=['start_index', 'end_index', 'start_time', 'end_time', 'num_adid', 'adids'])
correct6_df.loc[0,:] = [0, 7, testcase6_df.time[0], testcase6_df.time[7], 2, list(testcase6_df.loc[0:7, "adid"].unique())]
correct6_df.loc[1,:] = [7, 9, testcase6_df.time[7], testcase6_df.time[9], 2, list(testcase6_df.loc[6:9, "adid"].unique())]
correct6_df = correct6_df.astype({'start_time': 'datetime64[ns]', 'end_time': 'datetime64[ns]'})
correct6_df

  arr_value = np.array(value)


Unnamed: 0,start_index,end_index,start_time,end_time,num_adid,adids
0,0,7,2020-01-01 00:00:00,2020-01-01 00:08:59,2,"[A, B]"
1,7,9,2020-01-01 00:08:59,2020-01-01 00:15:53,2,"[B, A]"


In [110]:
output6_df = detect_weird_timeint(testcase6_df, timedelta(minutes=10))
output6_df

--------------------------------------------------
0 1
current time window not exceeded 0:01:00
--------------------------------------------------
0 2
current time window not exceeded 0:02:00
--------------------------------------------------
0 3
current time window not exceeded 0:02:05
--------------------------------------------------
0 4
current time window not exceeded 0:03:00
--------------------------------------------------
0 5
current time window not exceeded 0:04:00
--------------------------------------------------
0 6
current time window not exceeded 0:05:32
--------------------------------------------------
0 7
current time window not exceeded 0:08:59
next time window exceeded 0:08:59
anomaly found
--------------------------------------------------
1 8
current time window exceeded 0:11:26
timestep > 1
--------------------------------------------------
2 8
current time window exceeded 0:10:26
timestep > 1
--------------------------------------------------
3 8
current time wi

Unnamed: 0,start_index,end_index,start_time,end_time,num_adid,adids
0,0,7,2020-01-01 00:00:00,2020-01-01 00:08:59,2,"[A, B]"
1,7,9,2020-01-01 00:08:59,2020-01-01 00:15:53,2,"[B, A]"


In [113]:
pd.testing.assert_frame_equal(correct6_df, output6_df)

## Testcase 7 <a id="testcase7"></a>

More than 2 sets of overlapping adid, each within time_thres

[Back to top](#top)

In [131]:
testcase7_df = pd.DataFrame(columns=["time", "adid"])

for i in range(4):
    testcase7_df.loc[i,:] = [START_TIME + i*timedelta(seconds=207), ADIDS[i%3]]
    
for i in range(4):
    testcase7_df.loc[i+4,:] = [testcase7_df.time[1] + timedelta(minutes=3) + i*time_thres/10, ADIDS[i%3]]
    
for i in range(4):
    testcase7_df.loc[i+8,:] = [testcase7_df.time[3] + timedelta(minutes=5) + i*time_thres/4, ADIDS[1]]
    
testcase7_df = testcase7_df.sort_values(by="time").reset_index(drop=True)
testcase7_df

Unnamed: 0,time,adid
0,2020-01-01 00:00:00,A
1,2020-01-01 00:03:27,B
2,2020-01-01 00:06:27,A
3,2020-01-01 00:06:54,C
4,2020-01-01 00:07:27,B
5,2020-01-01 00:08:27,C
6,2020-01-01 00:09:27,A
7,2020-01-01 00:10:21,A
8,2020-01-01 00:15:21,B
9,2020-01-01 00:17:51,B


In [133]:
correct7_df = pd.DataFrame(columns=['start_index', 'end_index', 'start_time', 'end_time', 'num_adid', 'adids'])
correct7_df.loc[0,:] = [0, 6, testcase7_df.time[0], testcase7_df.time[6], 3, list(testcase7_df.loc[0:6, "adid"].unique())]
correct7_df.loc[1,:] = [1, 7, testcase7_df.time[1], testcase7_df.time[7], 3, list(testcase7_df.loc[1:7, "adid"].unique())]
correct7_df.loc[2,:] = [2, 8, testcase7_df.time[2], testcase7_df.time[8], 3, list(testcase7_df.loc[2:8, "adid"].unique())]
correct7_df.loc[3,:] = [5, 9, testcase7_df.time[5], testcase7_df.time[9], 3, list(testcase7_df.loc[5:9, "adid"].unique())]
correct7_df.loc[4,:] = [7, 10, testcase7_df.time[7], testcase7_df.time[10], 2, list(testcase7_df.loc[7:10, "adid"].unique())]
correct7_df = correct7_df.astype({'start_time': 'datetime64[ns]', 'end_time': 'datetime64[ns]'})
correct7_df

  arr_value = np.array(value)


Unnamed: 0,start_index,end_index,start_time,end_time,num_adid,adids
0,0,6,2020-01-01 00:00:00,2020-01-01 00:09:27,3,"[A, B, C]"
1,1,7,2020-01-01 00:03:27,2020-01-01 00:10:21,3,"[B, A, C]"
2,2,8,2020-01-01 00:06:27,2020-01-01 00:15:21,3,"[A, C, B]"
3,5,9,2020-01-01 00:08:27,2020-01-01 00:17:51,3,"[C, A, B]"
4,7,10,2020-01-01 00:10:21,2020-01-01 00:20:21,2,"[A, B]"


In [134]:
output7_df = detect_weird_timeint(testcase7_df, timedelta(minutes=10))
output7_df

--------------------------------------------------
0 1
current time window not exceeded 0:03:27
--------------------------------------------------
0 2
current time window not exceeded 0:06:27
--------------------------------------------------
0 3
current time window not exceeded 0:06:54
--------------------------------------------------
0 4
current time window not exceeded 0:07:27
--------------------------------------------------
0 5
current time window not exceeded 0:08:27
--------------------------------------------------
0 6
current time window not exceeded 0:09:27
next time window exceeded 0:09:27
anomaly found
--------------------------------------------------
1 7
current time window not exceeded 0:06:54
next time window exceeded 0:06:54
anomaly found
--------------------------------------------------
2 8
current time window not exceeded 0:08:54
next time window exceeded 0:08:54
anomaly found
--------------------------------------------------
3 9
current time window exceeded 0:10

Unnamed: 0,start_index,end_index,start_time,end_time,num_adid,adids
0,0,6,2020-01-01 00:00:00,2020-01-01 00:09:27,3,"[A, B, C]"
1,1,7,2020-01-01 00:03:27,2020-01-01 00:10:21,3,"[B, A, C]"
2,2,8,2020-01-01 00:06:27,2020-01-01 00:15:21,3,"[A, C, B]"
3,5,9,2020-01-01 00:08:27,2020-01-01 00:17:51,3,"[C, A, B]"
4,7,10,2020-01-01 00:10:21,2020-01-01 00:20:21,2,"[A, B]"


In [135]:
pd.testing.assert_frame_equal(correct7_df, output7_df)