### Project: Analyze fluctuations in HTTP request latency data to determine reliability

Outline:
- Prepare data:
	- Generate sample data from a script (samples are recorded at 30-second intervals)

- Analyze data
	- Load data (.csv) to dataframe
	- Standardize data
	    - Set the timestamp column as the dataframe index
	- Explore data
	    - Get an overview of the dataset by summarizing the buckets over the time series
        - Identify HTTP request latency issues in the dataset -> Result: the time range in which the issue occurs
    - SLI Calculator
        - Simulate rate() over rolling windows
        - Calculate percentiles [50th, 90th, 95th, 99th] of latency over the time vector
	
- Apply model

In [96]:
# Ignore FutureWarning messages so they do not clutter the cell outputs.
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import pandas as pd
import matplotlib.pyplot as plt

# Disable pandas' chained-assignment (SettingWithCopy) warning for this notebook.
pd.options.mode.chained_assignment = None

In [114]:
# Read the request dataset CSV into `root`; the first CSV column becomes the row index.
root = pd.read_csv(
    '../statics/csv/request_data_with_anomaly_pod_instance.csv',
    index_col=0,
)

In [117]:
# Preview the first rows of the loaded dataframe.
root.head()

Unnamed: 0_level_0,path,method,controller_pod,service_name,le,count
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2024-01-02 10:42:00,/home,GET,pod-2,process_log,inf,1
2024-01-02 10:42:00,/home,GET,pod-2,process_log,1.0,1
2024-01-02 10:42:00,/home,GET,pod-2,process_log,2.0,1
2024-01-02 10:42:00,/users,GET,pod-2,process_log,inf,2
2024-01-02 10:42:00,/users,GET,pod-2,process_log,0.5,2


In [116]:
# Use the timestamp column as the dataframe index.
# The guard keeps this cell re-runnable: once "timestamp" is already the index,
# calling set_index("timestamp") again would raise KeyError because the column
# no longer exists.
if root.index.name != "timestamp":
    root = root.set_index("timestamp")
# Convert the bucket column (`le`) to strings.
root['le'] = root['le'].astype(str)

In [118]:
# Columns used together as the group-by keys.
grouped = [
    'service_name',
    'controller_pod',
    'path',
    'method',
    'le',
]

In [159]:
# Show the rows for path '/users' on pod-1 between 10:56:00 and 10:58:00 on 2024-01-02.
(
    root
    .query("path == '/users' and controller_pod == 'pod-1'")
    .loc['2024-01-02 10:56:00':'2024-01-02 10:58:00']
)

Unnamed: 0_level_0,path,method,controller_pod,service_name,le,count
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2024-01-02 10:56:00,/users,GET,pod-1,process_log,inf,120
2024-01-02 10:56:00,/users,GET,pod-1,process_log,0.1,21
2024-01-02 10:56:00,/users,GET,pod-1,process_log,0.25,40
2024-01-02 10:56:00,/users,GET,pod-1,process_log,0.5,81
2024-01-02 10:56:00,/users,GET,pod-1,process_log,1.0,120
2024-01-02 10:56:00,/users,GET,pod-1,process_log,2.0,120
2024-01-02 10:56:30,/users,GET,pod-1,process_log,inf,124
2024-01-02 10:56:30,/users,GET,pod-1,process_log,0.25,42
2024-01-02 10:56:30,/users,GET,pod-1,process_log,0.5,84
2024-01-02 10:56:30,/users,GET,pod-1,process_log,1.0,124


In [None]:
# Overlay one histogram of the `count` values per group (keys listed in
# `grouped`) on a single shared axis.
fig, ax = plt.subplots(figsize=(8,6))
for group_key, group_df in root.groupby(grouped):
    # Grouping by a list of columns yields tuple keys; join them into a plain
    # string so each legend entry is readable.
    key_parts = group_key if isinstance(group_key, tuple) else (group_key,)
    legend_label = " | ".join(map(str, key_parts))
    # No `x=` argument here: Series.plot does not use one, so passing
    # x="le" had no effect on the figure.
    group_df['count'].plot(kind="hist", ax=ax, label=legend_label, alpha=0.5)
ax.set_xlabel("count")
ax.set_title("Distribution of count per " + " / ".join(grouped))
ax.legend()