In [1]:
# Importing Necessary Libraries
import numpy as np
import pandas as pd
import os, time
import configparser
import influxdb_client

from datetime import datetime
from itertools import product
from influxdb_client import InfluxDBClient, Point, WritePrecision, WriteOptions
from influxdb_client import BucketsApi, Bucket, BucketRetentionRules
from influxdb_client.client.write_api import SYNCHRONOUS
from influxdb_client.rest import ApiException

In [20]:
def write_time_series_data(df: pd.DataFrame,
                           bucket: str,
                           measurement: str,
                           tag_name: list
                           ) -> None:
    """
    Write time series data to an InfluxDB bucket.

    The input frame is normalised on a private copy, so the caller's DataFrame
    is left untouched. Connection settings are read from the module-level
    ``influx_url``, ``influx_token`` and ``influx_org`` variables at call time.

    Args:
        df (pd.DataFrame): The data to be written to InfluxDB. It should have a
            'Timestamp' column in '%d-%m-%Y %H:%M:%S' format; if no such column
            exists, the first column is treated as the timestamp. The values to
            store must be in a '_value' column.
        bucket (str): The name of the bucket in InfluxDB to which data will be written.
        measurement (str): The name of the measurement or table within the bucket.
        tag_name (list): Column names of ``df`` to be written as tags.

    Returns:
        None

    Raises:
        ValueError: If a timestamp does not match '%d-%m-%Y %H:%M:%S'
            (raised by ``pd.to_datetime``).
        Exception: Any error raised by the InfluxDB write call is printed and
            re-raised unchanged.
    """
    # Work on a copy so the caller's column names and dtypes are preserved.
    frame = df.copy()

    if 'Timestamp' not in frame.columns:
        frame = frame.rename(columns={frame.columns[0]: 'Timestamp'})

    frame['Timestamp'] = pd.to_datetime(frame['Timestamp'], format="%d-%m-%Y %H:%M:%S")

    # Rows sharing a timestamp are spread one second apart (0s, 1s, 2s, ...) so
    # that each row gets a distinct timestamp before being written.
    # NOTE(review): an offset can land on a later row's original timestamp if a
    # group has more duplicates than the gap to the next timestamp — confirm
    # this cannot happen for the data being loaded.
    duplicate_rank = frame.groupby('Timestamp').cumcount()
    frame['Timestamp'] += pd.to_timedelta(duplicate_rank, unit='s')

    # perf_counter is monotonic, so the elapsed time is immune to clock changes.
    start = time.perf_counter()
    with InfluxDBClient(url=influx_url, token=influx_token, org=influx_org) as client:
        # NOTE(review): write_api() is created with its default options here;
        # if that is the client's batching mode, failures may be reported in
        # the background instead of raising from write() — confirm.
        with client.write_api() as write_api:
            try:
                write_api.write(
                    bucket=bucket,
                    record=frame,
                    data_frame_measurement_name=measurement,
                    data_frame_tag_columns=tag_name,
                    data_frame_field_columns=["_value"],
                    data_frame_timestamp_column="Timestamp",
                )
                print("Time-series data successfully pushed to InfluxDB.")
            except Exception as e:
                print(f"Error while writing to InfluxDB: {e}")
                raise  # Bare raise notifies the caller and keeps the original traceback
    print(time.perf_counter() - start)

In [3]:
# InfluxDB connection settings.
# The API token is a secret and must not be stored in the notebook: it is read
# from the INFLUXDB_V2_TOKEN environment variable. The token that was previously
# hardcoded in this cell should be treated as compromised and revoked.
influx_token = os.environ.get("INFLUXDB_V2_TOKEN", "")
if not influx_token:
    print("WARNING: INFLUXDB_V2_TOKEN is not set; writes to InfluxDB are likely to fail authentication.")
# Organisation and URL keep their previous values as defaults but can be overridden.
influx_org = os.environ.get("INFLUXDB_V2_ORG", "self")
influx_url = os.environ.get("INFLUXDB_V2_URL", "http://localhost:8086/")

In [4]:
# Relative location of the credit-card transactions CSV loaded by the next cell.
path = "data/credit_card_transactions.csv"

In [5]:
# Load the transactions; the CSV's unnamed first column becomes the index.
df = pd.read_csv(
    path,
    index_col="Unnamed: 0",
)
# Bare last expression so the notebook renders the frame as the cell output.
df

Unnamed: 0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
01-09-2013 00:00:00,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,0.090794,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0
01-09-2013 00:00:00,1.191857,0.266151,0.166480,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,-0.166974,...,-0.225775,-0.638672,0.101288,-0.339846,0.167170,0.125895,-0.008983,0.014724,2.69,0
01-09-2013 00:01:00,-1.358354,-1.340163,1.773209,0.379780,-0.503198,1.800499,0.791461,0.247676,-1.514654,0.207643,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0
01-09-2013 00:01:00,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,-0.054952,...,-0.108300,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.50,0
01-09-2013 00:02:00,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,0.753074,...,-0.009431,0.798278,-0.137458,0.141267,-0.206010,0.502292,0.219422,0.215153,69.99,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29-12-2013 23:46:00,-11.881118,10.071785,-9.834783,-2.066656,-5.364473,-2.606837,-4.918215,7.305334,1.914428,4.356170,...,0.213454,0.111864,1.014480,-0.509348,1.436807,0.250034,0.943651,0.823731,0.77,0
29-12-2013 23:47:00,-0.732789,-0.055080,2.035030,-0.738589,0.868229,1.058415,0.024330,0.294869,0.584800,-0.975926,...,0.214205,0.924384,0.012463,-1.016226,-0.606624,-0.395255,0.068472,-0.053527,24.79,0
29-12-2013 23:48:00,1.919565,-0.301254,-3.249640,-0.557828,2.630515,3.031260,-0.296827,0.708417,0.432454,-0.484782,...,0.232045,0.578229,-0.037501,0.640134,0.265745,-0.087371,0.004455,-0.026561,67.88,0
29-12-2013 23:48:00,-0.240440,0.530483,0.702510,0.689799,-0.377961,0.623708,-0.686180,0.679145,0.392087,-0.399126,...,0.265245,0.800049,-0.163298,0.123205,-0.569159,0.546668,0.108821,0.104533,10.00,0


In [22]:
# Columns of `df` to export to InfluxDB; only the 'Class' label is pushed.
ff = ['Class']
for c, i in enumerate(ff):
    # Build a three-column frame for the writer:
    #   index    -> the former index of `df` (renamed to 'Timestamp' by the writer)
    #   _value   -> the selected column cast to float (the field that gets stored)
    #   features -> constant tag value identifying this series
    gf = (
        df[[i]]
        .rename(columns={i: '_value'})
        .astype({'_value': float})
        .assign(features="Anomaly")
        .reset_index()
    )
    if gf.empty:
        # Nothing to write, and gf.iloc[0, 2] below would fail on an empty frame.
        print(f"{c} \t {datetime.now()} \t no rows for column {i!r}; skipped")
        print()
        continue
    print(f"{c} \t {datetime.now()} \t {gf.iloc[0,2]} \t {(gf.shape[0])}")
    print(gf)
    write_time_series_data(gf, "credit_card_transactions", "transactions", [gf.columns[2]])
    print()

0 	 2023-11-23 14:50:33.114338 	 Anomaly 	 284807
                      index  _value features
0       01-09-2013 00:00:00     0.0  Anomaly
1       01-09-2013 00:00:00     0.0  Anomaly
2       01-09-2013 00:01:00     0.0  Anomaly
3       01-09-2013 00:01:00     0.0  Anomaly
4       01-09-2013 00:02:00     0.0  Anomaly
...                     ...     ...      ...
284802  29-12-2013 23:46:00     0.0  Anomaly
284803  29-12-2013 23:47:00     0.0  Anomaly
284804  29-12-2013 23:48:00     0.0  Anomaly
284805  29-12-2013 23:48:00     0.0  Anomaly
284806  29-12-2013 23:52:00     0.0  Anomaly

[284807 rows x 3 columns]
Time-series data successfully pushed to InfluxDB.
6.161337375640869

