# Tinyflux: Example with fictious temperature measurements

## Import Python Libraries und Settings

In [70]:
# Libraries
import pandas as pd
import csv
from tinyflux import TinyFlux, Point, FieldQuery, TagQuery, TimeQuery
from datetime import datetime, timezone, timedelta

# Settings
import warnings
warnings.filterwarnings("ignore")

# Abkürzungen für Query-Typen
time = TimeQuery()
tags = TagQuery()
field = FieldQuery()

## Load function for creating a dataframe from query results into memory

In [71]:
# Define the function to transform the list of Point objects into a DataFrame
def points_to_dataframe(points_list):
    data = []
    
    # Extract the relevant information
    for point in points_list:
        data.append({
            "time": point.time,  # Access time attribute
            "sensor_id": point.tags['sensor_id'],  # Access sender_id from tags dictionary
            "temperature": point.fields['temperature'],  # Access receiver_id from tags dictionary
            "status": point.tags['status']  # Access status from tags dictionary
        })

    # Create a DataFrame
    df = pd.DataFrame(data)

    # Convert time to datetime
    df['time'] = pd.to_datetime(df['time'])

    return df

## Initialize the TinyFlux database

In [72]:
# Erstellung der Tinyflux-Datenbank (wird bereitgestellt)
db = TinyFlux("temperatures.db")

## Importing a CSV file and show first descriptive statistics

In [73]:
# CSV-Datei in Dataframe laden
df = pd.read_csv('example_data/synthetic_temperature_readings.csv')

# Das Format des Zeitstempels in Pandas setzen
df['time']= pd.to_datetime(df['time'], format='mixed')

In [74]:
# Erste 10 Records des Dataframes anzeigen
df.head(10)

Unnamed: 0,time,sensor_id,temperature,status
0,2023-01-01 00:00:00,52,20.377761,operational
1,2023-01-01 08:44:41,93,2.217658,maintenance
2,2023-01-01 17:29:22,15,14.912385,offline
3,2023-01-02 02:14:03,72,6.517424,operational
4,2023-01-02 10:58:44,61,36.684591,operational
5,2023-01-02 19:43:25,21,-9.623282,operational
6,2023-01-03 04:28:06,83,1.26664,operational
7,2023-01-03 13:12:47,87,8.267841,operational
8,2023-01-03 21:57:28,75,14.39049,operational
9,2023-01-04 06:42:09,75,32.540876,operational


In [75]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 4 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   time         1000 non-null   datetime64[ns]
 1   sensor_id    1000 non-null   int64         
 2   temperature  1000 non-null   float64       
 3   status       1000 non-null   object        
dtypes: datetime64[ns](1), float64(1), int64(1), object(1)
memory usage: 31.4+ KB


In [76]:
# Deskriptive Statistiken des Dataframes
df.describe(include='all')

Unnamed: 0,time,sensor_id,temperature,status
count,1000,1000.0,1000.0,1000
unique,,,,3
top,,,,operational
freq,,,,858
mean,2023-07-01 23:59:59.513999872,49.56,15.392825,
min,2023-01-01 00:00:00,1.0,-9.988124,
25%,2023-04-01 23:59:59.750000128,24.0,2.945454,
50%,2023-07-01 23:59:59.500000,50.0,16.125006,
75%,2023-09-30 23:59:59.249999872,75.0,27.474836,
max,2023-12-31 00:00:00,99.0,39.967675,


## Writing a CSV file into Tinyflux

In [77]:
# Read CSV and insert data into TinyFlux
with open("example_data/synthetic_temperature_readings.csv", mode="r") as file:
    reader = csv.DictReader(file)
    for row in reader:
        point = Point(
            time=datetime.strptime(row["time"], "%Y-%m-%d %H:%M:%S"),
            measurement="blockchain_transactions",
            fields={
                "temperature": float(row["temperature"])
                },  # Only numeric fields
            tags={
                "sensor_id": row["sensor_id"],
                "status": row["status"]  # Move non-numeric fields to tags
            }
        )
        db.insert(point)

## Writing manually into Tinyflux

In [78]:
# Create manual records
p1 = Point(
    time=datetime(2024, 1, 1, 0, 0, 0, tzinfo=timezone.utc),
    tags={"sensor_id": "20", "status": "operational"},
    fields={"temperature": 15.240971}
)

p2 = Point(
    time=datetime(2024, 1, 1, 0, 0, 0, tzinfo=timezone.utc),
    tags={"sensor_id": "30", "status": "maintenance"},
    fields={"temperature": 20.517200}
)

# Insert into the DB.
db.insert_multiple([p1, p2])

2

## Querying by Time

In [111]:
# Definieren der Query
time_query_conditions = (time >= datetime(2024, 1, 1, 0, 0, 0, tzinfo=timezone.utc))

# Datenabfrage mittels der definierten Query
my_results = db.search(time_query_conditions)

# Erstellung eines Dataframes aus den Query-Resultaten
# Die ugehörige Funktion "points_to_dataframe()" wurde zu Beginn dieses Notebooks in den Arbeitsspeicher geladen
df_result_time = points_to_dataframe(my_results)
df_result_time

Unnamed: 0,time,sensor_id,temperature,status
0,2024-01-01 00:00:00+00:00,20,15.240971,operational
1,2024-01-01 00:00:00+00:00,30,20.5172,maintenance
2,2024-01-01 00:00:00+00:00,20,15.240971,operational
3,2024-01-01 00:00:00+00:00,30,20.5172,maintenance
4,2024-01-01 00:00:00+00:00,20,15.240971,operational
5,2024-01-01 00:00:00+00:00,30,20.5172,maintenance


In [80]:
df_result_time.describe(include='all')

Unnamed: 0,time,sensor_id,temperature,status
count,6,6.0,6.0,6
unique,,2.0,,2
top,,20.0,,operational
freq,,3.0,,3
mean,2024-01-01 00:00:00+00:00,,17.879085,
min,2024-01-01 00:00:00+00:00,,15.240971,
25%,2024-01-01 00:00:00+00:00,,15.240971,
50%,2024-01-01 00:00:00+00:00,,17.879085,
75%,2024-01-01 00:00:00+00:00,,20.5172,
max,2024-01-01 00:00:00+00:00,,20.5172,


## Querying by Tag

In [81]:
df.status.unique()

array(['operational', 'maintenance', 'offline'], dtype=object)

In [82]:
# Definieren der Queries
tag_query_conditions_operational = (tags.status == "operational")
tag_query_conditions_maintenance = (tags.status == "maintenance")
tag_query_conditions_offline = (tags.status == "offline")

# Datenabfrage mittels der definierten Queries
my_results_operational = db.search(tag_query_conditions_operational)
my_results_maintenance = db.search(tag_query_conditions_maintenance)
my_results_offline = db.search(tag_query_conditions_offline)

# Erstellung eines Dataframes aus den Query-Resultaten
df_result_tag_operational = points_to_dataframe(my_results_operational)
df_result_tag_maintenance = points_to_dataframe(my_results_maintenance)
df_result_tag_offline = points_to_dataframe(my_results_offline)

### Dataframe with operational status

In [83]:
df_result_tag_operational.head(2)

Unnamed: 0,time,sensor_id,temperature,status
0,2022-12-31 23:00:00+00:00,52,20.377761,operational
1,2022-12-31 23:00:00+00:00,52,20.377761,operational


In [84]:
df_result_tag_operational.describe(include='all')

Unnamed: 0,time,sensor_id,temperature,status
count,2577,2577.0,2577.0,2577
unique,,99.0,,1
top,,92.0,,operational
freq,,54.0,,2577
mean,2023-07-01 00:07:47.378347264+00:00,,15.635,
min,2022-12-31 23:00:00+00:00,,-9.988124,
25%,2023-04-01 06:41:48+00:00,,3.584349,
50%,2023-07-01 00:08:17+00:00,,16.513257,
75%,2023-09-30 02:19:27+00:00,,27.662311,
max,2024-01-01 00:00:00+00:00,,39.967675,


### Dataframe with maintenance status

In [85]:
df_result_tag_maintenance.head(2)

Unnamed: 0,time,sensor_id,temperature,status
0,2023-01-01 07:44:41+00:00,93,2.217658,maintenance
1,2023-01-01 07:44:41+00:00,93,2.217658,maintenance


In [86]:
df_result_tag_maintenance.describe(include='all')

Unnamed: 0,time,sensor_id,temperature,status
count,270,270.0,270.0,270
unique,,64.0,,1
top,,99.0,,maintenance
freq,,9.0,,270
mean,2023-07-09 14:13:52.411111168+00:00,,13.510619,
min,2023-01-01 07:44:41+00:00,,-9.745893,
25%,2023-04-22 09:53:30+00:00,,2.217658,
50%,2023-07-04 15:35:07.500000+00:00,,12.917043,
75%,2023-10-11 09:24:41+00:00,,26.892788,
max,2024-01-01 00:00:00+00:00,,39.606666,


### Dataframe with offline status

In [87]:
df_result_tag_offline.head(2)

Unnamed: 0,time,sensor_id,temperature,status
0,2023-01-01 16:29:22+00:00,15,14.912385,offline
1,2023-01-01 16:29:22+00:00,15,14.912385,offline


In [88]:
df_result_tag_maintenance.describe(include='all')

Unnamed: 0,time,sensor_id,temperature,status
count,270,270.0,270.0,270
unique,,64.0,,1
top,,99.0,,maintenance
freq,,9.0,,270
mean,2023-07-09 14:13:52.411111168+00:00,,13.510619,
min,2023-01-01 07:44:41+00:00,,-9.745893,
25%,2023-04-22 09:53:30+00:00,,2.217658,
50%,2023-07-04 15:35:07.500000+00:00,,12.917043,
75%,2023-10-11 09:24:41+00:00,,26.892788,
max,2024-01-01 00:00:00+00:00,,39.606666,


## Querying by Field

In [89]:
# Definieren der Queries
field_query_conditions = (field.temperature > 0)

# Datenabfrage mittels der definierten Queries
my_results = db.search(field_query_conditions)

# Erstellung eines Dataframes aus den Query-Resultaten
# Die ugehörige Funktion "points_to_dataframe()" wurde zu Beginn dieses Notebooks in den Arbeitsspeicher geladen
df_result_field = points_to_dataframe(my_results)

print("\n Dataframe with temperature measurements > 0:")
df_result_field.head(2)


 Dataframe with temperature measurements > 0:


Unnamed: 0,time,sensor_id,temperature,status
0,2022-12-31 23:00:00+00:00,52,20.377761,operational
1,2022-12-31 23:00:00+00:00,52,20.377761,operational


In [90]:
df_result_field.describe(include='all')

Unnamed: 0,time,sensor_id,temperature,status
count,2430,2430.0,2430.0,2430
unique,,99.0,,3
top,,62.0,,operational
freq,,54.0,,2088
mean,2023-07-01 13:14:36.150617344+00:00,,20.258389,
min,2022-12-31 23:00:00+00:00,,0.017957,
25%,2023-03-30 02:13:41+00:00,,11.101803,
50%,2023-07-01 00:08:17+00:00,,19.900241,
75%,2023-09-30 02:19:27+00:00,,29.714472,
max,2024-01-01 00:00:00+00:00,,39.967675,


## Variations of combining queries

### Querying by Time and Tag

In [112]:
# Querying all temperatures, that were measured before Mach 31 2023 with the status "operational"

# Definieren der Queries
time_query_conditions = (time < datetime(2023, 3, 31, 0, 0, 0, tzinfo=timezone.utc))
tag_query_conditions = (tags.status == "operational")

# Datenabfrage mittels der definierten Queries
my_results = db.search(time_query_conditions & tag_query_conditions)

# Erstellung eines Dataframes aus den Query-Resultaten
# Die ugehörige Funktion "points_to_dataframe()" wurde zu Beginn dieses Notebooks in den Arbeitsspeicher geladen
df_result_time_tag = points_to_dataframe(my_results)
df_result_time_tag.head(2)

Unnamed: 0,time,sensor_id,temperature,status
0,2022-12-31 23:00:00+00:00,52,20.377761,operational
1,2022-12-31 23:00:00+00:00,52,20.377761,operational


In [113]:
df_result_time_tag.describe(include='all')

Unnamed: 0,time,sensor_id,temperature,status
count,633,633.0,633.0,633
unique,,90.0,,1
top,,62.0,,operational
freq,,30.0,,633
mean,2023-02-14 06:33:52.061611264+00:00,,16.409696,
min,2022-12-31 23:00:00+00:00,,-9.864455,
25%,2023-01-23 04:25:45+00:00,,5.573659,
50%,2023-02-13 07:37:28+00:00,,17.142466,
75%,2023-03-09 00:01:58+00:00,,28.388975,
max,2023-03-30 19:43:03+00:00,,39.884631,


### Querying by Tag and Field

In [114]:
# Querying all temperatures higher than 20 degrees and with the status "maintenance"

# Definieren der Queries
tag_query_conditions = (tags.status == "maintenance")
field_query_conditions = (field.temperature > 20.0)

# Datenabfrage mittels der definierten Queries
my_results = db.search(tag_query_conditions & field_query_conditions)

# Erstellung eines Dataframes aus den Query-Resultaten
# Die ugehörige Funktion "points_to_dataframe()" wurde zu Beginn dieses Notebooks in den Arbeitsspeicher geladen
df_result_tag_field = points_to_dataframe(my_results)
df_result_tag_field.head(2)

Unnamed: 0,time,sensor_id,temperature,status
0,2023-01-08 23:23:03+00:00,21,38.753358,maintenance
1,2023-01-08 23:23:03+00:00,21,38.753358,maintenance


In [115]:
df_result_tag_field.describe(include='all')

Unnamed: 0,time,sensor_id,temperature,status
count,96,96.0,96.0,96
unique,,29.0,,1
top,,62.0,,maintenance
freq,,6.0,,96
mean,2023-07-24 02:58:37.749999872+00:00,,29.76959,
min,2023-01-08 23:23:03+00:00,,20.5172,
25%,2023-05-03 08:14:02.500000+00:00,,26.682858,
50%,2023-08-10 02:03:35.500000+00:00,,29.882287,
75%,2023-10-20 18:35:18.249999872+00:00,,33.438947,
max,2024-01-01 00:00:00+00:00,,39.606666,


## Variations of queries with multiple AND and OR conditions

In [122]:
# Querying all measurements during winter months with temperatures below 0
# OR conditions are separated with "|" (not to be confused with "/", see example below)

# Definieren der Queries
time_query_conditions = (time <= datetime(2023, 2, 28, 0, 0, 0, tzinfo=timezone.utc)) | (time >= datetime(2023, 12, 1, tzinfo=timezone.utc))
field_query_conditions = (field.temperature < 0.0)

# Datenabfrage mittels der definierten Queries
my_results = db.search(time_query_conditions & field_query_conditions)

# Erstellung eines Dataframes aus den Query-Resultaten
# Die ugehörige Funktion "points_to_dataframe()" wurde zu Beginn dieses Notebooks in den Arbeitsspeicher geladen
df_result_time_field = points_to_dataframe(my_results)
df_result_time_field.head(2)

Unnamed: 0,time,sensor_id,temperature,status
0,2023-01-02 18:43:25+00:00,21,-9.623282,operational
1,2023-01-02 18:43:25+00:00,21,-9.623282,operational


In [123]:
df_result_time_field.describe(include='all')

Unnamed: 0,time,sensor_id,temperature,status
count,138,138.0,138.0,138
unique,,32.0,,3
top,,90.0,,operational
freq,,12.0,,117
mean,2023-05-22 14:02:38.826087168+00:00,,-5.301463,
min,2023-01-02 18:43:25+00:00,,-9.938166,
25%,2023-01-19 21:43:36+00:00,,-7.383056,
50%,2023-02-20 18:53:30.500000+00:00,,-5.457587,
75%,2023-12-12 17:45:45+00:00,,-3.04419,
max,2023-12-28 09:47:12+00:00,,-0.024654,


In [124]:
# Querying all measurements during summer months with temperatures above 0
# AND conditions are separated with "&"

# Definieren der Queries
time_query_conditions = (time <= datetime(2023, 8, 31, 0, 0, 0, tzinfo=timezone.utc)) & (time >= datetime(2023, 6, 1, tzinfo=timezone.utc))
field_query_conditions = (field.temperature > 0.0)

# Datenabfrage mittels der definierten Queries
my_results = db.search(time_query_conditions & field_query_conditions)

# Erstellung eines Dataframes aus den Query-Resultaten
# Die ugehörige Funktion "points_to_dataframe()" wurde zu Beginn dieses Notebooks in den Arbeitsspeicher geladen
df_result_time_field_2 = points_to_dataframe(my_results)
df_result_time_field_2.head(2)

Unnamed: 0,time,sensor_id,temperature,status
0,2023-06-01 03:04:08+00:00,60,35.516955,operational
1,2023-06-01 03:04:08+00:00,60,35.516955,operational


In [125]:
df_result_time_field_2.describe(include='all')

Unnamed: 0,time,sensor_id,temperature,status
count,588,588.0,588.0,588
unique,,83.0,,3
top,,58.0,,operational
freq,,27.0,,501
mean,2023-07-17 06:04:56.025510144+00:00,,21.887445,
min,2023-06-01 03:04:08+00:00,,0.017957,
25%,2023-06-23 15:03:24.750000128+00:00,,13.152994,
50%,2023-07-18 07:30:48.500000+00:00,,22.545341,
75%,2023-08-09 19:30:05.249999872+00:00,,32.156976,
max,2023-08-30 20:30:37+00:00,,39.816789,
