# Tinyflux: Example with fictious temperature measurements

## Import Python Libraries und Settings

In [1]:
# Libraries
import pandas as pd
import csv
from tinyflux import TinyFlux, Point, FieldQuery, TagQuery, TimeQuery
from datetime import datetime, timezone, timedelta

# Settings
import warnings
warnings.filterwarnings("ignore")

# Abkürzungen für Query-Typen
time = TimeQuery()
tags = TagQuery()
field = FieldQuery()

## Load function for creating a dataframe from query results into memory

In [2]:
# Define the function to transform the list of Point objects into a DataFrame
def points_to_dataframe(points_list):
    data = []
    
    # Extract the relevant information
    for point in points_list:
        data.append({
            "time": point.time,  # Access time attribute
            "sensor_id": point.tags['sensor_id'],  # Access sender_id from tags dictionary
            "temperature": point.fields['temperature'],  # Access receiver_id from tags dictionary
            "status": point.tags['status']  # Access status from tags dictionary
        })

    # Create a DataFrame
    df = pd.DataFrame(data)

    # Convert time to datetime
    df['time'] = pd.to_datetime(df['time'])

    return df

## Initialize the TinyFlux database

In [3]:
# Erstellung der Tinyflux-Datenbank (wird bereitgestellt)
db = TinyFlux("temperatures.db")

## Importing a CSV file and show first descriptive statistics

In [4]:
# CSV-Datei in Dataframe laden
df = pd.read_csv('example_data/synthetic_temperature_readings.csv')

# Das Format des Zeitstempels in Pandas setzen
df['time']= pd.to_datetime(df['time'], format='mixed')

In [5]:
# Erste 10 Records des Dataframes anzeigen
df.head(10)

Unnamed: 0,time,sensor_id,temperature,status
0,2023-01-01 00:00:00,52,20.377761,operational
1,2023-01-01 08:44:41,93,2.217658,maintenance
2,2023-01-01 17:29:22,15,14.912385,offline
3,2023-01-02 02:14:03,72,6.517424,operational
4,2023-01-02 10:58:44,61,36.684591,operational
5,2023-01-02 19:43:25,21,-9.623282,operational
6,2023-01-03 04:28:06,83,1.26664,operational
7,2023-01-03 13:12:47,87,8.267841,operational
8,2023-01-03 21:57:28,75,14.39049,operational
9,2023-01-04 06:42:09,75,32.540876,operational


In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 4 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   time         1000 non-null   datetime64[ns]
 1   sensor_id    1000 non-null   int64         
 2   temperature  1000 non-null   float64       
 3   status       1000 non-null   object        
dtypes: datetime64[ns](1), float64(1), int64(1), object(1)
memory usage: 31.4+ KB


In [7]:
# Deskriptive Statistiken des Dataframes
df.describe(include='all')

Unnamed: 0,time,sensor_id,temperature,status
count,1000,1000.0,1000.0,1000
unique,,,,3
top,,,,operational
freq,,,,858
mean,2023-07-01 23:59:59.513999872,49.56,15.392825,
min,2023-01-01 00:00:00,1.0,-9.988124,
25%,2023-04-01 23:59:59.750000128,24.0,2.945454,
50%,2023-07-01 23:59:59.500000,50.0,16.125006,
75%,2023-09-30 23:59:59.249999872,75.0,27.474836,
max,2023-12-31 00:00:00,99.0,39.967675,


## Writing a CSV file into Tinyflux

In [8]:
# Read CSV and insert data into TinyFlux
with open("example_data/synthetic_temperature_readings.csv", mode="r") as file:
    reader = csv.DictReader(file)
    for row in reader:
        point = Point(
            time=datetime.strptime(row["time"], "%Y-%m-%d %H:%M:%S"),
            measurement="blockchain_transactions",
            fields={
                "temperature": float(row["temperature"])
                },  # Only numeric fields
            tags={
                "sensor_id": row["sensor_id"],
                "status": row["status"]  # Move non-numeric fields to tags
            }
        )
        db.insert(point)

## Writing manually into Tinyflux

In [9]:
# Create manual records
p1 = Point(
    time=datetime(2024, 1, 1, 0, 0, 0, tzinfo=timezone.utc),
    tags={"sensor_id": "20", "status": "operational"},
    fields={"temperature": 15.240971}
)

p2 = Point(
    time=datetime(2024, 1, 1, 0, 0, 0, tzinfo=timezone.utc),
    tags={"sensor_id": "30", "status": "maintenance"},
    fields={"temperature": 20.517200}
)

# Insert into the DB.
db.insert_multiple([p1, p2])

2

## Querying by Time

In [10]:
# Definieren der Query
time_query_conditions = (time >= datetime(2024, 1, 1, 0, 0, 0, tzinfo=timezone.utc))

# Datenabfrage mittels der definierten Query
my_results = db.search(time_query_conditions)

# Erstellung eines Dataframes aus den Query-Resultaten
# Die ugehörige Funktion "points_to_dataframe()" wurde zu Beginn dieses Notebooks in den Arbeitsspeicher geladen
df_result_time = points_to_dataframe(my_results)
df_result_time

Unnamed: 0,time,sensor_id,temperature,status
0,2024-01-01 00:00:00+00:00,20,15.240971,operational
1,2024-01-01 00:00:00+00:00,30,20.5172,maintenance


In [1]:
df_result_time.describe(include='all')

NameError: name 'df_result_time' is not defined

## Querying by Tag

In [12]:
df.status.unique()

array(['operational', 'maintenance', 'offline'], dtype=object)

In [13]:
# Definieren der Queries
tag_query_conditions_operational = (tags.status == "operational")
tag_query_conditions_maintenance = (tags.status == "maintenance")
tag_query_conditions_offline = (tags.status == "offline")

# Datenabfrage mittels der definierten Queries
my_results_operational = db.search(tag_query_conditions_operational)
my_results_maintenance = db.search(tag_query_conditions_maintenance)
my_results_offline = db.search(tag_query_conditions_offline)

# Erstellung eines Dataframes aus den Query-Resultaten
df_result_tag_operational = points_to_dataframe(my_results_operational)
df_result_tag_maintenance = points_to_dataframe(my_results_maintenance)
df_result_tag_offline = points_to_dataframe(my_results_offline)

### Dataframe with operational status

In [14]:
df_result_tag_operational.head(2)

Unnamed: 0,time,sensor_id,temperature,status
0,2023-01-01 00:00:00+00:00,52,20.377761,operational
1,2023-01-02 02:14:03+00:00,72,6.517424,operational


In [15]:
df_result_tag_operational.describe(include='all')

Unnamed: 0,time,sensor_id,temperature,status
count,859,859.0,859.0,859
unique,,99.0,,1
top,,92.0,,operational
freq,,18.0,,859
mean,2023-07-01 01:43:41.511059456+00:00,,15.635,
min,2023-01-01 00:00:00+00:00,,-9.988124,
25%,2023-04-01 13:04:08.500000+00:00,,3.655681,
50%,2023-07-01 02:08:17+00:00,,16.513257,
75%,2023-09-29 23:57:06.500000+00:00,,27.64185,
max,2024-01-01 00:00:00+00:00,,39.967675,


### Dataframe with maintenance status

In [16]:
df_result_tag_maintenance.head(2)

Unnamed: 0,time,sensor_id,temperature,status
0,2023-01-01 08:44:41+00:00,93,2.217658,maintenance
1,2023-01-09 00:23:03+00:00,21,38.753358,maintenance


In [17]:
df_result_tag_maintenance.describe(include='all')

Unnamed: 0,time,sensor_id,temperature,status
count,90,90.0,90.0,90
unique,,64.0,,1
top,,48.0,,maintenance
freq,,3.0,,90
mean,2023-07-09 15:48:32.411110912+00:00,,13.510619,
min,2023-01-01 08:44:41+00:00,,-9.745893,
25%,2023-04-22 18:27:01+00:00,,2.257493,
50%,2023-07-04 17:35:07.500000+00:00,,12.917043,
75%,2023-10-10 22:17:39.249999872+00:00,,26.843465,
max,2024-01-01 00:00:00+00:00,,39.606666,


### Dataframe with offline status

In [18]:
df_result_tag_offline.head(2)

Unnamed: 0,time,sensor_id,temperature,status
0,2023-01-01 17:29:22+00:00,15,14.912385,offline
1,2023-01-11 22:20:32+00:00,59,0.253878,offline


In [19]:
df_result_tag_maintenance.describe(include='all')

Unnamed: 0,time,sensor_id,temperature,status
count,90,90.0,90.0,90
unique,,64.0,,1
top,,48.0,,maintenance
freq,,3.0,,90
mean,2023-07-09 15:48:32.411110912+00:00,,13.510619,
min,2023-01-01 08:44:41+00:00,,-9.745893,
25%,2023-04-22 18:27:01+00:00,,2.257493,
50%,2023-07-04 17:35:07.500000+00:00,,12.917043,
75%,2023-10-10 22:17:39.249999872+00:00,,26.843465,
max,2024-01-01 00:00:00+00:00,,39.606666,


## Querying by Field

In [20]:
# Definieren der Queries
field_query_conditions = (field.temperature > 0)

# Datenabfrage mittels der definierten Queries
my_results = db.search(field_query_conditions)

# Erstellung eines Dataframes aus den Query-Resultaten
# Die ugehörige Funktion "points_to_dataframe()" wurde zu Beginn dieses Notebooks in den Arbeitsspeicher geladen
df_result_field = points_to_dataframe(my_results)

print("\n Dataframe with temperature measurements > 0:")
df_result_field.head(2)


 Dataframe with temperature measurements > 0:


Unnamed: 0,time,sensor_id,temperature,status
0,2023-01-01 00:00:00+00:00,52,20.377761,operational
1,2023-01-01 08:44:41+00:00,93,2.217658,maintenance


In [21]:
df_result_field.describe(include='all')

Unnamed: 0,time,sensor_id,temperature,status
count,810,810.0,810.0,810
unique,,99.0,,3
top,,62.0,,operational
freq,,18.0,,696
mean,2023-07-01 14:49:42.817283840+00:00,,20.258389,
min,2023-01-01 00:00:00+00:00,,0.017957,
25%,2023-03-30 08:36:01.500000+00:00,,11.109966,
50%,2023-07-01 02:08:17+00:00,,19.900241,
75%,2023-09-30 02:08:16.750000128+00:00,,29.707653,
max,2024-01-01 00:00:00+00:00,,39.967675,


## Variations of combining queries

### Querying by Time and Tag

In [22]:
# Querying all temperatures, that were measured before Mach 31 2023 with the status "operational"

# Definieren der Queries
time_query_conditions = (time < datetime(2023, 3, 31, 0, 0, 0, tzinfo=timezone.utc))
tag_query_conditions = (tags.status == "operational")

# Datenabfrage mittels der definierten Queries
my_results = db.search(time_query_conditions & tag_query_conditions)

# Erstellung eines Dataframes aus den Query-Resultaten
# Die ugehörige Funktion "points_to_dataframe()" wurde zu Beginn dieses Notebooks in den Arbeitsspeicher geladen
df_result_time_tag = points_to_dataframe(my_results)
df_result_time_tag.head(2)

Unnamed: 0,time,sensor_id,temperature,status
0,2023-01-01 00:00:00+00:00,52,20.377761,operational
1,2023-01-02 02:14:03+00:00,72,6.517424,operational


In [23]:
df_result_time_tag.describe(include='all')

Unnamed: 0,time,sensor_id,temperature,status
count,211,211.0,211.0,211
unique,,90.0,,1
top,,62.0,,operational
freq,,10.0,,211
mean,2023-02-14 07:37:33.862559232+00:00,,16.409696,
min,2023-01-01 00:00:00+00:00,,-9.864455,
25%,2023-01-23 09:48:06+00:00,,5.691744,
50%,2023-02-13 08:37:28+00:00,,17.142466,
75%,2023-03-08 20:39:37.500000+00:00,,28.328395,
max,2023-03-30 21:43:03+00:00,,39.884631,


### Querying by Tag and Field

In [24]:
# Querying all temperatures higher than 20 degrees and with the status "maintenance"

# Definieren der Queries
tag_query_conditions = (tags.status == "maintenance")
field_query_conditions = (field.temperature > 20.0)

# Datenabfrage mittels der definierten Queries
my_results = db.search(tag_query_conditions & field_query_conditions)

# Erstellung eines Dataframes aus den Query-Resultaten
# Die ugehörige Funktion "points_to_dataframe()" wurde zu Beginn dieses Notebooks in den Arbeitsspeicher geladen
df_result_tag_field = points_to_dataframe(my_results)
df_result_tag_field.head(2)

Unnamed: 0,time,sensor_id,temperature,status
0,2023-01-09 00:23:03+00:00,21,38.753358,maintenance
1,2023-01-09 17:52:25+00:00,76,26.144805,maintenance


In [25]:
df_result_tag_field.describe(include='all')

Unnamed: 0,time,sensor_id,temperature,status
count,32,32.0,32.0,32
unique,,29.0,,1
top,,62.0,,maintenance
freq,,2.0,,32
mean,2023-07-24 04:32:22.750000128+00:00,,29.76959,
min,2023-01-09 00:23:03+00:00,,20.5172,
25%,2023-05-03 10:14:02.500000+00:00,,26.682858,
50%,2023-08-10 04:03:35.500000+00:00,,29.882287,
75%,2023-10-20 20:35:18.249999872+00:00,,33.438947,
max,2024-01-01 00:00:00+00:00,,39.606666,


## Variations of queries with multiple AND and OR conditions

In [26]:
# Querying all measurements during winter months with temperatures below 0
# OR conditions are separated with "|" (not to be confused with "/", see example below)

# Definieren der Queries
time_query_conditions = (time <= datetime(2023, 2, 28, 0, 0, 0, tzinfo=timezone.utc)) | (time >= datetime(2023, 12, 1, tzinfo=timezone.utc))
field_query_conditions = (field.temperature < 0.0)

# Datenabfrage mittels der definierten Queries
my_results = db.search(time_query_conditions & field_query_conditions)

# Erstellung eines Dataframes aus den Query-Resultaten
# Die ugehörige Funktion "points_to_dataframe()" wurde zu Beginn dieses Notebooks in den Arbeitsspeicher geladen
df_result_time_field = points_to_dataframe(my_results)
df_result_time_field.head(2)

Unnamed: 0,time,sensor_id,temperature,status
0,2023-01-02 19:43:25+00:00,21,-9.623282,operational
1,2023-01-04 15:26:50+00:00,88,-5.605619,operational


In [27]:
df_result_time_field.describe(include='all')

Unnamed: 0,time,sensor_id,temperature,status
count,46,46.0,46.0,46
unique,,32.0,,3
top,,90.0,,operational
freq,,4.0,,39
mean,2023-05-22 15:02:38.826086656+00:00,,-5.301463,
min,2023-01-02 19:43:25+00:00,,-9.938166,
25%,2023-01-20 07:28:17+00:00,,-7.341624,
50%,2023-02-20 19:53:30.500000+00:00,,-5.457587,
75%,2023-12-11 05:35:50.500000+00:00,,-3.047833,
max,2023-12-28 10:47:12+00:00,,-0.024654,


In [28]:
# Querying all measurements during summer months with temperatures above 0
# AND conditions are separated with "&"

# Definieren der Queries
time_query_conditions = (time <= datetime(2023, 8, 31, 0, 0, 0, tzinfo=timezone.utc)) & (time >= datetime(2023, 6, 1, tzinfo=timezone.utc))
field_query_conditions = (field.temperature > 0.0)

# Datenabfrage mittels der definierten Queries
my_results = db.search(time_query_conditions & field_query_conditions)

# Erstellung eines Dataframes aus den Query-Resultaten
# Die ugehörige Funktion "points_to_dataframe()" wurde zu Beginn dieses Notebooks in den Arbeitsspeicher geladen
df_result_time_field_2 = points_to_dataframe(my_results)
df_result_time_field_2.head(2)

Unnamed: 0,time,sensor_id,temperature,status
0,2023-06-01 05:04:08+00:00,60,35.516955,operational
1,2023-06-02 07:18:11+00:00,36,9.581507,maintenance


In [29]:
df_result_time_field_2.describe(include='all')

Unnamed: 0,time,sensor_id,temperature,status
count,196,196.0,196.0,196
unique,,83.0,,3
top,,58.0,,operational
freq,,9.0,,167
mean,2023-07-17 08:04:56.025510400+00:00,,21.887445,
min,2023-06-01 05:04:08+00:00,,0.017957,
25%,2023-06-23 17:03:24.750000128+00:00,,13.152994,
50%,2023-07-18 09:30:48.500000+00:00,,22.545341,
75%,2023-08-09 21:30:05.249999872+00:00,,32.156976,
max,2023-08-30 22:30:37+00:00,,39.816789,
