# logstar data download guide
This notebook gives you an introduction to the logstar-online-stream download tool, which is written in Python. You can find the source code at: https://github.com/zalf-rdm/Logstar-online-Stream.
Using this tool, LoRaWAN sensor data can be downloaded either raw or "cleaned", i.e. with filters applied.

**THIS NOTEBOOK IS READ-ONLY, SO IF YOU WANT TO WORK WITH IT PLEASE COPY THE NOTEBOOK**

In this example notebook we are going to install and import the required Python packages and then download only water_content data from all stations (patches) for the period from 2020-01-01 to 2022-12-31 (the `startdate` and `enddate` set in the configuration below).

Before downloading the data we have to make some preparations. First of all, install the logstar-online-stream Python package with all of its requirements via:

In [None]:
! python -m pip install --force-reinstall --quiet git+https://github.com/zalf-rdm/Logstar-online-Stream.git

Now we have to define the configuration which we use to download the data. API-Docs via http://dokuwiki.weather-station-data.com/doku.php?id=:en:start:

## remove downloaded data if existing

## Run patchcrop data download

applied filters:
* Bulk Conductivity Filter
* water content jump filter

In [None]:
import json
import logging
import os

import logstar_stream.logstar as logstar
import logstar_stream.processing_steps.ProcessingStep as ps

# Names of the logstar stations to download, grouped by station type.
# Every station is listed exactly once so that none is processed twice.
stations = [
    # weather
    "ws1_l1_rtu_BL",
    "ws2_l1_rtu_BL",
    "tbsl1_00172_BL",
    # stationary
    "tbs6a_01_180048_BL",
    "tbs6a_02_180049_BL",
    "tbs6a_03_180050_BL",
    "tbs6a_04_180051_BL",
    "tbs6a_05_180052_BL",
    "tbs6a_06_180054_BL",
    "tbs6a_07_180055_BL",
    "tbs6a_08_180056_BL",
    "tbs6a_09_180057_BL",
    "tbs6a_10_180058_BL",
    "tbs6a_11_180059_BL",
    "tbs6a_12_180060_BL",
    "tbs6a_13_180061_BL",
    "tbs6a_14_180108_BL",
    "tbs6a_15_180063_BL",
    "tbs6a_16_180068_BL",
    "tbs6a_17_180069_BL",
    "tbs6a_18_180070_BL",
    "tbs6a_19_180071_BL",
    "tbs6a_20_180072_BL",
    "tbs6a_21_180073_BL",
    "tbs6a_22_180075_BL",
    "tbs6a_23_180076_BL",
    "tbs6a_24_180078_BL",
    "tbs6a_25_180081_BL",
    "tbs6a_26_180082_BL",
    "tbs6a_27_180083_BL",
    "tbs6a_28_180084_BL",
    "tbs6a_29_180085_BL",
    "tbs6a_30_180086_BL",
    # mobile
    "wcecst_01_BL",
    "wcecst_02_BL",
    "wcecst_03_BL",
    "wcecst_04_BL",
    "wcecst_05_BL",
    "wcecst_06_BL",
    "wcecst_07_BL",
    "wcecst_08_BL",
    "wcecst_09_BL",
    "wcecst_10_BL",
]
# Request configuration handed to logstar.manage_dl_db.
conf = {
    # logstar API key. Read from the LOGSTAR_API_KEY environment variable so the
    # secret never has to be typed into (and saved with) the notebook; falls
    # back to an empty string when the variable is not set.
    "apikey": os.environ.get("LOGSTAR_API_KEY", ""),
    "stationlist": stations,  # list of stations to process
    "geodata": True,  # Returns longitude and latitude of the station as well as a comment (not implemented, i guess)
    "datetime": 1,  # Date and time format in the channel list: integer: 0/1
                    #   0 (default): "dateTime": "2020-04-01 00:00:00"
                    #   1: "date": "2020-04-01", "time": "00:00:00"
                    # USE 1 AS IT IS EXPECTED IN PS
    "startdate": "2020-01-01",  # Day from which the data should be retrieved in the format: YYYY-MM-DD
    "enddate": "2022-12-31",  # Day to which the data is to be retrieved in the format: YYYY-MM-DD
}

# Load the mapping file that translates sensor names to patch names and
# measurement acronyms to names.
# There is deliberately no placeholder value for sensor_mapping: if the file
# cannot be read this cell fails, instead of letting an empty mapping slip
# into the download cell unnoticed.
SENSOR_MAPPING_PATH = "/home/jovyan/shared/patchcrop/patchcrop-sensor-mapping.json"
with open(SENSOR_MAPPING_PATH, encoding="utf-8") as jsonfile:  # explicit encoding: do not depend on the locale default
    sensor_mapping = json.load(jsonfile)

# configure logging
logging.basicConfig(format='%(asctime)s %(message)s', level=logging.INFO)



In [None]:
from logstar_stream.processing_steps.ProcessingStep import ProcessingStep
from re import M
from typing import List, Dict
import math
import logging

import pandas as pd

class BulkConductivityDriftPS(ProcessingStep):
    """Processing step that marks implausible bulk-conductivity readings.

    For every row of a station's data frame the left and right sensor columns
    are checked at each depth (30, 60, 90 cm). A reading is marked when

    * it exceeds the reading of the opposite side at the same depth by more
      than ``treshold_left_to_right``,
    * it exceeds ``threshold_max_value``, or
    * it exceeds the reading of the next shallower depth on the same side by
      more than ``threshold_between_depth``.

    Marked ``(row, column)`` pairs are collected in ``self.to_change`` and then
    handed to ``__do_change__`` of the base class (presumably replacing them
    with ``ERROR_VALUE`` - confirm in ProcessingStep).
    """

    ps_name = "BulkConductivityDriftPS"

    ps_description = "TODO"

    # value to fill if a mismeasurement is detected
    ERROR_VALUE = float("NaN")

    # not referenced in this class; presumably evaluated by ProcessingStep - TODO confirm
    FORBIDDEN_VALUES = [{"value": 0, "duration": 100}]

    # Default thresholds, used when the matching key is absent from kwargs.
    # NOTE: "treshold" (sic) is the key callers pass in; the spelling is kept
    # so existing configurations keep working.
    treshold_left_to_right = 50
    threshold_between_depth = 60
    threshold_max_value = 400

    # Columns ordered from shallow to deep; index i of the left list is the
    # counterpart of index i of the right list.
    ELEMENT_ORDER_LEFT = [
        "bulk_conductivity_left_30_cm",
        "bulk_conductivity_left_60_cm",
        "bulk_conductivity_left_90_cm",
    ]
    ELEMENT_ORDER_RIGHT = [
        "bulk_conductivity_right_30_cm",
        "bulk_conductivity_right_60_cm",
        "bulk_conductivity_right_90_cm",
    ]

    def __init__(self, kwargs):
        """Read optional threshold overrides from ``kwargs``.

        Args:
            kwargs: dict of options; the keys ``treshold_left_to_right``,
                ``threshold_between_depth`` and ``threshold_max_value`` are
                converted to ``float`` when present. The dict is also passed
                on to the base class.
        """
        super().__init__(kwargs)
        for option in (
            "treshold_left_to_right",
            "threshold_between_depth",
            "threshold_max_value",
        ):
            default = getattr(self, option)
            setattr(self, option, float(kwargs[option]) if option in kwargs else default)

        # (row number, column name) pairs collected while scanning a frame
        self.to_change = []

    def compare_and_prepare_to_change(self, row, row_num):
        """Check one row and queue every implausible reading in ``self.to_change``.

        Args:
            row: one data row, indexable by the column names listed in
                ``ELEMENT_ORDER_LEFT`` / ``ELEMENT_ORDER_RIGHT``.
            row_num: index label of the row; stored as ``int(row_num)``.
        """
        column_pairs = zip(self.ELEMENT_ORDER_LEFT, self.ELEMENT_ORDER_RIGHT)
        for i, (left_col, right_col) in enumerate(column_pairs):
            left_value = row[left_col]
            right_value = row[right_col]

            # pd.isna covers both None and NaN, so missing readings are
            # treated the same way in every check below
            left_missing = pd.isna(left_value)
            right_missing = pd.isna(right_value)
            both_present = not (left_missing or right_missing)

            # side-to-side comparison: a side is marked if it exceeds the
            # other side by more than treshold_left_to_right, or if it
            # exceeds threshold_max_value on its own
            left_del = (not left_missing) and (
                (both_present and left_value - right_value > self.treshold_left_to_right)
                or left_value > self.threshold_max_value
            )
            if left_del:
                self.to_change.append((int(row_num), left_col))

            right_del = (not right_missing) and (
                (both_present and right_value - left_value > self.treshold_left_to_right)
                or right_value > self.threshold_max_value
            )
            if right_del:
                self.to_change.append((int(row_num), right_col))

            # the shallowest depth (30 cm) has no depth above it to compare with
            if i == 0:
                continue

            # depth comparison: a reading is marked if it exceeds the next
            # shallower depth on the same side by more than
            # threshold_between_depth; the *_del flags prevent queueing the
            # same cell a second time
            left_shallower = row[self.ELEMENT_ORDER_LEFT[i - 1]]
            right_shallower = row[self.ELEMENT_ORDER_RIGHT[i - 1]]

            if (
                not left_missing
                and not pd.isna(left_shallower)
                and not left_del
                and left_shallower + self.threshold_between_depth < left_value
            ):
                self.to_change.append((int(row_num), left_col))

            if (
                not right_missing
                and not pd.isna(right_shallower)
                and not right_del
                and right_shallower + self.threshold_between_depth < right_value
            ):
                self.to_change.append((int(row_num), right_col))

    def process(self, df: pd.DataFrame, station: str):
        """Scan ``df`` row by row and apply the queued changes.

        Args:
            df: data of one station; may be ``None``.
            station: station name, used for log messages.

        Returns:
            ``None`` if ``df`` is ``None``; otherwise ``df`` itself, unchanged
            when a required column is missing and otherwise after
            ``__do_change__`` was called for every marked cell.
        """
        logging.debug(f"parsing data for station {station} ...")

        # must come before any attribute access on df
        if df is None:
            return None

        # check if all required fields are available
        required_columns = set(self.ELEMENT_ORDER_LEFT + self.ELEMENT_ORDER_RIGHT)
        if not required_columns.issubset(df.columns):
            logging.debug(
                f"did not found all required columns in {station} to run {self.ps_name}"
            )
            return df

        try:
            # iterate over each row of the given data
            for row_num, row in df.iterrows():
                self.compare_and_prepare_to_change(row, row_num)

            # run do change for all to change values
            for row_num, column_name in self.to_change:
                self.__do_change__(df, row_num, column_name)
        finally:
            # never carry queued cells over into the next station, even if
            # something above raised
            self.to_change = []

        # write logs
        self.write_log(station)
        return df

In [None]:
! rm data/* logs/*
BULK_CONDUCTIVITY_THRESHOLD_BETWEEN_LEFT_RIGHT = 50
BULK_CONDUCTIVITY_THRESHOLD_BETWEEN_DEPTH = 80
BULK_CONDUCTIVITY_THRESHOLD_MAX_VALUE  = 300 # Maximum allow BC if above it will be filtered out  
processing_steps = [
                    ps.load_class(["JumpCheckPS"]), 
                   BulkConductivityDriftPS({"treshold_left_to_right": BULK_CONDUCTIVITY_THRESHOLD_BETWEEN_LEFT_RIGHT, 
                                            "threshold_between_depth": BULK_CONDUCTIVITY_THRESHOLD_BETWEEN_DEPTH,
                                            "threshold_max_value": BULK_CONDUCTIVITY_THRESHOLD_MAX_VALUE
                                           })
]


df_dict = logstar.manage_dl_db( conf,                              # configuration
                                processing_steps=processing_steps, # loaded processing steps
                                sensor_mapping=sensor_mapping,     # translation file
                                csv_folder="data/")                # folder to write csv files to

2023-03-06 13:05:26,737 downloading data for station patch_51 from 2020-01-01 to 2022-12-31 ...


In [None]:
# # PLOT
import datetime

PLOT_PATCH = "patch_12"  # key of the station in df_dict whose data is plotted

columns = [
    'date',
    'time',
    "bulk_conductivity_left_30_cm",
    "bulk_conductivity_left_60_cm",
    "bulk_conductivity_left_90_cm",
    "bulk_conductivity_right_30_cm",
    "bulk_conductivity_right_60_cm",
    "bulk_conductivity_right_90_cm",
]

# Explicit copy: the date conversion below must neither write into nor warn
# about the frame that stays stored in df_dict.
df = df_dict[PLOT_PATCH].loc[:, columns].copy()
df['date'] = pd.to_datetime(df['date'], format="%Y-%m-%d").dt.date
# keep only rows strictly between the two dates (both bounds are exclusive)
df = df[(df['date'] > datetime.date(2020, 1, 1)) & (df['date'] < datetime.date(2023, 3, 1))]

# plot the data (docs: https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.plot.html)
# NOTE: DataFrame.plot returns a matplotlib Axes; the name `fig` is kept so
# that anything referring to it afterwards keeps working.
fig = df.plot(x="date",
              figsize=(24, 8))
fig.set_title(f"Bulk conductivity - {PLOT_PATCH}")
fig.set_ylabel("bulk conductivity");