In [1]:
#|hide
#|default_exp reports

# LimeSurvey Maintenance Reports

In [2]:
#|export
import pandas as pd
from pandas._typing import (
    FilePath,
    ReadCsvBuffer,
)
import datetime as dt
import numpy as np
from toolz import assoc_in

import lib.utils as utils




## Survey Export
In the following, the functions are designed to work with the survey response export in the .csv format:
* Field separator: "Semicolon"
* Responses: "Answer codes"
* Headings: "Question code"

The responses can be exported manually from the website ...

In [3]:
fn = "../../example_data/results-survey224783.csv"

df = pd.read_csv(fn, sep=';')
df = df.fillna("None")
df


Unnamed: 0,id,submitdate,lastpage,startlanguage,seed,startdate,datestamp,Q00,Q01,MainQ01,...,interviewtime,groupTime57,Q00Time,Q01Time,groupTime59,MainQ01Time,MainQ02Time,groupTime58,ExtraQ01Time,ExtraQ02Time
0,1,2023-05-08 15:13:33,1,en,1428982518,2023-05-08 15:02:53,2023-05-08 15:13:33,1,this is a test,AO01,...,837.45,,,,,,,,,
1,2,2023-05-08 16:08:20,1,en,852861659,2023-05-08 16:08:09,2023-05-08 16:08:20,2,222,AO03,...,11.13,,,,,,,,,
2,3,2023-05-08 16:09:06,1,en,632878730,2023-05-08 16:08:46,2023-05-08 16:09:06,1,222,AO02,...,19.89,,,,,,,,,


... or via the *limepy* python package.

In [4]:
import limepy
import getpass
from io import StringIO

pwd = getpass.getpass("LimeSurvey BeRichter Password: ")
if pwd != '':
    url = "https://lgs-car.limesurvey.net/admin/remotecontrol"
    csv = limepy.download.get_responses(
        base_url=url,
        user_name="BeRichter",
        password=pwd,
        user_id=1,
        sid=224783
    )
    df = pd.read_csv(StringIO(csv), sep=';')
    df = df.fillna("None")
    df
else:
    print("No password, no data.")

In [5]:
#|export
def get_responses(
        *,
        fn: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str]|None = None,
        online: dict|None = None
) -> pd.DataFrame:
    """
    Get LimeSurvey responses as pandas Dataframe providing a file or online download information.

    Parameters
    ----------
    fn: str, path object or file-like object
        Any pandas readable representation of the LimeSurvey response export file
        (.csv, sep=';', answer and question codes).
    online: dict
        Dictionary of information required to download the responses via limepy:
        * base_url -> limesurvey remote_control url
        * user_name -> account name
        * password
        * user_id -> ID of account user (usually 1)
        * sid -> Survey ID
        Minimal information stored in *online is the base_url, other information will then be filled via user input promt.
    Returns
    -------
    pd.Dataframe
        parsed responses csv file
    """
    if fn is not None:
        filepath_or_buffer = fn
    elif online is not None:
        import limepy
        import getpass
        from io import StringIO
        if "base_url" not in online:
            raise ValueError
        if "user_name" not in online:
            online.update({'user_name': input("LimeSurvey Account Name: ")})
        if "password" not in online:
            online.update({'password': getpass.getpass("LimeSurvey Password: ")})
        if "user_id" not in online:
            online.update({"user_id": input("LimeSurvey User ID: ")})
        if "sid" not in online:
            online.update({'sid': input("LimeSurvey Survey ID: ")})

        csv = limepy.download.get_responses(**online)
        filepath_or_buffer = StringIO(csv)
    else:
        raise ValueError
    df = pd.read_csv(filepath_or_buffer, sep=';')
    df = df.fillna("None")
    return df

In [6]:
url = "https://lgs-car.limesurvey.net/admin/remotecontrol"
get_responses(
    online=dict(
        base_url=url,
        user_id=1,
        sid=224783
    )
)

Unnamed: 0,id,submitdate,lastpage,startlanguage,seed,startdate,datestamp,Q00,Q01,MainQ01,MainQ01[comment],MainQ02,MainQ02[comment],ExtraQ01,ExtraQ01[comment],ExtraQ02,ExtraQ02[comment]
0,1,2023-05-08 15:13:33,1,en,1428982518,2023-05-08 15:02:53,2023-05-08 15:13:33,1,this is a test,AO01,,AO02,testing notes,AO03,,AO01,
1,2,2023-05-08 16:08:20,1,en,852861659,2023-05-08 16:08:09,2023-05-08 16:08:20,2,222,AO03,,,,,,,
2,3,2023-05-08 16:09:06,1,en,632878730,2023-05-08 16:08:46,2023-05-08 16:09:06,1,222,AO02,test,AO03,,AO04,,AO02,


## Parse Responses to dict
Maintenance Flags shall be parsed to a dictionary sorted by the PyrNet box number. The Survey reports are collected over the entire campaign. Therefore, consider only reports within a certain time interval around maintenance time (2 days) for quality flagging the measurement periode.

Reports within +-2 days around maintenance time are considered, giving the opportunity for corrections within this time frame by issuing another response (or via insert in the website interface). For example, the first report at 1PM includes the quality marks and some notes. Later, if one want to add notes for this station or correct marks, another report can be filled. The Values will be updated by the parsing function:
    * Valid Marks (not None) of the latest report within +-2days
    * Notes of multiple reports are attached (separated by ";") starting with the oldest report notes.

In [7]:
#|export
_pollution_marks = {
    "None":4,
    "AO01":0,
    "AO02":1,
    "AO03":2,
    "AO04":3,
}
_alignment_marks = {
    "None":4,
    "AO01":0,
    "AO02":1,
    "AO03":2,
}
_note_keys = {
    "note_general": "Q01",
    "note_align": "MainQ01[comment]",
    "note_clean": "MainQ02[comment]",
    "note_align2": "ExtraQ01[comment]",
    "note_clean2": "ExtraQ02[comment]",
}
_mark_keys = {
    "clean": "MainQ01",
    "align": "MainQ02",
    "clean2": "ExtraQ01",
    "align2": "ExtraQ02",
}

def parse_report(
        df:  pd.DataFrame,
        date_of_maintenance: float | dt.datetime | np.datetime64,
) -> dict:
    """
    User pandas.read_csv (sep=';') to parse the survey report.

    Parameters
    ----------
   df: Dataframe
        LimeSurvey response parsed as pandas Dataframe.
    date_of_maintenance: float, datetime or datetime64
        A representation of time. If float, interpreted as Julian day from 2000-01-01T12:00.
    Returns
    -------
    dict
        Dictionary storing maintenance flags and notes by PyrNet box number.
    """
    date_of_maintenance = utils.to_datetime64(date_of_maintenance)

    results = {}
    for i in range(df.shape[0]):
        # consider only reports +-2 days around date of maintenance
        mdate = pd.to_datetime(df['datestamp'][i])
        if np.abs(mdate - date_of_maintenance) > np.timedelta64(2,'D'):
            continue

        # store report in dictionary
        box = int(df["Q00"].values[i])
        key = f"{box:03d}"
        if key not in results:
            # initialize marks
            for mkey in _mark_keys:
                results = assoc_in(results, [key,mkey], 4)
            # initialize notes
            for nkey in _note_keys:
                results = assoc_in(results, [key,nkey], "")

        # merge notes if multiple reports exist
        for nkey in _note_keys:
            new_note = df[_note_keys[nkey]].values[i]
            update_note = (results[key][nkey]+'; '+new_note).strip('; ')
            results = assoc_in(results, [key,nkey], update_note)

        # update marks with most recent report if not None
        for mkey in _mark_keys:
            new_mark = df[_mark_keys[mkey]][i]
            if new_mark=="None":
                continue
            if mkey.startswith("clean"):
                new_mark = _pollution_marks[new_mark]
            else:
                new_mark = _alignment_marks[new_mark]
            results = assoc_in(results, [key,mkey], new_mark)
    return results


In [8]:
parse_report(df, np.datetime64("2023-05-08T12:00"))

{'001': {'clean': 1,
  'align': 2,
  'clean2': 3,
  'align2': 1,
  'note_general': 'this is a test; 222',
  'note_align': 'None; test',
  'note_clean': 'testing notes; None',
  'note_align2': 'None; None',
  'note_clean2': 'None; None'},
 '002': {'clean': 2,
  'align': 4,
  'clean2': 4,
  'align2': 4,
  'note_general': '222',
  'note_align': 'None',
  'note_clean': 'None',
  'note_align2': 'None',
  'note_clean2': 'None'}}

## Make aggregated quality flags
Aggregate quality marks to a binary number according to CF-Convention section 3.5 for quality flags.

QC flag binary representation, bits: XXYY  with:
* XX - level - [00,01,10] - good, slight out of level, bad out of level
* YY - clean - [00,01,10,11] good, slight-, moderate-, strong covered
```
flag_mask = '3b,3b,3b, 12b, 12b'
flag_values = '1b, 2b, 3b, 4b, 8b '
flag_meanings = "
    soiling_light
    soiling_moderate
    soiling_heavy
    level_problematic
    level_bad"
```


In [9]:
#|export
def get_qcflag(qc_clean, qc_level):
    """
    Aggregate quality flags.

    Parameters
    ----------
    qc_clean: int
        [0,1,2,3] [clean, slight-, moderate-, strong covered]
    qc_level: int
        [0,1,2] [good, slight misalignment, strong misalignment]

    Returns
    -------
    int
        aggregated quality flagg [0-11]
    """
    qc = (qc_level<<2) + qc_clean
    return qc


In [10]:
get_qcflag(np.array([0,1,2,3]),np.array([0,1,2,2]))


array([ 0,  5, 10, 11])

## Export module
Requires *nbdev* to export and update the */lib/reports.py* module

In [11]:
#|eval: false
import nbdev.export
nbdev.export.nb_export('lib_reports.ipynb','../../lib')