# Read CSV into xarray Dataset with quality codes

In [1]:
import xarray as xr
import pandas as pd

  # NOTE(review): neither `yaml` nor `f` is imported or defined in the visible cells, and
  # these lines are indented with no enclosing block in view — TODO confirm they belong in
  # this notebook at all (nothing in the visible cells reads `data` or `defaults`).
  # NOTE(review): if `yaml` is PyYAML, calling `load` without an explicit Loader is unsafe
  # on untrusted input and is rejected by recent releases — confirm and consider `safe_load`.
  # Parse the text read from `f`; fall back to an empty dict when the parsed result is falsy.
  data = yaml.load(f.read()) or {}
  # NOTE(review): `f` has already been read by the line above; presumably a file object, in
  # which case this second parse sees an exhausted stream — verify the intended source.
  defaults = yaml.load(f)


## Load data

In [2]:
# Load the mooring-buoy time series; the timestamp column is parsed to datetimes on read.
# The QC column is pinned to `str`: later cells split it character by character (one flag
# per data column), so it must never be type-inferred into a number, which would drop
# leading "0" flags and break the `.str` accessor.
df = pd.read_csv(
    "data/mooring-buoys-time-series-62450.csv",
    parse_dates=["DATE (yyyy-mm-ddThh:mi:ssZ)"],
    # NOTE(review): `infer_datetime_format` is deprecated in pandas >= 2.0, where format
    # inference is the default; kept for compatibility with the older pandas this
    # notebook appears to target — drop it when upgrading.
    infer_datetime_format=True,
    dtype={"QC": str},
)
df.head()

Unnamed: 0,PLATFORM,DATE (yyyy-mm-ddThh:mi:ssZ),LATITUDE (degree_north),LONGITUDE (degree_east),TEMP LEVEL0 (degree_Celsius),DRYT LEVEL0 (degree_Celsius),PSAL LEVEL0 (psu),BATTERY LEVEL0 (volt),CNDC LEVEL0 (S/m),DEPH LEVEL0 (meter),...,PCO2_PUMP_CURR LEVEL1 (TBD),PCO2_THERMISTOR LEVEL1 (TBD),PHPH LEVEL1 (none),PHPH_ADJUSTED LEVEL1 (none),PSAL_ADJUSTED LEVEL1 (psu),START_CYCLE_FLOW LEVEL1 (l/minute),TEMP_ADJUSTED LEVEL1 (degree_Celsius),TEMP_CO2_ADJUSTED LEVEL1 (degree_Celsius),TUR4 LEVEL1 (ntu),QC
0,62450,2000-07-19 15:33:16,48.358,-4.5518,,,,,,,...,,,,,,,,,,0111999999999999991999799991199999999999999999...
1,62450,2000-07-19 15:58:32,48.358,-4.5518,,,,,,,...,,,,,,,,,3.0,0111999999999999999999799991199999999999999999...
2,62450,2000-07-19 16:18:32,48.358,-4.5518,,,,,,,...,,,,,,,,,,0111999999999999991999799991199999999999999999...
3,62450,2000-07-19 16:33:17,48.358,-4.5518,,,,,,,...,,,,,,,,,2.0,0111999999999999991999799991199999999999999999...
4,62450,2000-07-19 16:58:32,48.358,-4.5518,,,,,,,...,,,,,,,,,,0111999999999999991999799991199999999999999999...


## Extract each quality code into its own column

In [3]:
# Split each QC string on the empty pattern so every flag character gets its own column.
# As the displayed output shows, the split also yields a blank entry before the first and
# after the last character: the flags occupy columns 1..N, while column 0 and the final
# column are empty.
qc_df = df["QC"].str.split("", expand=True)
qc_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,44,45,46,47,48,49,50,51,52,53
0,,0,1,1,1,9,9,9,9,9,...,9,9,9,9,9,9,9,9,9,
1,,0,1,1,1,9,9,9,9,9,...,9,9,9,9,9,9,9,9,1,
2,,0,1,1,1,9,9,9,9,9,...,9,9,9,9,9,9,9,9,9,
3,,0,1,1,1,9,9,9,9,9,...,9,9,9,9,9,9,9,9,1,
4,,0,1,1,1,9,9,9,9,9,...,9,9,9,9,9,9,9,9,9,


## Merge quality code columns into the original dataset

In [4]:
# Attach one "QC <column>" flag column to `df` for every data column.
#
# Data columns are everything that precedes the raw "QC" column. Anchoring on the position
# of "QC" (instead of "all but the last column") keeps this cell safe to re-run: after the
# first run "QC" is no longer last, and slicing with [:-1] would then produce
# "QC QC ..." columns and index past the end of `qc_df`.
data_columns = df.columns[: df.columns.get_loc("QC")]

# `qc_df` carries a blank column at each end (artifact of splitting on the empty pattern),
# so only its inner columns hold flags. Fail loudly if there are fewer flags than columns.
flag_count = qc_df.shape[1] - 2
assert flag_count >= len(data_columns), (
    f"QC string provides {flag_count} flags for {len(data_columns)} data columns"
)

# Flags start at column 1 of `qc_df` (column 0 is the blank leading entry).
# Assign a Series (`qc_df[position]`), not a one-column DataFrame.
for position, column in enumerate(data_columns, start=1):
    df[f"QC {column}"] = qc_df[position]
df.head()

Unnamed: 0,PLATFORM,DATE (yyyy-mm-ddThh:mi:ssZ),LATITUDE (degree_north),LONGITUDE (degree_east),TEMP LEVEL0 (degree_Celsius),DRYT LEVEL0 (degree_Celsius),PSAL LEVEL0 (psu),BATTERY LEVEL0 (volt),CNDC LEVEL0 (S/m),DEPH LEVEL0 (meter),...,QC PCO2_LOW_REF LEVEL1 (TBD),QC PCO2_PUMP_CURR LEVEL1 (TBD),QC PCO2_THERMISTOR LEVEL1 (TBD),QC PHPH LEVEL1 (none),QC PHPH_ADJUSTED LEVEL1 (none),QC PSAL_ADJUSTED LEVEL1 (psu),QC START_CYCLE_FLOW LEVEL1 (l/minute),QC TEMP_ADJUSTED LEVEL1 (degree_Celsius),QC TEMP_CO2_ADJUSTED LEVEL1 (degree_Celsius),QC TUR4 LEVEL1 (ntu)
0,62450,2000-07-19 15:33:16,48.358,-4.5518,,,,,,,...,9,9,9,9,9,9,9,9,9,9
1,62450,2000-07-19 15:58:32,48.358,-4.5518,,,,,,,...,9,9,9,9,9,9,9,9,9,1
2,62450,2000-07-19 16:18:32,48.358,-4.5518,,,,,,,...,9,9,9,9,9,9,9,9,9,9
3,62450,2000-07-19 16:33:17,48.358,-4.5518,,,,,,,...,9,9,9,9,9,9,9,9,9,1
4,62450,2000-07-19 16:58:32,48.358,-4.5518,,,,,,,...,9,9,9,9,9,9,9,9,9,9


## Index data by date

In [5]:
# Use the observation timestamp as the row index. `set_index` returns a new frame, so
# `df` itself is left unchanged.
timed_df = df.set_index("DATE (yyyy-mm-ddThh:mi:ssZ)")

## Convert to xarray Dataset

In [7]:
# Convert the time-indexed frame to an xarray Dataset: the index becomes the dimension and
# coordinate, and every remaining column becomes a data variable along it.
# For a DataFrame, `to_xarray()` delegates to `xarray.Dataset.from_dataframe`.
ds = timed_df.to_xarray()
ds

<xarray.Dataset>
Dimensions:                                       (DATE (yyyy-mm-ddThh:mi:ssZ): 498742)
Coordinates:
  * DATE (yyyy-mm-ddThh:mi:ssZ)                   (DATE (yyyy-mm-ddThh:mi:ssZ)) datetime64[ns] 2000-07-19T15:33:16 ... 2019-01-29T03:28:49
Data variables:
    PLATFORM                                      (DATE (yyyy-mm-ddThh:mi:ssZ)) int64 62450 ... 62450
    LATITUDE (degree_north)                       (DATE (yyyy-mm-ddThh:mi:ssZ)) float64 48.36 ... 48.36
    LONGITUDE (degree_east)                       (DATE (yyyy-mm-ddThh:mi:ssZ)) float64 -4.552 ... -4.552
    TEMP LEVEL0 (degree_Celsius)                  (DATE (yyyy-mm-ddThh:mi:ssZ)) float64 nan ... nan
    DRYT LEVEL0 (degree_Celsius)                  (DATE (yyyy-mm-ddThh:mi:ssZ)) float64 nan ... nan
    PSAL LEVEL0 (psu)                             (DATE (yyyy-mm-ddThh:mi:ssZ)) float64 nan ... nan
    BATTERY LEVEL0 (volt)                         (DATE (yyyy-mm-ddThh:mi:ssZ)) float64 nan ... nan
    CNDC LEVEL0