In [None]:
import getpass
import itertools
import os
import pathlib
import re
import shutil
from collections import defaultdict
from pprint import pprint

import httpx
import matplotlib.pyplot as plt
import msgpack
import numpy as np
import pandas as pd
import pydantic
import pymongo
import tiled
from bson.objectid import ObjectId
from pymongo import MongoClient
from tiled.client import from_uri
from tiled.examples.xdi import read_xdi

In [None]:
import aimmdb
import aimmdb.models
from aimm_adapters.heald_labview import (
    mangle_dup_names,
    normalize_dataframe,
    parse_heald_labview,
)
from aimmdb.serialization import serialize_parquet

In [None]:
# Connect to the AIMM staging Tiled server; `c` is the client object that is
# passed to every ingest_* function in this notebook.
c = from_uri("https://aimm-staging.lbl.gov/api")

In [None]:
# Root directory that holds every raw data set ingested by this notebook.
# Set the AIMM_DATA_ROOT environment variable to point at a different copy of
# the data; the fallback is the path that was previously hard-coded here.
data_root = pathlib.Path(
    os.environ.get(
        "AIMM_DATA_ROOT",
        "/run/media/joseph/seagate/jkleinhenz/projects/aimm/data",
    )
)

In [None]:
# Directory searched (recursively) for *.xdi files by load_newville below.
# Fail fast if it is missing.
data_path = data_root / "newville" / "data"
assert data_path.exists()

In [None]:
def load_newville(data_path):
    """Index every ``*.xdi`` file below ``data_path`` into a DataFrame.

    Each file is parsed with ``read_xdi`` and only its metadata is kept. The
    ``fields`` entry is removed from the metadata and its items are merged
    back into the top level under lower-cased keys. The sample ``name`` and
    ``prep`` are popped out of ``metadata["sample"]`` into their own columns.

    Parameters
    ----------
    data_path : pathlib.Path
        Directory searched recursively for ``*.xdi`` files.

    Returns
    -------
    pandas.DataFrame
        One row per file with the columns ``name`` (file stem), ``file``
        (path as a string), ``sample.name``, ``sample.prep`` (``None`` when
        the file has no prep entry) and ``metadata`` (the remaining metadata
        dict). The frame has no columns when no files are found.

    Raises
    ------
    KeyError
        If a file's metadata lacks ``fields``, ``sample`` or the sample
        ``name``.
    """
    # Sorted so the row order is reproducible instead of filesystem-dependent.
    files = sorted(data_path.rglob("*.xdi"))
    print(f"found {len(files)} xdi files to ingest")

    data_list = []

    for f in files:
        _, metadata = read_xdi(str(f))

        # Flatten the "fields" namespace into the top level (lower-cased keys).
        fields = metadata.pop("fields")
        metadata.update({k.lower(): v for k, v in fields.items()})

        # name/prep are removed from the metadata so they are not stored twice.
        sample = metadata["sample"]
        sample_name = sample.pop("name")
        sample_prep = sample.pop("prep", None)

        # FIXME extract formula if present
        data_list.append(
            {
                "name": f.stem,
                "file": str(f),
                "sample.name": sample_name,
                "sample.prep": sample_prep,
                "metadata": metadata,
            }
        )

    df = pd.DataFrame(data_list)

    return df

In [None]:
# FIXME compute mu if possible
def ingest_newville(c, df):
    """POST the Newville samples and their XAS spectra to the server.

    Rows of ``df`` are grouped by (``sample.name``, ``sample.prep``). One
    sample document is POSTed to ``/samples`` per group, then every file in
    the group is re-read with ``read_xdi`` and POSTed to ``/xas`` (msgpack
    encoded) with the uid returned for the sample.

    Parameters
    ----------
    c : client returned by ``tiled.client.from_uri``
        Requests are built and sent through the private ``c.context._client``
        and ``c.context._send`` attributes.
    df : pandas.DataFrame
        Frame with the columns ``sample.name``, ``sample.prep``, ``file`` and
        ``metadata``, as produced by ``load_newville``. It is not modified.

    Raises
    ------
    AssertionError
        If the server answers any POST with a status other than 200.
    RuntimeError
        If the ``/samples`` response carries no ``uid``.
    KeyError
        If a row's metadata has no ``element`` entry.
    """
    provenance = {
        "source": "newville",
        "url": "https://github.com/XraySpectroscopy/XASDataLibrary",
    }

    # dropna=False: groupby otherwise silently discards every row whose prep
    # is missing (None), so those samples would never be ingested.
    grouped = df.groupby(["sample.name", "sample.prep"], dropna=False)

    for (name, prep), g in grouped:
        # A missing prep comes back from groupby as NaN; restore None.
        if pd.isna(prep):
            prep = None

        print(f"{name}: {prep}, {len(g)}")
        sample = aimmdb.models.SampleData(
            name=name, prep=prep, dataset="newville", provenance=provenance
        )

        request = c.context._client.build_request(
            "POST", "/samples", json=sample.dict()
        )
        r = c.context._send(request)
        if r.status_code != 200:
            # Explicit raise (not `assert False`) so the check survives
            # `python -O` and reports what the server said.
            raise AssertionError(
                f"POST /samples failed for {name!r}: {r.status_code} {r.text}"
            )

        data = r.json()
        if "uid" in data:
            sample_id = data["uid"]
        else:
            raise RuntimeError(data)

        for _, row in g.iterrows():
            xas_df, _ = read_xdi(row["file"])

            # Work on a copy: popping "element" from the dict stored in df
            # would make a second run of this function fail with KeyError.
            metadata = dict(row["metadata"])
            element = aimmdb.models.XDIElement(**metadata.pop("element"))
            metadata = aimmdb.models.XASMetadata(
                element=element, provenance=provenance, sample_id=sample_id, **metadata
            )
            data = aimmdb.models.DataFrameData.from_pandas(xas_df)
            doc = aimmdb.models.XASData(
                structure_family="dataframe", metadata=metadata, data=data
            )

            request = c.context._client.build_request(
                "POST",
                "/xas",
                content=msgpack.packb(doc.dict()),
                headers={"content-type": "application/msgpack"},
            )
            r = c.context._send(request)
            if r.status_code != 200:
                raise AssertionError(
                    f"POST /xas failed for {row['file']}: {r.status_code} {r.text}"
                )

In [None]:
# Build the per-file index (one row per *.xdi file) for the Newville data.
df = load_newville(data_path)

In [None]:
# POST the Newville samples and spectra to the server (network side effects).
ingest_newville(c, df)

In [None]:
# Rebind data_path to the NCM data set; the cells below use it as the base
# directory for the "chenjun" and "wanli" sub-directories.
data_path = data_root / "NCM"
assert data_path.exists()

In [None]:
def get_aimm_ncm_samples():
    """Return the metadata of the three AIMM NCM samples.

    Returns
    -------
    dict
        Maps each sample name (``BM_NCM622``, ``BM_NCM712``, ``BM_NCMA``) to
        ``{"icp_oes": {...}}``, where the inner dict holds one number per
        element symbol in the order Li, Ni, Co, Mn, Al, Zr, B.
    """
    elements = ("Li", "Ni", "Co", "Mn", "Al", "Zr", "B")

    # One row per sample; values are listed in the same order as `elements`.
    icp_oes_rows = {
        "BM_NCM622": (0.99, 0.62, 0.19, 0.19, 0.0, 0.004, 0.004),
        "BM_NCM712": (0.98, 0.73, 0.09, 0.19, 0.007, 0.002, 0.003),
        "BM_NCMA": (0.98, 0.89, 0.05, 0.07, 0.019, 0.003, 0.005),
    }

    return {
        sample_name: {"icp_oes": dict(zip(elements, row))}
        for sample_name, row in icp_oes_rows.items()
    }


def get_aimm_ncm_params():
    """Return the ordered list of charge parameters for the NCM series.

    Returns
    -------
    list of dict
        Eight dicts with the keys ``cycle``, ``voltage`` and ``state``, in a
        fixed order that callers rely on.
    """
    # (cycle, voltage, charge state)
    schedule = (
        (0, 0, "DC"),
        (1, 4.3, "C"),
        (1, 4.8, "C"),
        (1, 3.0, "DC"),
        (2, 4.3, "C"),
        (2, 4.8, "C"),
        (10, 4.8, "C"),
        (10, 3.0, "DC"),
    )

    params = []
    for cycle, voltage, state in schedule:
        params.append({"cycle": cycle, "voltage": voltage, "state": state})
    return params

In [None]:
def ingest_aimm_ncm_samples(c):
    """POST one sample document per AIMM NCM sample and collect the uids.

    Parameters
    ----------
    c : client returned by ``tiled.client.from_uri``
        Requests are built and sent through the private ``c.context._client``
        and ``c.context._send`` attributes.

    Returns
    -------
    dict
        Maps each sample name from ``get_aimm_ncm_samples`` to the ``uid``
        returned by the server, in the same order.

    Raises
    ------
    AssertionError
        If the server answers a POST with a status other than 200.
    RuntimeError
        If a response carries no ``uid``.
    """
    sample_ids = {}

    # FIXME replace source name with facility + beamline
    # NOTE(review): this FIXME originally said "chenjun" while the recorded
    # source is "wanli" — confirm which source these samples belong to.
    provenance = {"source": "wanli"}

    for name, metadata in get_aimm_ncm_samples().items():
        print(f"{name}")
        sample = aimmdb.models.SampleData(
            name=name, dataset="aimm_ncm", provenance=provenance, **metadata
        )

        request = c.context._client.build_request(
            "POST", "/samples", json=sample.dict()
        )
        r = c.context._send(request)
        if r.status_code != 200:
            # Explicit raise (not `assert False`) so the check survives
            # `python -O` and reports what the server said.
            raise AssertionError(
                f"POST /samples failed for {name!r}: {r.status_code} {r.text}"
            )

        data = r.json()
        if "uid" not in data:
            raise RuntimeError(data)
        sample_ids[name] = data["uid"]

    return sample_ids

In [None]:
# Create the NCM samples on the server and keep the uids it returns.
sample_ids = ingest_aimm_ncm_samples(c)

# NOTE(review): the mapping returned above is immediately replaced by the
# hard-coded uids below — presumably the ones from an earlier ingest. Confirm
# they exist on the target server before running the following cells.
# NOTE the order is important here
# (ingest_aimm_ncm_chenjun walks sample_ids.items() in insertion order to
# assign numbered scan files to samples)
sample_ids = {
    "BM_NCM622": "ZidLZ8PqqJU",
    "BM_NCM712": "geLewrYwdDT",
    "BM_NCMA": "7RSuyWbCsQK",
}

In [None]:
def ingest_aimm_ncm_chenjun(c, sample_ids, data_path):
    """POST the numbered ``NCMBM24<atom>.NNNN`` scans for Ni, Mn and Co.

    For each atom, the first whitespace-separated token of
    ``NCMBM24<atom>.last`` is read as ``N`` and the files numbered 1..N are
    ingested. File numbers are matched to (charge, sample) pairs by cycling
    through ``itertools.product(get_aimm_ncm_params(), sample_ids.items())``:
    the sample varies fastest, the charge parameters slowest, and the sequence
    repeats once it is exhausted. The result therefore depends on the
    insertion order of ``sample_ids``.

    Parameters
    ----------
    c : client returned by ``tiled.client.from_uri``
        Requests are built and sent through the private ``c.context._client``
        and ``c.context._send`` attributes.
    sample_ids : dict
        Maps sample name to the sample uid stored with each spectrum.
    data_path : pathlib.Path
        Directory containing the ``.last`` files and the numbered scans.

    Raises
    ------
    AssertionError
        If the server answers a POST with a status other than 200.
    """
    aimm_ncm_params = get_aimm_ncm_params()

    # FIXME replace chenjun with facility + beamline
    provenance = {"source": "chenjun"}

    for atom in ["Ni", "Mn", "Co"]:
        # Read the scan count, then close the file before the long loop.
        with open(data_path / f"NCMBM24{atom}.last", "r") as last_file:
            N = int(last_file.read().split()[0])
        print(f"{atom}: {N=}")

        assignments = itertools.cycle(
            itertools.product(aimm_ncm_params, sample_ids.items())
        )
        for i, (charge, sample) in zip(range(1, N + 1), assignments):
            path = data_path / f"NCMBM24{atom}.{i:04d}"
            fname = path.name
            print(fname, charge, sample)

            with open(path) as scan_file:
                df, metadata = parse_heald_labview(scan_file)

            df, translation = normalize_dataframe(df, standardize=True)
            metadata["translation"] = translation
            df["mutrans"] = np.log(df["i0"] / df["itrans"])
            df["murefer"] = np.log(df["i0"] / df["irefer"])
            metadata["charge"] = charge
            metadata["fname"] = fname

            element = aimmdb.models.XDIElement(symbol=atom, edge="K")

            data = aimmdb.models.DataFrameData.from_pandas(df)

            # `sample` is a (name, uid) item of sample_ids; store the uid.
            metadata = aimmdb.models.XASMetadata(
                sample_id=sample[1],
                element=element,
                provenance=provenance,
                **metadata,
            )

            doc = aimmdb.models.XASData(
                structure_family="dataframe", metadata=metadata, data=data
            )

            request = c.context._client.build_request(
                "POST",
                "/xas",
                content=msgpack.packb(doc.dict()),
                headers={"content-type": "application/msgpack"},
            )
            r = c.context._send(request)
            if r.status_code != 200:
                # Explicit raise (not `assert False`) so the check survives
                # `python -O` and reports what the server said.
                raise AssertionError(
                    f"POST /xas failed for {fname}: {r.status_code} {r.text}"
                )

In [None]:
# POST the numbered scans found in the "chenjun" sub-directory of the NCM
# data set (network side effects).
ingest_aimm_ncm_chenjun(c, sample_ids, data_path / "chenjun")

In [None]:
# Display the sample name -> uid mapping used by the ingest calls.
sample_ids

In [None]:
def read_header(f):
    """Consume ``f`` up to and including the column-header line.

    The header is the first line that starts with ``"Time (s)"``. On return
    the file is positioned at the first line after it.

    Parameters
    ----------
    f : iterable of str
        Open text file (or any iterator over lines).

    Returns
    -------
    list of str or None
        The tab-separated column names, or ``None`` when no header line is
        found (in which case ``f`` has been read to the end).
    """
    for line in f:
        if line.startswith("Time (s)"):
            # Strip the line terminator first; otherwise the last column name
            # would carry a trailing newline.
            return line.rstrip("\r\n").split("\t")
    return None


def read_wanli(f):
    """Read a tab-separated scan file into a normalised DataFrame.

    The column names come from ``read_header`` (after de-duplication by
    ``mangle_dup_names``); the rest of ``f`` is parsed with ``pandas.read_csv``.
    Only the five columns listed in ``translation`` are kept, renamed to
    ``energy``, ``i0``, ``tey``, ``tfy`` and ``i0_alt``. Two derived columns are
    added: ``mu_tfy = tfy / i0`` and ``mu_tey = tey / i0``.

    Parameters
    ----------
    f : file object
        Open text file positioned before the header line.

    Returns
    -------
    pandas.DataFrame
        Columns ``energy``, ``i0``, ``tey``, ``tfy``, ``i0_alt``, ``mu_tfy``,
        ``mu_tey`` in that order.

    Raises
    ------
    ValueError
        If the file contains no header line.
    KeyError
        If one of the expected source columns is missing.
    """
    names = read_header(f)
    if names is None:
        # Fail with a clear message instead of passing None further down.
        raise ValueError("no header line starting with 'Time (s)' found")
    names = mangle_dup_names(names)
    df = pd.read_csv(f, sep="\t", names=names)

    # source column name -> name used in the stored DataFrame
    translation = {
        "Mono Energy": "energy",
        "Counter 3": "i0",
        "Counter 1": "tey",
        "Counter 2": "tfy",
        "Counter 0": "i0_alt",
    }
    df = df.rename(columns=translation)[list(translation.values())]

    df["mu_tfy"] = df["tfy"] / df["i0"]
    df["mu_tey"] = df["tey"] / df["i0"]

    return df


# NOTE this hardcodes BM prefix
def parse_filename(name):
    """Derive the sample name and charge parameters from a file name.

    The sample is chosen by substring, tested in this order: ``"622"``,
    ``"NCMA"``, ``"712"``. A name containing ``"Pristine"`` maps to cycle 0 at
    0.0 V, discharged. Otherwise the cycle comes from ``"1st"``, ``"2nd"`` or
    ``"10th"`` and the voltage from the first ``<digits>V`` token
    (``43V``, ``48V`` or ``3V``).

    Parameters
    ----------
    name : str
        File name to parse.

    Returns
    -------
    tuple
        ``(sample, charge)`` where ``sample`` is one of ``"BM_NCM622"``,
        ``"BM_NCMA"``, ``"BM_NCM712"`` and ``charge`` is a dict with the keys
        ``cycle``, ``voltage`` and ``state``.

    Raises
    ------
    KeyError
        If the sample, cycle or voltage cannot be determined. Callers rely on
        this exception type to skip unrecognised files.
    """
    if "622" in name:
        sample = "BM_NCM622"
    elif "NCMA" in name:
        sample = "BM_NCMA"
    elif "712" in name:
        sample = "BM_NCM712"
    else:
        raise KeyError(f"unable to parse sample from {name}")

    keys = ["cycle", "voltage", "state"]

    if "Pristine" in name:
        return sample, dict(zip(keys, (0, 0.0, "DC")))

    if "1st" in name:
        cycle = 1
    elif "2nd" in name:
        cycle = 2
    elif "10th" in name:
        cycle = 10
    else:
        raise KeyError(f"unable to parse cycle from {name}")

    # Raw string: "\d" in a plain string literal is an invalid escape.
    match = re.search(r"(\d*)V", name)
    if match is None:
        # Previously this surfaced as a TypeError from indexing None, which
        # callers catching KeyError did not handle.
        raise KeyError(f"unable to parse voltage from {name}")
    voltage_str = match[0]

    # voltage token -> (voltage, charge state)
    known_voltages = {
        "43V": (4.3, "C"),
        "48V": (4.8, "C"),
        "3V": (3.0, "DC"),
    }
    if voltage_str not in known_voltages:
        raise KeyError(f"unable to parse voltage from {voltage_str}")
    voltage, state = known_voltages[voltage_str]

    return sample, dict(zip(keys, (cycle, voltage, state)))


def ingest_aimm_ncm_wanli(c, sample_ids, data_path):
    """POST every parseable ``*.txt`` scan in ``data_path`` as a Ni L3 spectrum.

    The sample and charge parameters are taken from the file name via
    ``parse_filename``; files it rejects with ``KeyError`` are reported and
    skipped. The data are read with ``read_wanli``.

    Parameters
    ----------
    c : client returned by ``tiled.client.from_uri``
        Requests are built and sent through the private ``c.context._client``
        and ``c.context._send`` attributes.
    sample_ids : dict
        Maps sample name to the sample uid stored with each spectrum.
    data_path : pathlib.Path
        Directory containing the ``*.txt`` files (not searched recursively).

    Raises
    ------
    AssertionError
        If the server answers a POST with a status other than 200.
    KeyError
        If a parsed sample name is missing from ``sample_ids``.
    """
    provenance = {"source": "wanli"}
    element = aimmdb.models.XDIElement(symbol="Ni", edge="L3") if False else None

    # Sorted so files are processed in a reproducible order.
    for file in sorted(data_path.glob("*.txt")):
        fname = file.name
        print(fname)

        try:
            sample_name, charge = parse_filename(fname)
        except KeyError:
            print(f"failed to extract sample from {fname}")
            continue

        sample_id = sample_ids[sample_name]

        with open(file, "r") as f:
            df = read_wanli(f)

        metadata = {}
        metadata["charge"] = charge
        metadata["fname"] = fname

        element = aimmdb.models.XDIElement(symbol="Ni", edge="L3")

        data = aimmdb.models.DataFrameData.from_pandas(df)

        metadata = aimmdb.models.XASMetadata(
            sample_id=sample_id,
            element=element,
            provenance=provenance,
            **metadata,
        )

        doc = aimmdb.models.XASData(
            structure_family="dataframe", metadata=metadata, data=data
        )

        request = c.context._client.build_request(
            "POST",
            "/xas",
            content=msgpack.packb(doc.dict()),
            headers={"content-type": "application/msgpack"},
        )
        r = c.context._send(request)
        if r.status_code != 200:
            # Explicit raise (not `assert False`) so the check survives
            # `python -O` and reports what the server said.
            raise AssertionError(
                f"POST /xas failed for {fname}: {r.status_code} {r.text}"
            )

In [None]:
# POST the Ni L3 scans of the NCM series; data_path is data_root / "NCM" here
# (network side effects).
ingest_aimm_ncm_wanli(
    c, sample_ids, data_path / "wanli" / "Unimodal NCM622_712Al-doped_NCMA_Ni L3"
)

In [None]:
def ingest_aimm_core_wanli_oxygen_k(c, data_path):
    """POST one sample and one O K-edge spectrum per ``*.txt`` file in ``O_K``.

    Each file in ``data_path / "O_K"`` is read as a header-less, tab-separated
    table with the columns ``energy`` and ``mu``. A sample named after the file
    stem is created first; the spectrum is then stored with the returned uid.

    Parameters
    ----------
    c : client returned by ``tiled.client.from_uri``
        Requests are built and sent through the private ``c.context._client``
        and ``c.context._send`` attributes.
    data_path : pathlib.Path
        Directory that contains the ``O_K`` sub-directory.

    Raises
    ------
    AssertionError
        If the server answers a POST with a status other than 200.
    RuntimeError
        If the ``/samples`` response carries no ``uid``.
    """
    provenance = {"source": "wanli"}
    element = aimmdb.models.XDIElement(symbol="O", edge="K")

    # Sorted so files are processed in a reproducible order.
    for file in sorted((data_path / "O_K").glob("*.txt")):
        fname = file.name
        name = file.stem
        print(name)

        sample = aimmdb.models.SampleData(
            name=name, dataset="aimm_core", provenance=provenance
        )

        request = c.context._client.build_request(
            "POST", "/samples", json=sample.dict()
        )

        r = c.context._send(request)
        if r.status_code != 200:
            # Explicit raise (not `assert False`) so the check survives
            # `python -O` and reports what the server said.
            raise AssertionError(
                f"POST /samples failed for {name!r}: {r.status_code} {r.text}"
            )

        data = r.json()
        if "uid" in data:
            sample_id = data["uid"]
        else:
            raise RuntimeError(data)

        df = pd.read_csv(file, header=None, delimiter="\t", names=["energy", "mu"])
        data = aimmdb.models.DataFrameData.from_pandas(df)

        metadata = aimmdb.models.XASMetadata(
            sample_id=sample_id,
            element=element,
            provenance=provenance,
            fname=fname,
        )

        doc = aimmdb.models.XASData(
            structure_family="dataframe", metadata=metadata, data=data
        )

        request = c.context._client.build_request(
            "POST",
            "/xas",
            content=msgpack.packb(doc.dict()),
            headers={"content-type": "application/msgpack"},
        )
        r = c.context._send(request)
        if r.status_code != 200:
            raise AssertionError(
                f"POST /xas failed for {fname}: {r.status_code} {r.text}"
            )

In [None]:
# POST the O K-edge spectra found under data_root / "wanli" / "core" / "O_K"
# (network side effects).
ingest_aimm_core_wanli_oxygen_k(c, data_root / "wanli" / "core")

In [None]:
def ingest_aimm_core_wanli_tm_l(c, data_path):
    """POST one sample and one L-edge spectrum per file below ``TM_L``.

    Every entry of ``data_path / "TM_L"`` that is not a regular file is treated
    as a per-element directory whose stem is used as the element symbol. Each
    ``*.txt`` file inside it (except those whose stem starts with
    ``"IgorPlot"``) is read as a header-less, tab-separated table with the
    columns ``energy`` and ``mu``. A sample named after the file stem is
    created first; the spectrum is then stored with the returned uid.

    Parameters
    ----------
    c : client returned by ``tiled.client.from_uri``
        Requests are built and sent through the private ``c.context._client``
        and ``c.context._send`` attributes.
    data_path : pathlib.Path
        Directory that contains the ``TM_L`` sub-directory.

    Raises
    ------
    AssertionError
        If the server answers a POST with a status other than 200.
    RuntimeError
        If the ``/samples`` response carries no ``uid``.
    """
    provenance = {"source": "wanli"}

    # Sorted so directories and files are processed in a reproducible order.
    for d in sorted((data_path / "TM_L").iterdir()):
        if d.is_file():
            continue
        symbol = d.stem
        element = aimmdb.models.XDIElement(symbol=symbol, edge="L")

        for file in sorted(d.glob("*.txt")):
            if file.stem.startswith("IgorPlot"):
                continue

            fname = file.name
            name = file.stem
            print(name)

            sample = aimmdb.models.SampleData(
                name=name, dataset="aimm_core", provenance=provenance
            )

            request = c.context._client.build_request(
                "POST", "/samples", json=sample.dict()
            )

            r = c.context._send(request)
            if r.status_code != 200:
                # Explicit raise (not `assert False`) so the check survives
                # `python -O` and reports what the server said.
                raise AssertionError(
                    f"POST /samples failed for {name!r}: {r.status_code} {r.text}"
                )

            data = r.json()
            if "uid" in data:
                sample_id = data["uid"]
            else:
                raise RuntimeError(data)

            df = pd.read_csv(file, header=None, delimiter="\t", names=["energy", "mu"])
            data = aimmdb.models.DataFrameData.from_pandas(df)

            metadata = aimmdb.models.XASMetadata(
                sample_id=sample_id,
                element=element,
                provenance=provenance,
                fname=fname,
            )

            doc = aimmdb.models.XASData(
                structure_family="dataframe", metadata=metadata, data=data
            )

            request = c.context._client.build_request(
                "POST",
                "/xas",
                content=msgpack.packb(doc.dict()),
                headers={"content-type": "application/msgpack"},
            )
            r = c.context._send(request)
            if r.status_code != 200:
                raise AssertionError(
                    f"POST /xas failed for {fname}: {r.status_code} {r.text}"
                )

In [None]:
# POST the L-edge spectra found under data_root / "wanli" / "core" / "TM_L"
# (network side effects).
ingest_aimm_core_wanli_tm_l(c, data_root / "wanli" / "core")

In [None]:
def ingest_aimm_core_ni_metal(c, data_path):
    """POST the ``Ni metal.txt`` reference as a sample plus a Ni L3 spectrum.

    The file is expected at
    ``data_path / "Unimodal NCM622_712Al-doped_NCMA_Ni L3" / "Ni metal.txt"``
    and is read with ``read_wanli``. A sample named after the file stem is
    created first; the spectrum is then stored with the returned uid.

    Parameters
    ----------
    c : client returned by ``tiled.client.from_uri``
        Requests are built and sent through the private ``c.context._client``
        and ``c.context._send`` attributes.
    data_path : pathlib.Path
        Directory that contains the ``Unimodal ...`` sub-directory.

    Raises
    ------
    AssertionError
        If the file does not exist, or the server answers a POST with a
        status other than 200.
    RuntimeError
        If the ``/samples`` response carries no ``uid``.
    """
    provenance = {"source": "wanli"}

    file = data_path / "Unimodal NCM622_712Al-doped_NCMA_Ni L3" / "Ni metal.txt"

    # Checked before anything is sent; raised explicitly so the check is not
    # stripped by `python -O`.
    if not file.exists():
        raise AssertionError(f"missing data file: {file}")

    fname = file.name
    name = file.stem
    print(name)

    sample = aimmdb.models.SampleData(
        name=name, dataset="aimm_core", provenance=provenance
    )

    request = c.context._client.build_request(
        "POST", "/samples", json=sample.dict()
    )

    r = c.context._send(request)
    if r.status_code != 200:
        raise AssertionError(
            f"POST /samples failed for {name!r}: {r.status_code} {r.text}"
        )

    data = r.json()
    if "uid" in data:
        sample_id = data["uid"]
    else:
        raise RuntimeError(data)

    with open(file, "r") as f:
        df = read_wanli(f)

    element = aimmdb.models.XDIElement(symbol="Ni", edge="L3")

    data = aimmdb.models.DataFrameData.from_pandas(df)

    metadata = aimmdb.models.XASMetadata(
        sample_id=sample_id,
        element=element,
        provenance=provenance,
        fname=fname,
    )

    doc = aimmdb.models.XASData(
        structure_family="dataframe", metadata=metadata, data=data
    )

    request = c.context._client.build_request(
        "POST",
        "/xas",
        content=msgpack.packb(doc.dict()),
        headers={"content-type": "application/msgpack"},
    )
    r = c.context._send(request)
    if r.status_code != 200:
        raise AssertionError(
            f"POST /xas failed for {fname}: {r.status_code} {r.text}"
        )

In [None]:
# POST the Ni metal reference scan stored alongside the NCM Ni L3 scans
# (network side effects).
ingest_aimm_core_ni_metal(c, data_root / "NCM" / "wanli")