In [None]:
import pathlib
import shutil
import itertools
import getpass
import re
from collections import defaultdict

import pymongo
from pymongo import MongoClient

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import models
import serialization

In [None]:
from aimm_adapters.heald_labview import parse_heald_labview, normalize_dataframe, mangle_dup_names

In [None]:
# Connection string for the MongoDB server on localhost; authSource=admin
# makes the server authenticate against the "admin" database.
mongo_uri = "mongodb://localhost:27017/aimm?authSource=admin"
# Prompt for the password interactively so it is not stored in the notebook.
mongo_pass = getpass.getpass()

In [None]:
# Root directory of the NCM data; the ingest cells read the "chenjun" and
# "wanli" sub-directories below it.
# NOTE(review): absolute path on one workstation — adjust before running elsewhere.
data_path = pathlib.Path("/run/media/joseph/seagate/jkleinhenz/projects/aimm/data/NCM/")

In [None]:
# Connect as user "root" with the password collected by getpass and select
# the database that all collections in this notebook live in.
db_name = "aimm"
client = MongoClient(mongo_uri, username="root", password=mongo_pass)
db = client[db_name]

In [None]:
# Handle to the "import" collection that the ingest functions insert into.
# The explicit creation call is kept for reference:
# c = db.create_collection("import")
c = db["import"]

In [None]:
# K-edge data for Ni, Co and Mn

In [None]:
# Recursively list every entry under data_path whose name starts with "NCMBM24Ni".
# NOTE(review): this is not the last expression of the cell, so the list is
# built and then discarded without being displayed.
list(data_path.rglob("NCMBM24Ni*"))

# Sample identifiers stored in each measurement's metadata.
samples = ["NCM622", "NCM712_Al", "NCMA"]

# cycle, voltage, charge state
values = [(0, 0, "DC"), (1, 4.3, "C"), (1, 4.8, "C"), (1, 3.0, "DC"), (2, 4.3, "C"), (2, 4.8, "C"), (10, 4.8, "C"), (10, 3.0, "DC")]
keys = ["cycle", "voltage", "state"]
# One dict per tuple in `values`, keyed by `keys`.
params = [dict(zip(keys, v)) for v in values]

In [None]:
def get_folder_name(p):
  """Return the folder name that corresponds to a charge-state description.

  p is a mapping with the keys "cycle", "voltage" and "state".

  Cycle 0 maps to "pristine"; in that case the voltage must be 0 and the
  state must be "DC" (checked with assertions). Any other cycle is rendered
  as "<cycle, zero-padded to two digits>_<voltage, one decimal>V_<state>".
  """
  cycle = p["cycle"]
  V = p["voltage"]
  state = p["state"]

  if cycle != 0:
    return f"{cycle:02d}_{V:0.1f}V_{state}"

  # A pristine entry is expected to carry the placeholder voltage and state.
  assert V == 0
  assert state == "DC"
  return "pristine"

In [None]:
  print(i, p, sample)

In [None]:
def ingest_chenjun(c, data_path, atom, n):
  """Ingest the scans numbered 1..n for one element into collection c.

  For every number k in 1..n the file data_path/"chenjun"/"NCMBM24<atom>.<k, 4 digits>"
  is parsed with parse_heald_labview, normalized with normalize_dataframe,
  extended with the "mutrans" and "murefer" columns and inserted into c as a
  models.XASMeasurement document (K edge of `atom`).

  Files are paired in order with the elements of
  itertools.product(params, samples) (both module-level lists); the pairing
  starts over when n is larger than that grid.

  Each document name is "<symbol>-<edge>-cycle<cycle>-<voltage>V-<state>" (or
  "<symbol>-<edge>-pristine") followed by "-<k>", where k counts how many
  documents with that sample and base name this call has produced so far.

  Parameters:
    c: collection object providing insert_one.
    data_path: pathlib.Path of the data root directory.
    atom: element symbol used in the file name and the XDIElement.
    n: number of files to ingest.
  """
  # Occurrences seen so far per (sample, base name); drives the name suffix.
  seen = defaultdict(int)

  file_numbers = range(1, n + 1)
  grid = itertools.cycle(itertools.product(params, samples))

  for file_number, (charge, sample) in zip(file_numbers, grid):
    path = data_path / "chenjun" / f"NCMBM24{atom}.{file_number:04d}"
    with open(path) as f:
      fname = path.name
      print(fname)

      parsed_df, metadata = parse_heald_labview(f)
      df, translation = normalize_dataframe(parsed_df, standardize=True)
      metadata["translation"] = translation

      # Log-ratio signals for the transmission and reference channels.
      df["mutrans"] = np.log(df["i0"] / df["itrans"])
      df["murefer"] = np.log(df["i0"] / df["irefer"])

      for field, value in (("charge", charge), ("sample", sample), ("fname", fname)):
        metadata[field] = value

      element = models.XDIElement(symbol=atom, edge="K")
      data = models.DataFrameData.from_pandas(df)

      # Anything without a positive cycle number is treated as pristine.
      if not charge or not (charge["cycle"] > 0):
        name = f"{element.symbol}-{element.edge}-pristine"
      else:
        cycle, voltage, state = charge["cycle"], charge["voltage"], charge["state"]
        name = f"{element.symbol}-{element.edge}-cycle{cycle:d}-{voltage:0.1f}V-{state}"

      key = (sample, name)
      seen[key] += 1
      name += f"-{seen[key]}"

      print(sample, name)

      doc = models.XASMeasurement(
        name=name,
        element=element,
        metadata=metadata,
        data=data)

      c.insert_one(doc.dict())

In [None]:
# Ingest the Mn scans numbered 0001 through 0024 into the "import" collection.
ingest_chenjun(c, data_path, "Mn", 24)

In [None]:
# Ad-hoc ingest of the Co scans for an explicit list of file numbers; the
# per-file steps mirror ingest_chenjun.
# NOTE(review): unlike ingest_chenjun, no `name` is passed to XASMeasurement
# here — confirm that is intended.
# NOTE(review): everything assigned in this loop (df, metadata, ...) stays
# defined at module level afterwards; the plotting cell further down reads `df`.
atom = "Co"
# indices = list(range(1, 24+1))
indices = list(range(25, 34+1))

# zip stops at the shorter input, so each file number is paired with the
# (charge state, sample) combination at the same position, starting from the
# first combination.
for i, (p, sample) in list(zip(indices, itertools.product(params, samples))):
  path = data_path / "chenjun" / f"NCMBM24{atom}.{i:04d}"
  with open(path) as f:
    fname = path.name
    print(fname)
    df, metadata = parse_heald_labview(f)
    df, translation = normalize_dataframe(df, standardize=True)
    metadata["translation"] = translation
    # Log-ratio signals for the transmission and reference channels.
    df["mutrans"] = np.log(df["i0"] / df["itrans"])
    df["murefer"] = np.log(df["i0"] / df["irefer"])
    metadata["charge"] = p
    metadata["sample"] = sample
    metadata["fname"] = fname
    
    element = models.XDIElement(symbol=atom, edge="K")
    
    data = models.DataFrameData.from_pandas(df)
    
    doc = models.XASMeasurement(
      element=element,
      metadata=metadata,
      data=data)
    
    c.insert_one(doc.dict())

In [None]:
# Round-trip check: fetch one stored document and parse it back into an XASMeasurement.
models.XASMeasurement.parse_obj(c.find_one({}))

In [None]:
# Show only metadata.sample for every document (the projection drops _id).
list(c.find({}, {"metadata.sample" : 1, "_id" : 0}))

In [None]:
# Decode the stored data blob of one document with the project's parquet
# deserializer — presumably this yields the original DataFrame; confirm in serialization.
serialization.deserialize_parquet(c.find_one({})["data"]["blob"])

In [None]:
# Overlay "mutrans" and "murefer" against "energy" on one set of axes.
# NOTE(review): `df` is not defined in this cell; it is presumably the frame
# left over from the last iteration of the Co ingest loop above.
fig, ax = plt.subplots()
df.plot("energy", "mutrans", ax=ax)
df.plot("energy", "murefer", ax=ax)

In [None]:
def read_header(f):
  """Advance f to the column-header row and return the column names.

  Lines are consumed from f until one starting with "Time (s)" is found.
  That line, without its line terminator, is split on tab characters and
  returned as a list of strings; f is left positioned on the line that
  follows the header.

  Returns None if no such line exists (f is exhausted in that case).
  """
  for line in f:
    if line.startswith("Time (s)"):
      # Strip the terminator first; otherwise the last column name ends in "\n"
      # and can never match a lookup by its real name.
      return line.rstrip("\r\n").split("\t")
  return None
  

def read_wanli(f):
  """Read a tab-separated scan file and return the normalized columns.

  The column names come from the "Time (s)" header row located by
  read_header (passed through mangle_dup_names); the rows after it are
  loaded with pandas. Five source columns are kept and renamed:

    "Mono Energy" -> "energy", "Counter 3" -> "i0", "Counter 1" -> "tey",
    "Counter 2" -> "tfy", "Counter 0" -> "i0_alt"

  and two derived columns are appended: "mu_tfy" = tfy / i0 and
  "mu_tey" = tey / i0.

  Parameters:
    f: open text file positioned at or before the header row.

  Returns:
    pandas.DataFrame with the columns energy, i0, tey, tfy, i0_alt,
    mu_tfy, mu_tey.

  Raises:
    ValueError: if f contains no line starting with "Time (s)".
  """
  names = read_header(f)
  if names is None:
    # Fail here with a clear message instead of handing None to the
    # name-mangling and CSV-parsing steps.
    raise ValueError('no header line starting with "Time (s)" found')
  names = mangle_dup_names(names)
  df = pd.read_csv(f, sep="\t", names=names)

  translation = {"Mono Energy" : "energy", "Counter 3" : "i0", "Counter 1" : "tey", "Counter 2" : "tfy", "Counter 0" : "i0_alt"}
  # Rename, then keep only the translated columns in the order listed above.
  df = df.rename(columns=translation)[list(translation.values())]

  # Yield signals normalized by the i0 column.
  df["mu_tfy"] = df["tfy"] / df["i0"]
  df["mu_tey"] = df["tey"] / df["i0"]

  return df

def parse_filename(name):
  """Derive the sample name and the charge state from a data file name.

  The sample is chosen by the first marker found in `name`, checked in this
  order: "622" -> "NCM622", "NCMA" -> "NCMA", "712" -> "NCM712_Al",
  "metal" -> "Ni_metal".

  The charge state is
    * None for the "Ni_metal" sample,
    * cycle 0, voltage 0.0, state "DC" if the name contains "Pristine",
    * otherwise built from the cycle marker ("1st", "2nd" or "10th") and the
      first "<digits>V" token: "43V" -> 4.3 / "C", "48V" -> 4.8 / "C",
      "3V" -> 3.0 / "DC".

  Returns:
    (sample, charge), where charge is None or a dict with the keys "cycle",
    "voltage" and "state".

  Raises:
    KeyError: if the sample, the cycle or the voltage cannot be determined.
  """
  # Order matters: the first marker present in the name wins.
  sample_markers = (
    ("622", "NCM622"),
    ("NCMA", "NCMA"),
    ("712", "NCM712_Al"),
    ("metal", "Ni_metal"),
  )
  cycle_markers = (("1st", 1), ("2nd", 2), ("10th", 10))
  voltage_states = {"43V": (4.3, "C"), "48V": (4.8, "C"), "3V": (3.0, "DC")}

  sample = next((label for marker, label in sample_markers if marker in name), None)
  if sample is None:
    raise KeyError(f"unable to parse sample from {name}")

  if sample == "Ni_metal":
    return sample, None
  if "Pristine" in name:
    return sample, {"cycle": 0, "voltage": 0.0, "state": "DC"}

  cycle = next((number for marker, number in cycle_markers if marker in name), None)
  if cycle is None:
    raise KeyError(f"unable to parse cycle from {name}")

  # Raw string avoids the invalid "\d" escape; requiring at least one digit
  # keeps a stray "V" elsewhere in the name from being taken as the voltage.
  match = re.search(r"\d+V", name)
  if match is None:
    raise KeyError(f"unable to parse voltage from {name}")
  voltage_str = match[0]
  if voltage_str not in voltage_states:
    raise KeyError(f"unable to parse voltage from {voltage_str}")
  voltage, state = voltage_states[voltage_str]

  return sample, {"cycle": cycle, "voltage": voltage, "state": state}

In [None]:
# All .txt scans in the Ni L3 directory. glob yields entries in no guaranteed
# order, and ingest_wanli numbers repeated measurements by the order in which
# it sees them, so sort for names that are reproducible between runs.
files = sorted((data_path / "wanli" / "Unimodal NCM622_712Al-doped_NCMA_Ni L3").glob("*.txt"))

In [None]:
def ingest_wanli(c, files):
  """Insert one Ni L3 XASMeasurement document per file into collection c.

  For every path in `files` the sample and charge state are taken from the
  file name (parse_filename), the data are loaded with read_wanli, and a
  models.XASMeasurement is inserted whose metadata holds "charge", "sample"
  and "fname".

  Each document name is "<symbol>-<edge>-cycle<cycle>-<voltage>V-<state>" (or
  "<symbol>-<edge>-pristine") followed by "-<k>", where k counts how many
  documents with that sample and base name this call has produced so far.

  Parameters:
    c: collection object providing insert_one.
    files: iterable of pathlib.Path objects to ingest, processed in order.
  """
  # Occurrences seen so far per (sample, base name); drives the name suffix.
  seen = defaultdict(int)

  for file in files:
    fname = file.name
    sample, charge = parse_filename(fname)
    element = models.XDIElement(symbol="Ni", edge="L3")

    with open(file, "r") as f:
      df = read_wanli(f)

    metadata = {"charge": charge, "sample": sample, "fname": fname}
    data = models.DataFrameData.from_pandas(df)

    # Anything without a positive cycle number is treated as pristine.
    if not charge or not (charge["cycle"] > 0):
      name = f"{element.symbol}-{element.edge}-pristine"
    else:
      cycle, voltage, state = charge["cycle"], charge["voltage"], charge["state"]
      name = f"{element.symbol}-{element.edge}-cycle{cycle:d}-{voltage:0.1f}V-{state}"

    key = (sample, name)
    seen[key] += 1
    name += f"-{seen[key]}"

    print(sample, name)

    doc = models.XASMeasurement(
      name=name,
      element=element,
      metadata=metadata,
      data=data)

    c.insert_one(doc.dict())

In [None]:
# Ingest every file collected in `files` into the "import" collection.
ingest_wanli(c, files)

In [None]:
# Remove every document whose element.edge is "L3".
# NOTE(review): ingest_wanli creates its documents with edge="L3", so running
# the notebook top to bottom appears to delete what the previous cell just
# inserted — confirm whether this cell is meant to be run.
c.delete_many({"element.edge" : "L3"})

In [None]:
# Overlay "mu_tfy" and "mu_tey" against "energy" on one set of axes.
# NOTE(review): `x` is not assigned anywhere above this cell; the only
# assignment in this notebook is the aggregation loop further down, where it
# is a result document rather than a DataFrame — confirm what `x` should be.
fig, ax = plt.subplots()

x.plot("energy", "mu_tfy", ax=ax)
x.plot("energy", "mu_tey", ax=ax)

In [None]:
# Distinct values of metadata.sample across the collection.
c.find({}).distinct("metadata.sample")

In [None]:
# Distinct values of metadata.charge across the collection.
c.find({}).distinct("metadata.charge")

In [None]:
# Number of documents currently in the "spike" collection.
db["spike"].count_documents({})

In [None]:
# Handle to the "tiled" collection that the Sample documents are written to below.
tiled_collection = db["tiled"]

In [None]:
# Show everything currently stored in the "tiled" collection.
list(tiled_collection.find({}))

In [None]:
import util

In [None]:
# Destructive: the empty filter matches every document, so this empties the
# "tiled" collection.
tiled_collection.delete_many({})

In [None]:
# Presumably creates the folder entries for /NCM and /NCM/BM in the
# collection — confirm against util.mk_path.
util.mk_path(tiled_collection, ["NCM", "BM"])

In [None]:
# Show the collection contents after the mk_path call.
list(tiled_collection.find({}))

In [None]:
# Group the imported measurements by metadata.sample: each result document
# has the shape {"metadata": {"sample": <name>}, "measurements": [<full documents>]}.
g = c.aggregate([
  {"$group" : {"_id" : "$metadata.sample", "measurements" : {"$push": "$$ROOT"}}},
  {"$project" : {"_id" : 0, "metadata.sample" : "$_id", "measurements" : "$measurements"}},
])

# Build one models.Sample per group and store it in the "tiled" collection.
for x in g:
  sample_name = x["metadata"]["sample"]
  # Samples whose name starts with "NCM" are tagged with the "BM" configuration.
  if sample_name.startswith("NCM"):
    x["metadata"]["particle_size_configuration"] = "BM"
  print(sample_name)
  # The lambda parameter `x` shadows the loop variable only inside the lambda.
  measurements = list(map(lambda x: models.XASMeasurement.parse_obj(x), x["measurements"]))
  
  # Every sample, whatever its name, is placed under /NCM/BM/<sample name>.
  path = "/" + "/".join(["NCM", "BM", sample_name])
  sample = models.Sample(name=sample_name, folder=False, path=path, metadata=x["metadata"], measurements=measurements)
  tiled_collection.insert_one(sample.dict())

In [None]:
# Find the entries whose path is `path` followed by a single further segment:
# the regex is anchored at both ends and allows no additional "/".
# `path` is interpolated into the regex unescaped, so it must not contain
# regex metacharacters.
path = "/NCM/"
list(tiled_collection.find({"$and" : [{"path" : {"$regex" : f"^{path}[^/]*$"}}]}))

In [None]:
# Show the full contents of the "tiled" collection.
list(tiled_collection.find({}))

In [None]:
from pathlib import PurePosixPath

In [None]:
# Scratch: root path object used to try out PurePosixPath joining below.
p = PurePosixPath("/")

In [None]:
# Scratch: joining segments onto the root with "/" gives "/abc/123/fdsafsda".
str((p / "abc" / "123" / "fdsafsda"))

In [None]:
# Scratch: a path built from a bare name is relative (no leading "/").
PurePosixPath("fdasfda")