In [141]:
# environment set up
import tensorflow as tf
import tensorflow.keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Reshape, Conv1D, MaxPooling1D, Flatten, Dense, Dropout
from tensorflow.keras.utils import to_categorical
import tensorflow as tf # only use tensorflow keras
import numpy as np
import matplotlib.pyplot as plt
import os
import random
import pandas as pd
from spectra_ml.io_ import load_spectra_metadata

In [142]:
# Notebook configuration: spectrum length, data location, and working directory.
spectrum_len = 500 # TODO: automate this (currently hard-coded)
# Project root is taken from the PWD environment variable (raises KeyError if unset).
parent_dir = os.environ['PWD']
# Standardized spectra are expected under $DATA_DIR/StdData-<spectrum_len>.
stddata_path = os.path.join(os.environ['DATA_DIR'], "StdData-" + str(spectrum_len))
# Switch the process working directory to this notebook's folder under the project root.
os.chdir(os.path.join(parent_dir, "lab-notebook", "smunukutla"))

In [143]:
# Load the spectra metadata table and narrow it down in three successive steps:
#   1. keep rows whose value_type is "reflectance",
#   2. drop rows whose spectrometer_purity_code contains "NIC4",
#   3. keep rows whose raw_data_path contains "ChapterM".
metadata = (
    load_spectra_metadata(os.path.join(stddata_path, "spectra-metadata.csv"))
    .loc[lambda frame: frame['value_type'] == "reflectance"]
    .loc[lambda frame: ~frame['spectrometer_purity_code'].str.contains("NIC4")]
    .loc[lambda frame: frame['raw_data_path'].str.contains("ChapterM")]
)

metadata.head()

Unnamed: 0,spectrum_id,value_type,material,spectrometer_purity_code,measurement_type,raw_data_path
1210,9344,reflectance,Quartz HS32.3B,ASDFRc,AREF,ChapterM_Minerals/splib07a_Quartz_HS32.3B_ASDF...
1211,195,reflectance,Actinolite HS315.2B,ASDFRb,AREF,ChapterM_Minerals/splib07a_Actinolite_HS315.2B...
1212,2780,reflectance,Chromite HS281.3B,ASDFRc,AREF,ChapterM_Minerals/splib07a_Chromite_HS281.3B_A...
1213,7185,reflectance,Montmorillonite CM20,BECKb,AREF,ChapterM_Minerals/splib07a_Montmorillonite_CM2...
1215,5827,reflectance,Jarosite NMNH95074-1 (Na),BECKb,AREF,ChapterM_Minerals/splib07a_Jarosite_NMNH95074-...


In [144]:
# Scan every spectrum file referenced by `metadata` and record the ids of those
# whose second column (position 1) contains at least one NaN.
nan_records = set()  # values from metadata column 0 for spectra containing NaN
x = 0                # number of metadata rows flagged
for i in range(metadata.shape[0]):
    spectrum = pd.read_csv(os.path.join(stddata_path, "{}.csv".format(str(metadata.iloc[i, 0]))))
    # Vectorized NaN test over the whole column instead of a per-element Python loop.
    if spectrum.iloc[:, 1].isna().any():
        print(str(metadata.iloc[i, 3]) + " " + str(metadata.iloc[i, 0]) + " " + str(metadata.iloc[i, 2]))
        nan_records.add(metadata.iloc[i, 0])
        x += 1

print(x)

ASDFRb 08927 Psilomelane HS139.1B
BECKb 07311 Montmorillonite STx-1
ASDFRb 08962 Psilomelane HS139.4B
BECKc 06856 Microcline HS103.3B Feldspar
BECKa 06792 Mascagnite GDS65.b (fr)
ASDFRb 08943 Psilomelane HS139.3B
ASDFRb 08933 Psilomelane HS139.2B
7


In [146]:
# Display the set of ids collected for spectra that contain NaN values.
nan_records

{'06792', '06856', '07311', '08927', '08933', '08943', '08962'}

In [149]:
# for row in range(metadata.iloc[:, 0].shape[0]):
#     print(metadata.index[row])
#     print(metadata.iat[row, 0])

In [150]:
def removenans(metadata, nan_set):
    """Build a boolean keep-mask over the rows of ``metadata``.

    Parameters
    ----------
    metadata : pandas.DataFrame
        Table whose first column (position 0) holds the value to test.
    nan_set : set
        Values to exclude; any container supporting ``in`` works.

    Returns
    -------
    pandas.Series
        Boolean Series sharing ``metadata``'s index: ``True`` where the
        first-column value is NOT in ``nan_set``, ``False`` where it is.
        Suitable for direct use as ``metadata[mask]``.
    """
    keep = [value not in nan_set for value in metadata.iloc[:, 0]]
    # dtype=bool is forced so that an empty frame still yields a boolean mask;
    # without it an empty Series is not boolean and cannot be used reliably
    # as a row filter.
    return pd.Series(keep, index=metadata.index, dtype=bool)

In [153]:
# for y in metadata:
#     print(y)

In [154]:
# Preview the keep-mask: True for rows whose first-column value is not in nan_records.
removenans(metadata, nan_records)

1210    True
1211    True
1212    True
1213    True
1215    True
1217    True
1218    True
1219    True
1220    True
1221    True
1222    True
1225    True
1228    True
1229    True
1230    True
1231    True
1232    True
1234    True
1235    True
1236    True
1239    True
1240    True
1241    True
1242    True
1243    True
1244    True
1246    True
1248    True
1250    True
1252    True
        ... 
2443    True
2444    True
2445    True
2446    True
2447    True
2449    True
2450    True
2452    True
2453    True
2457    True
2458    True
2460    True
2461    True
2462    True
2464    True
2467    True
2468    True
2469    True
2470    True
2471    True
2472    True
2473    True
2474    True
2476    True
2478    True
2479    True
2480    True
2481    True
2483    True
2484    True
Length: 887, dtype: bool

In [155]:
# Keep only the rows that removenans marks True (first-column value not in nan_records).
metadata = metadata.loc[removenans(metadata, nan_records)]

In [156]:
# Display the metadata table in its current (filtered) state.
metadata

Unnamed: 0,spectrum_id,value_type,material,spectrometer_purity_code,measurement_type,raw_data_path
1210,09344,reflectance,Quartz HS32.3B,ASDFRc,AREF,ChapterM_Minerals/splib07a_Quartz_HS32.3B_ASDF...
1211,00195,reflectance,Actinolite HS315.2B,ASDFRb,AREF,ChapterM_Minerals/splib07a_Actinolite_HS315.2B...
1212,02780,reflectance,Chromite HS281.3B,ASDFRc,AREF,ChapterM_Minerals/splib07a_Chromite_HS281.3B_A...
1213,07185,reflectance,Montmorillonite CM20,BECKb,AREF,ChapterM_Minerals/splib07a_Montmorillonite_CM2...
1215,05827,reflectance,Jarosite NMNH95074-1 (Na),BECKb,AREF,ChapterM_Minerals/splib07a_Jarosite_NMNH95074-...
1217,10141,reflectance,Sphalerite S26-35,BECKb,AREF,ChapterM_Minerals/splib07a_Sphalerite_S26-35_B...
1218,09530,reflectance,Riebeckite NMNH122689 Amph,BECKa,AREF,ChapterM_Minerals/splib07a_Riebeckite_NMNH1226...
1219,04139,reflectance,Galena S102-1B,BECKb,AREF,ChapterM_Minerals/splib07a_Galena_S102-1B_BECK...
1220,03859,reflectance,Enstatite NMNH128288,BECKc,AREF,ChapterM_Minerals/splib07a_Enstatite_NMNH12828...
1221,08438,reflectance,Orthoclase NMNH113188,BECKb,AREF,ChapterM_Minerals/splib07a_Orthoclase_NMNH1131...


In [157]:
# Sanity check: count rows whose column 3 value contains the substring "NIC".
num_nic = sum(1 for code in metadata.iloc[:, 3] if code.find("NIC") >= 0)

print(num_nic)

0
