In [1]:
import tensorflow as tf
import tensorflow.keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Reshape, Conv1D, MaxPooling1D, Flatten, Dense, Dropout
from tensorflow.keras.utils import to_categorical
import numpy as np
import matplotlib.pyplot as plt
import os
import random
import pandas as pd
import ast
from spectra_ml.io_ import load_spectra_metadata

In [2]:
# Notebook configuration: `spectrum_len` selects which "StdData-<len>"
# directory under $DATA_DIR is read.
spectrum_len = 500 # automate this

# $PWD is used as the directory that contains "lab-notebook"
# (presumably the directory the notebook server was launched from — TODO confirm).
parent_dir = os.environ['PWD']

stddata_dirname = "StdData-{}".format(spectrum_len)
stddata_path = os.path.join(os.environ['DATA_DIR'], stddata_dirname)

# Make this notebook's own folder the working directory.
notebook_dir = os.path.join(parent_dir, "lab-notebook", "smunukutla")
os.chdir(notebook_dir)

In [3]:
# Load the spectra metadata table and narrow it down step by step. Each filter
# is evaluated only on the rows that survived the previous one.
metadata = (
    load_spectra_metadata(os.path.join(stddata_path, "spectra-metadata.csv"))
    # keep reflectance rows only
    .loc[lambda rows: rows['value_type'] == "reflectance"]
    # drop rows whose spectrometer/purity code contains "NIC4"
    .loc[lambda rows: ~rows['spectrometer_purity_code'].str.contains("NIC4")]
    # keep rows whose raw data path contains "ChapterM"
    .loc[lambda rows: rows['raw_data_path'].str.contains("ChapterM")] # add in ChapterS Soils and Mixtures later
)

In [4]:
# Order the rows by material name. The sorted copy is rebound to `metadata`
# instead of sorting with inplace=True, so the frame produced by the boolean
# filtering is never mutated and the cell can be re-run safely.
metadata = metadata.sort_values('material')

In [5]:
# Dump the whole table as plain text: to_string() renders every row rather
# than pandas' shortened repr (long cell values may still be abbreviated).
metadata_text = metadata.to_string()
print(metadata_text)

     spectrum_id   value_type                      material spectrometer_purity_code measurement_type                                      raw_data_path
1692       00099  reflectance    Acmite NMNH133746 Pyroxene                    BECKa             AREF  ChapterM_Minerals/splib07a_Acmite_NMNH133746_P...
1545       00108  reflectance           Actinolite HS116.1B                   ASDFRb             AREF  ChapterM_Minerals/splib07a_Actinolite_HS116.1B...
1936       00114  reflectance           Actinolite HS116.2B                   ASDFRb             AREF  ChapterM_Minerals/splib07a_Actinolite_HS116.2B...
1372       00120  reflectance           Actinolite HS116.3B                    BECKb             AREF  ChapterM_Minerals/splib07a_Actinolite_HS116.3B...
2289       00124  reflectance           Actinolite HS116.3B                   ASDFRb             AREF  ChapterM_Minerals/splib07a_Actinolite_HS116.3B...
1441       00130  reflectance           Actinolite HS116.4B                   ASDF

In [6]:
# NOTE(review): `names` and `num` are not used in this cell; they are kept
# because cells further down may rely on them — confirm before deleting.
names = []
num = []

# Reduce each material label to its first space-separated word
# (e.g. "Actinolite HS116.1B" -> "Actinolite") so that all samples sharing
# that leading word are counted together.
mineral_names = metadata['material'].apply(lambda label: label.split(" ")[0])

# Number of spectra per mineral name, most frequent first.
series = mineral_names.value_counts()

# One row per mineral name with its count, in the same order as `series`.
# Built in a single constructor call rather than by assigning columns into an
# empty DataFrame.
frame = pd.DataFrame({'material': series.index, 'count': series.values})

print(frame.to_string())

                 material  count
0                   Topaz     25
1                 Olivine     24
2               Muscovite     20
3                  Albite     17
4              Actinolite     16
5              Microcline     15
6                Diopside     13
7                 Alunite     13
8              Serpentine     12
9                Jarosite     12
10               Hematite     12
11              Almandine     11
12              Kaolinite     11
13              Andradite     11
14               Chlorite     11
15        Montmorillonite     10
16             Hornblende     10
17             Lepidolite     10
18              Magnetite     10
19            Hypersthene      9
20             Antigorite      9
21                 Illite      9
22                   Talc      9
23               Goethite      8
24             Phlogopite      8
25             Richterite      7
26                 Galena      7
27                Epidote      7
28                  Beryl      7
29        

In [7]:
# Map each value in the first column of `frame` (the material name) to its row
# position, giving one integer label per material in the frame's current order.
# The column is converted to a list once up front so building the mapping is
# linear in the number of rows.
material_names = frame.iloc[:, 0].tolist()
dictionary = {material: position for position, material in enumerate(material_names)}
dictionary

{'Topaz': 0,
 'Olivine': 1,
 'Muscovite': 2,
 'Albite': 3,
 'Actinolite': 4,
 'Microcline': 5,
 'Diopside': 6,
 'Alunite': 7,
 'Serpentine': 8,
 'Jarosite': 9,
 'Hematite': 10,
 'Almandine': 11,
 'Kaolinite': 12,
 'Andradite': 13,
 'Chlorite': 14,
 'Montmorillonite': 15,
 'Hornblende': 16,
 'Lepidolite': 17,
 'Magnetite': 18,
 'Hypersthene': 19,
 'Antigorite': 20,
 'Illite': 21,
 'Talc': 22,
 'Goethite': 23,
 'Phlogopite': 24,
 'Richterite': 25,
 'Galena': 26,
 'Epidote': 27,
 'Beryl': 28,
 'Stilbite': 29,
 'Quartz': 30,
 'Gibbsite': 31,
 'Gypsum': 32,
 'Bytownite': 33,
 'Anorthite': 34,
 'Axinite': 35,
 'Jadeite': 36,
 'Siderite': 37,
 'Pyrophyllite': 38,
 'Knebelite': 39,
 'Carnallite': 40,
 'Halite': 41,
 'Chalcopyrite': 42,
 'Cummingtonite': 43,
 'Natrolite': 44,
 'Perthite': 45,
 'Smaragdite': 46,
 'Zoisite': 47,
 'Sphalerite': 48,
 'Allanite': 49,
 'Thenardite': 50,
 'Uralite': 51,
 'Tremolite': 52,
 'Diaspore': 53,
 'Pyrite': 54,
 'Glaucophane': 55,
 'Dolomite': 56,
 'Vesuvianit