In [1]:
import os
from dotenv import load_dotenv
#import sys
#import time
#import struct
#import glob
#import importlib
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib

from pymongo.mongo_client import MongoClient
from pymongo.server_api import ServerApi

# Populate the process environment from a local .env file before reading it.
# `load_dotenv` was imported above but never invoked, so a connection string
# kept in .env was never picked up. With its default arguments the call leaves
# already-set environment variables untouched and is a no-op when no .env file
# is found, so environments that export MONGO_DBR_URI directly keep working.
load_dotenv()

# MongoDB connection string; None when the variable is not defined anywhere.
MONGO_DBR_URI = os.getenv('MONGO_DBR_URI')

#from collections import Counter

#import libMaker as lm

In [2]:
# Name of the collection to read from the database.
# NOTE(review): the variable name suggests this is a MongoDB view rather than a
# plain collection — confirm against the database.
view_name = "spectral_library"

# Connection string comes from the MONGO_DBR_URI constant.
uri = MONGO_DBR_URI

# Build the client, requesting server API version '1'.
client = MongoClient(uri, server_api=ServerApi('1'))

# Issue a ping against the admin database to check that the deployment is
# reachable. A failure is only reported (printed); execution continues.
try:
    client.admin.command('ping')
    print("Pinged your deployment. You successfully connected to MongoDB!")
except Exception as err:
    print(err)

# Handles for the database and the spectral library collection inside it.
db = client["upwins_db"]
spectral_library = db[view_name]

Pinged your deployment. You successfully connected to MongoDB!


In [3]:
# Query the spectral library without passing any filter to find(), then
# tabulate the returned documents as one DataFrame row each.
records = spectral_library.find()
df = pd.DataFrame(data=records)

In [4]:
# File names identifying each spectrum.
names = df['ASD UPWINS base_fname'].to_numpy()

# Each 'spectrum' entry is a mapping; the keys of the first one are parsed as
# floats and used as the wavelength axis for the whole library.
# NOTE(review): this assumes every record's mapping has the same keys in the
# same order as the first record — confirm.
wl = np.array([*df['spectrum'][0]], dtype=float)

# One row per record, holding that record's mapping values in stored order.
spectra = np.array([[*entry.values()] for entry in df['spectrum']])

In [5]:
# Drop spectra with a large derivative: the mean absolute difference between
# adjacent bands is computed per spectrum and only those below 0.0025 are kept.
# Per the original note, the rejected spectra are the ones collected using
# sunlight.
derivitive = np.abs(spectra[:, 1:] - spectra[:, :-1]).mean(axis=1)
(select_indices,) = np.where(derivitive < 0.0025)

# Restrict the library data to the retained rows.
spectra = spectra[select_indices]
names = names[select_indices]

# Sizes of the filtered library.
nNames = len(names)
nSpec = spectra.shape[0]
nBands = len(wl)

print(' ')
print('...remove spectra collected using outdoor solar illumination...')
print(' ')
print(f'Number of names: {nNames}')
print(f'Number of spectra: {nSpec}')
print(f'Number of bands: {nBands}')

 
...remove spectra collected using outdoor solar illumination...
 
Number of names: 1020
Number of spectra: 1020
Number of bands: 2151


In [133]:
# Extract the metadata as numpy arrays.
#
# For every spectrum still present in `names`, look up its record in `df` (by
# 'ASD UPWINS base_fname', first match wins) and keep it only when its
# category is one of the accepted ones and its sub-category is not excluded.
# All fields of a record are read and combined BEFORE anything is appended, so
# a record with missing or non-string metadata is skipped as a whole and the
# per-field lists can never get out of step with `select_indices`.
genus = []
species = []
principle_part = []
health = []
growth_stage = []
DateTimeId = []
month = []
plant_type = []
name = []
name_full_category = []
name_genus_species = []
select_indices = []
skipped_names = []  # file names whose record could not be read

# Iterate over `names` itself (it is positionally aligned with `spectra`)
# rather than over range(len(df)): `names` may be shorter than `df`.
for i, fname in enumerate(names):

    try:
        row = df.loc[df['ASD UPWINS base_fname'] == fname]

        sub_category = row['sub-category'].values[0]
        category = row['category'].values[0]

        # NOTE(review): 'backrounds' looks like a misspelling of 'backgrounds';
        # it is kept verbatim because it must match the stored values — confirm.
        if sub_category in ['NA', 'backrounds'] or category not in ['vegetation', 'target_vegetation', 'soil', 'road']:
            continue

        genus_value = row['genus'].values[0]
        species_value = row['species'].values[0]
        part_value = row['principal_part'].values[0]
        health_value = row['health'].values[0]
        stage_value = row['growth_stage'].values[0]
        date_time_id = row['DateTimeUniqueIdentifier'].values[0]

        # characters 4-5 of the identifier are stored as the month
        month_value = date_time_id[4:6]
        genus_species = genus_value + '_' + species_value
        traits = part_value + '_' + health_value + '_' + stage_value
        if genus_value == 'NA':
            # not in our primary target vegetation library
            label = sub_category
        else:
            label = genus_species
        full_label = label + '_' + traits
    except (IndexError, KeyError, TypeError):
        # IndexError: no matching row; KeyError: column absent;
        # TypeError: a field is not a string (e.g. a missing value).
        skipped_names.append(fname)
        continue

    # Every value is available: record the spectrum in all lists at once.
    select_indices.append(i)
    plant_type.append(sub_category)
    genus.append(genus_value)
    species.append(species_value)
    principle_part.append(part_value)
    health.append(health_value)
    growth_stage.append(stage_value)
    name_genus_species.append(genus_species)
    DateTimeId.append(date_time_id)
    month.append(month_value)
    name.append(label)
    name_full_category.append(full_label)

if skipped_names:
    print(f'Skipped {len(skipped_names)} record(s) with missing or malformed metadata')

print(len(select_indices))

names = names[select_indices]
spectra = spectra[select_indices,:]
# create variables for the library metadata 
nSpec = len(names)
nBands = len(wl)

print(f'Number of spectra: {nSpec}')
print(f'Number of bands: {nBands}')
genus = np.asarray(genus)
species = np.asarray(species)
principle_part = np.asarray(principle_part)
health = np.asarray(health)
growth_stage = np.asarray(growth_stage)
DateTimeId = np.asarray(DateTimeId)
month = np.asarray(month)
name = np.asarray(name)
name_genus_species = np.asarray(name_genus_species)
name_full_category = np.asarray(name_full_category)
plant_type = np.asarray(plant_type)


1020
Number of spectra: 1020
Number of bands: 2151


In [20]:
# save numpy variables for the spectra and metadata
# np.save does not create missing directories, so make sure the output folder
# exists first (no-op when it is already there).
os.makedirs('data', exist_ok=True)

np.save('data/spectra', spectra)
np.save('data/wl', wl)
np.save('data/genus', genus)
np.save('data/species', species)
np.save('data/principle_part', principle_part)
np.save('data/health', health)
np.save('data/growth_stage', growth_stage)
np.save('data/DateTimeId', DateTimeId)
np.save('data/month', month)
np.save('data/name', name) # genus_species when the genus is not 'NA', otherwise the record's sub-category
np.save('data/name_genus_species', name_genus_species) # always genus + '_' + species, built from the raw fields even when the genus is 'NA'
np.save('data/name_full_category', name_full_category) # name + principal part + health + growth stage joined with '_'
np.save('data/plant_type', plant_type) # the record's sub-category (tree, shrub, grass, etc. per the original note); records with sub-category 'NA' are filtered out before this point