In [1]:
import numpy as np
import pandas as pd
import pickle
import os

In [2]:
import os

def find_roi_files(root_dir, string_list=('.pkl', 'roi')):
    """Recursively collect files whose names contain every required substring.

    Matching is case-insensitive and is applied to the bare file name only
    (never to the directory part of the path).

    Parameters
    ----------
    root_dir : str or path-like
        Directory to walk. ``os.walk`` ignores listing errors by default, so a
        missing directory simply yields an empty result.
    string_list : str or iterable of str, optional
        Substrings that must ALL occur in a file name for it to be selected.
        Defaults to ``('.pkl', 'roi')``, i.e. pickled ROI files. A single
        string is treated as one required substring.

    Returns
    -------
    list of str
        Paths (``root`` joined with the file name) of the matching files, in
        sorted order. ``os.walk`` yields entries in arbitrary directory order,
        so sorting keeps everything built from this list reproducible.
    """
    if isinstance(string_list, str):
        string_list = [string_list]
    # Lower-case both sides so the comparison is case-insensitive.
    required = [fragment.lower() for fragment in string_list]

    matching_files = []
    for root, _, files in os.walk(root_dir):
        for filename in files:
            lowered = filename.lower()
            if all(fragment in lowered for fragment in required):
                matching_files.append(os.path.join(root, filename))
    return sorted(matching_files)

In [3]:
# Lookup tables that translate the codes embedded in ROI names into labels.
#
# plant_codes: keyed by the 8-character species prefix of an ROI name.
# Each value is [genus, species, common name, plant type, reference URL].
# Only genus and species are used to build the class label; the remaining
# fields are descriptive.
plant_codes = {
    'Ammo_bre': ['Ammophila', 'breviligulata', 'American Beachgrass', 'grass', 'https://en.wikipedia.org/wiki/Ammophila_breviligulata'],
    'Chas_lat': ['Chasmanthium', 'latifolium', 'River Oats', 'grass', 'https://en.wikipedia.org/wiki/Chasmanthium_latifolium'],
    'Pani_ama': ['Panicum', 'amarum', 'Coastal Panic Grass', 'grass', 'https://en.wikipedia.org/wiki/Panicum_amarum'],
    'Pani_vir': ['Panicum', 'virgatum', 'Switch Grass', 'grass', 'https://en.wikipedia.org/wiki/Panicum_virgatum'],
    # URL previously pointed at the Chasmanthium latifolium page (copy-paste slip).
    'Soli_sem': ['Solidago', 'sempervirens', 'Seaside Goldenrod', 'succulent', 'https://en.wikipedia.org/wiki/Solidago_sempervirens'],
    'Robi_his': ['Robinia', 'hispida', 'Bristly locust', 'shrub', 'https://en.wikipedia.org/wiki/Robinia_hispida'],
    # Common name previously duplicated 'Bristly locust' from the Robinia entry.
    # NOTE(review): the URL spells the epithet 'pensylvanica'; the species string
    # below is left unchanged because it becomes part of the class label.
    'More_pen': ['Morella', 'pennsylvanica', 'Northern Bayberry', 'shrub', 'https://en.wikipedia.org/wiki/Myrica_pensylvanica'],
    'Rosa_rug': ['Rosa', 'rugosa', 'Sandy Beach Rose', 'shrub', 'https://en.wikipedia.org/wiki/Rosa_rugosa'],
    'Cham_fas': ['Chamaecrista', 'fasciculata', 'Partridge Pea', 'legume', 'https://en.wikipedia.org/wiki/Chamaecrista_fasciculata'],
    'Soli_rug': ['Solidago', 'rugosa', 'Wrinkleleaf goldenrod', 'shrub', 'https://en.wikipedia.org/wiki/Solidago_rugosa'],
    'Bacc_hal': ['Baccharis', 'halimifolia', 'Groundseltree', 'shrub', 'https://en.wikipedia.org/wiki/Baccharis_halimifolia'],
    # Trailing underscore pads the key to the same 8-character width as the others.
    'Iva_fru_': ['Iva', 'frutescens', 'Jesuits Bark ', 'shrub', 'https://en.wikipedia.org/wiki/Iva_frutescens'],
    'Ilex_vom': ['Ilex', 'vomitoria', 'Yaupon Holly', 'evergreen shrub', 'https://en.wikipedia.org/wiki/Ilex_vomitoria']
}
# age_codes: growth-stage code found in an ROI name -> [description, label].
# The second element is the label that is stored for the spectrum.
# Retired entries: 'RE' and 'D' (both now live in lifecycle_codes) and
# '1F' (Year 1 Flowering).
age_codes = {
    'PE': ['Post Germination Emergence', 'PE'],
    # Legacy code 'E' (Emergence from seed) is folded into 'PE'.
    'E': ['Post Germination Emergence', 'PE'],
    '1G': ['Year 1 growth', '1G'],
    '2G': ['Year 2 growth', '2G'],
    'J': ['Juvenile', 'J'],
    'M': ['Mature', 'M'],
}
# principal_part_codes: plant-part code found in an ROI name -> [description, label].
# Several legacy codes are kept as keys but mapped onto a current label:
#   'SA' (Shoot Apex)                        -> 'ST'
#   'CS' (Colar Sprout), 'RS' (Root Sprout)  -> 'SP'
#   'B'  (Blade)                             -> 'L'
# The old single-letter seed code 'S' is retired in favour of 'SE' because
# 'S' is also used by other code tables; 'IS' and 'St' are retired as well.
principal_part_codes = {
    'MX': ['Mix', 'MX'],
    'SA': ['Internode Stem', 'ST'],
    'L': ['Leaf/Blade', 'L'],
    'ST': ['Internode Stem', 'ST'],
    'SP': ['Sprout', 'SP'],
    'CS': ['Sprout', 'SP'],
    'RS': ['Sprout', 'SP'],
    'LG': ['Lignin', 'LG'],
    'FL': ['Flower', 'FL'],
    'B': ['Leaf/Blade', 'L'],
    'FR': ['Fruit', 'FR'],
    'SE': ['Seed', 'SE'],
}
# health_codes: health/stress code found in an ROI name -> [description, label].
# 'D' (Dormant) is no longer a health code; it is defined in lifecycle_codes.
health_codes = {
    'MH': ['Healthy/Unhealthy Mix', 'MH'],
    'DS': ['Drought Stress', 'DS'],
    'SS': ['Salt Stress (soak)', 'SS'],
    'SY': ['Salt Stress (spray)', 'SY'],
    'S': ['Stressed', 'S'],
    'LLRZ': ['LLRZ Lab Stress', 'LLRZ'],
    'R': ['Rust', 'R'],
    'H': ['Healthy', 'H'],
}

# lifecycle_codes: lifecycle-stage code found in an ROI name -> [description, label].
lifecycle_codes = {
    'D': ['Dormant', 'D'],
    'RE': ['Re-emergence', 'RE'],
    'FLG': ['Flowering', 'FLG'],
    'FRG': ['Fruiting', 'FRG'],
    'FFG': ['Fruiting and Flowering', 'FFG'],
    'N': ['Neither', 'N'],
}

# Accumulators for the parsed data. The ROI-loading cell appends one spectrum
# to d_spectra and one label to each of the label lists per spectrum, so all
# of them stay index-aligned.
d_spectra = []
d_plant, d_part, d_health, d_age, d_lifecycle = [], [], [], [], []

# Label category -> the list that collects that category's labels.
yd_all_dict = dict(
    plant=d_plant,
    age=d_age,
    part=d_part,
    health=d_health,
    lifecycle=d_lifecycle,
)

# Label category -> the code table used to decode it from an ROI name.
# Uses the same category names, in the same order, as yd_all_dict.
code_category_dict = dict(
    plant=plant_codes,
    age=age_codes,
    part=principal_part_codes,
    health=health_codes,
    lifecycle=lifecycle_codes,
)

In [4]:
def parse_roi_name(roi_name, category_codes):
    """Decode the metadata codes embedded in an ROI name.

    Parameters
    ----------
    roi_name : str
        ROI name such as ``'Ilex_vom_1G-FL'``: an 8-character species prefix
        followed by codes separated by ``_`` or ``-``.
    category_codes : dict
        Maps a category name to its code table. The ``'plant'`` table is
        matched against the first 8 characters (case-insensitively); every
        other table is matched by looking for ``_<code>_`` in the name.

    Returns
    -------
    dict
        Category name -> label. A category with no matching code gets
        ``'-1'``. If several codes of one category match, the one defined
        last in the table wins.
    """
    # A trailing delimiter lets the last code be matched as '_<code>_' too.
    # endswith() is also safe for an empty name, unlike indexing with [-1].
    if not roi_name.endswith('_'):
        roi_name = roi_name + '_'
    # Names such as 'Ilex_vom_1G-FL' join codes with a hyphen; treat it as a
    # delimiter, otherwise neither '1G' nor 'FL' would ever be recognised.
    delimited_name = roi_name.replace('-', '_')
    species_prefix = roi_name[:8].lower()

    labels = {}
    for cat, codes in category_codes.items():
        labels[cat] = '-1'
        for key, value in codes.items():
            if cat == 'plant':
                if species_prefix == key.lower():
                    labels[cat] = value[0] + '_' + value[1]
            elif '_' + key + '_' in delimited_name:
                labels[cat] = value[1]
    return labels


roi_files = find_roi_files('data/pkl/rois/')

print(f"Number of ROI files found: {len(roi_files)}")

# Start from empty accumulators so that re-running this cell rebuilds the
# data set instead of appending every spectrum a second time. The label lists
# are cleared in place and yd_all_dict is re-pointed at them, which also
# undoes any later replacement of its values by arrays.
d_spectra = []
for label_list in (d_plant, d_age, d_part, d_health, d_lifecycle):
    label_list.clear()
yd_all_dict = {
    'plant': d_plant,
    'age': d_age,
    'part': d_part,
    'health': d_health,
    'lifecycle': d_lifecycle
}

for roi_filename in roi_files:
    # SECURITY: pickle.load can execute arbitrary code while unpickling;
    # only load ROI files that come from a trusted source.
    with open(roi_filename, 'rb') as f:
        roiData = pickle.load(f)
    roi_df = roiData.df  # a DataFrame holding all the data for the ROI

    # Columns from index 4 onward are taken as the spectral values
    # (presumably the first four columns are per-pixel metadata -- TODO confirm).
    spectra = roi_df.to_numpy()[:, 4:]
    spectra = spectra.astype(np.float32)
    spectra_names = roi_df['Name'].to_numpy()

    roi_names = roiData.names  # the names of the ROIs

    print(f"Number of ROIs found in {roi_filename}: {len(roi_names)}")

    for name in roi_names:
        # Rows are selected with the name exactly as stored in the DataFrame.
        class_spectra = spectra[spectra_names == name]

        # parse name for metadata
        class_data_dict = parse_roi_name(name, code_category_dict)

        # Every spectrum of this ROI receives the same set of labels.
        d_spectra.extend(class_spectra)
        for key, labels in yd_all_dict.items():
            labels.extend([class_data_dict[key]] * len(class_spectra))


Number of ROI files found: 1
Number of ROIs found in data/pkl/rois/ROIs_4-25_Ilex_vom.pkl: 2


In [5]:
# Sanity check: the spectra list and every label list should be equally long.
print(len(d_spectra))

for labels in yd_all_dict.values():
    print(len(labels))

2506
2506
2506
2506
2506
2506


In [6]:
# roi_names is reassigned for every file inside the ROI-loading loop, so this
# shows the ROI names of the last file that was processed only.
print(roi_names)

['Ilex_vom_1G-FL', 'Ilex_vom_J']


In [7]:
# Spot-check a single sample: print its spectrum and the label stored for it
# in each category.
sample_index = 2505
print(d_spectra[sample_index])
for key, labels in yd_all_dict.items():
    print(key, labels[sample_index])

[ 0.14013967  0.12249807  0.1625661   0.11155654 -0.00356607  0.11247256
  0.03450296  0.04424099  0.05918107  0.03874531  0.07475629  0.04366582
  0.06613352  0.04818413  0.07135563  0.0797596   0.04574396  0.06271511
  0.0771033   0.02558763  0.04790235  0.05029211  0.07983825  0.05385766
  0.04666815  0.05226171  0.04608772  0.06354913  0.03709427  0.0617037
  0.05559583  0.04997871  0.03684599  0.03880306  0.04616464  0.06713266
  0.04615017  0.05755078  0.05911346  0.06523842  0.05877242  0.05359749
  0.05121133  0.05942667  0.04706436  0.05448588  0.04399405  0.06656574
  0.05501363  0.06873445  0.06475929  0.0757568   0.07243631  0.08855163
  0.09842151  0.1009552   0.1113176   0.11622603  0.12783916  0.12490982
  0.13170357  0.1393295   0.14042693  0.146815    0.15629038  0.1544635
  0.15365875  0.15495467  0.15107535  0.15176056  0.15092959  0.17237388
  0.14900811  0.15034378  0.15691485  0.14706612  0.14113092  0.13542119
  0.13954699  0.12394089  0.11552539  0.11308115  0.1

In [8]:
# Convert the accumulated Python lists into NumPy arrays and report their shapes.
d_spectra = np.asarray(d_spectra)
print(d_spectra.shape)

# Iterate over a snapshot of the items because the dict values are replaced
# inside the loop.
for key, labels in list(yd_all_dict.items()):
    label_array = np.asarray(labels)
    yd_all_dict[key] = label_array
    print(key, label_array.shape)


(2506, 272)
plant (2506,)
age (2506,)
part (2506,)
health (2506,)
lifecycle (2506,)
