In [1]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import Normalize
import os
import cv2
import pandas as pd

def convert_coordinates(df):
    min_x_micro, min_y_micro,max_x_micro, max_y_micro = df['x'].min(), df['y'].min(),df['x'].max(), df['y'].max()
    pixel_length_x = df['x'].drop_duplicates().sort_values().diff().value_counts().idxmax()
    pixel_length_y = df['y'].drop_duplicates().sort_values().diff().value_counts().idxmax()

    df_converted = df.copy()
    df_converted['x'] = ((df_converted['x'] - min_x_micro) / pixel_length_x).round().astype(int)
    df_converted['y'] = ((df_converted['y'] - min_y_micro) / pixel_length_y).round().astype(int)
    return df_converted,(min_y_micro, min_x_micro,max_y_micro, max_x_micro), (pixel_length_y, pixel_length_x)

def create_intensity_image(df_intensity, df_coordinates, mz_value,
                          norm=True,denoise=False, smooth=True, smooth_method='gaussian', kernel_size=3):
    
    intensities = df_intensity[df_intensity['m/z'] == mz_value].iloc[0, 1:]
    #df_coordinates already converted and start from (0,0)
    max_x, max_y = df_coordinates['x'].max(), df_coordinates['y'].max()
    image = np.zeros((max_y  + 1, max_x  + 1))

    for spot, intensity in intensities.items():
        spot_number = int(spot.split(' ')[1])
        x, y = df_coordinates[df_coordinates['Spot'] == spot_number][['x', 'y']].iloc[0]
        image[y,x] = intensity
    
    if norm:
        image = (image - np.min(image)) / (np.max(image) - np.min(image))
#     if image.dtype != np.uint8:
#         image = (255 * image).astype(np.uint8)
    if denoise:
        image = cv2.fastNlMeansDenoising(image, None)
    if smooth:
        if smooth_method == 'gaussian':
            image = cv2.GaussianBlur(image, (kernel_size, kernel_size), 0)
        elif smooth_method == 'median':
            image = cv2.medianBlur(image, kernel_size)

    return image

def find_closest_mz_name(mz_value, df_feature_list):
    # Find the closest m/z value
    closest_match = df_feature_list.iloc[(df_feature_list['m/z'] - mz_value).abs().argsort()[:1]]
    return closest_match['Name'].values[0]



In [2]:
import pandas as pd
import matplotlib.image as mpimg
import os
import matplotlib.lines as mlines
import seaborn as sns
import matplotlib.pyplot as plt

def getImages(parent_dir,file_intensity,file_coordinates,feature_list,
              skip_rows_featureList,skip_rows_intensity,skip_rows_coordinates,verbose=True):
    
    #Data loading
    file_intensity_path = os.path.join(parent_dir,file_intensity)
    file_coordinates_path = os.path.join(parent_dir,file_coordinates)
    feature_list_path = os.path.join(parent_dir,feature_list)
    
    df_feature_list = pd.read_csv(feature_list_path, delimiter=';', skiprows=skip_rows_featureList)
    df_intensity = pd.read_csv(file_intensity_path, skiprows=skip_rows_intensity, header=0, delimiter=';')
    df_coordinates = pd.read_csv(file_coordinates_path, skiprows=skip_rows_coordinates, delimiter=';')
    df_coordinates.columns = ['Spot', 'x', 'y']
    #!!!!!mismatch between the 2 df(regions start from spot 0, intensities start from spot 1)
    df_coordinates['Spot'] = df_coordinates['Spot']+1

    #select only the spots used in df_intensities(ASSUME df_coordinates['Spot'] > spot_numbers)
    spot_numbers = [int(col.split(' ')[1]) for col in df_intensity.columns if 'Spot' in col]
    selected_coordinates = df_coordinates[df_coordinates['Spot'].isin(spot_numbers)]
    
    #convert df_coordinates from micro to pixel, and start from (0,0)
    df_converted_coordinates,(min_y_micro, min_x_micro,max_y_micro, max_x_micro), (pixel_length_y, pixel_length_x)\
                                                                    =convert_coordinates(selected_coordinates)
    num_mz_values = len(df_intensity['m/z'])
    print(f'parent dir {parent_dir} ')
    print(f'file_coordinates {file_coordinates} \n \
            top left&Bottom right {(min_y_micro, min_x_micro,max_y_micro, max_x_micro)} \n \
          pixel length in microns: {(pixel_length_y, pixel_length_x)}')
    print(f'\n feature list {feature_list} \n total number of m/z values: {num_mz_values}')

    image_dict = {}

    for idx, mz_value in enumerate(df_intensity['m/z']):
        mz_name = find_closest_mz_name(mz_value, df_feature_list)
        mz_name = mz_name.replace(":","-")
        image = create_intensity_image(df_intensity, df_converted_coordinates, mz_value)
        image_dict[mz_name] = image
        if not verbose:
            print(f'moclecule {mz_name}:{mz_value} m/z done')
    return image_dict


In [3]:
parent_dir='/mnt/data10/shared/yujie/LIPID_data/RawData/NA/A2/Met/230703Spheroids-6-A2-Laser70_left_NA_rawDATA_inner'
file_intensity = 'inner region-Total Ion Count_col.csv'
file_coordinates = '230703Spheroids-6-A2-Laser70_left_NA_rawDATA_regionSpots.csv'
feature_list = 'K17Metabolites-Raafat-FeatureList-adjustedglucose.csv'
skip_rows_featureList = 8
skip_rows_intensity = 10
skip_rows_coordinates = 8
image_dict_met = getImages(parent_dir,file_intensity,file_coordinates,feature_list,
                       skip_rows_featureList,skip_rows_intensity,skip_rows_coordinates)
image_dict_met.pop('Cytidine', None)#its empty for cytidine


parent_dir='/mnt/data10/shared/yujie/LIPID_data/RawData/NA/A2/Cer/Updated-230622Spheroids-5-A2-Laser30_left_NA_rawDATA_inner'
file_intensity = 'inner region-Total Ion Count_col.csv'
file_coordinates = '230622Spheroids-5-A2_left_NA_rawDATA_regionSpots.csv'
feature_list = '05-PeakList-PyMTSphingolipidProfile-Neg-3.csv'
skip_rows_featureList = 8
skip_rows_intensity = 10
skip_rows_coordinates = 8
image_dict_cer = getImages(parent_dir,file_intensity,file_coordinates,feature_list,
                       skip_rows_featureList,skip_rows_intensity,skip_rows_coordinates)
parent_dir='/mnt/data10/shared/yujie/LIPID_data/RawData/NA/A2/SM/Updated-230616Spheroids-4-A2_left_NA_rawDATA_inner'
file_intensity = 'Inner-Total Ion Count_col.csv'
file_coordinates = '230616Spheroids-4-A2_left_NA_rawDATA_regionSpots.csv'
feature_list = '04-PeakList-PyMTSphingolipidProfile-Pos-2.csv'
skip_rows_featureList = 8
skip_rows_intensity = 10
skip_rows_coordinates = 8
image_dict_sm = getImages(parent_dir,file_intensity,file_coordinates,feature_list,
                       skip_rows_featureList,skip_rows_intensity,skip_rows_coordinates)



parent dir /mnt/data10/shared/yujie/LIPID_data/RawData/NA/A2/Met/230703Spheroids-6-A2-Laser70_left_NA_rawDATA_inner 
file_coordinates 230703Spheroids-6-A2-Laser70_left_NA_rawDATA_regionSpots.csv 
             top left&Bottom right (208.48829650879, 208.08517456055, 1568.48828125, 1548.0852050781) 
           pixel length in microns: (20.0, 20.0)

 feature list K17Metabolites-Raafat-FeatureList-adjustedglucose.csv 
 total number of m/z values: 64


  image = (image - np.min(image)) / (np.max(image) - np.min(image))


parent dir /mnt/data10/shared/yujie/LIPID_data/RawData/NA/A2/Cer/Updated-230622Spheroids-5-A2-Laser30_left_NA_rawDATA_inner 
file_coordinates 230622Spheroids-5-A2_left_NA_rawDATA_regionSpots.csv 
             top left&Bottom right (253.20205688477, 191.15405273438, 1513.2020263672, 1531.1540527344) 
           pixel length in microns: (20.0, 20.0)

 feature list 05-PeakList-PyMTSphingolipidProfile-Neg-3.csv 
 total number of m/z values: 46
parent dir /mnt/data10/shared/yujie/LIPID_data/RawData/NA/A2/SM/Updated-230616Spheroids-4-A2_left_NA_rawDATA_inner 
file_coordinates 230616Spheroids-4-A2_left_NA_rawDATA_regionSpots.csv 
             top left&Bottom right (225.70343017578, 197.89024353027, 1515.7033691406, 1527.8902587891) 
           pixel length in microns: (10.0, 10.0)

 feature list 04-PeakList-PyMTSphingolipidProfile-Pos-2.csv 
 total number of m/z values: 39


In [4]:
import pickle
target_image = next(iter(image_dict_cer.values()))
target_height, target_width = target_image.shape[:2]

image_dict_all = image_dict_cer.copy()
print('overlapped keys',set(image_dict_all.keys())&set(image_dict_sm.keys()))
for mz_name, image in image_dict_sm.items():
    resized_image = cv2.resize(image, (target_width, target_height), interpolation=cv2.INTER_LINEAR)
    image_dict_all[mz_name] = resized_image#overlapped molecules in the feature list
print('overlapped keys',set(image_dict_all.keys())&set(image_dict_met.keys()))
for mz_name, image in image_dict_met.items():
    resized_image = cv2.resize(image, (target_width, target_height), interpolation=cv2.INTER_LINEAR)
    image_dict_all[mz_name] = resized_image

with open('./image_dict_all_NA_A2.pkl', 'wb') as f:
    pickle.dump(image_dict_all, f)

overlapped keys set()
overlapped keys set()
