In [1]:
import pickle
import scanpy as sc
import anndata
import pandas as pd
import numpy as np

In [2]:
def wta_to_AnnData(data_path, target_counts_file, probe_counts_file, feature_annotations_file,
                   sample_annotations_file, saving = 'AnnData', returning = False):
    """Assemble an AnnData object from four tab-delimited tables and optionally pickle it.

    Parameters
    ----------
    data_path : str
        Prefix prepended to every file name by plain string concatenation, so a
        directory must be given with its trailing path separator.
    target_counts_file : str
        Table of counts; the first column is used as the row index and the
        remaining column labels are treated as the sample identifiers.
    probe_counts_file : str
        Table of counts indexed (first column) by the values found in the
        ``ProbeID`` column of the feature annotations; must contain every
        column of the target count table.
    feature_annotations_file : str
        Table with at least the columns ``ProbeID``, ``TargetName`` and
        ``Negative``. ``Negative`` is used directly as a boolean mask.
    sample_annotations_file : str
        Table with at least the column ``SegmentDisplayName``, whose values
        must cover the column labels of the target count table.
    saving : str or falsy, default 'AnnData'
        File name (appended to ``data_path``) to pickle the result to. Any
        falsy value skips writing.
    returning : bool, default False
        When truthy the assembled object is returned; otherwise the function
        returns ``None``.

    Returns
    -------
    The AnnData object if ``returning`` is truthy, else ``None``. In the result,
    ``X`` is samples x non-negative targets, ``var`` holds the non-negative
    feature annotations, ``obs`` holds the sample annotations, and
    ``obsm['negProbes']`` holds the samples x negative-probe counts.

    Raises
    ------
    KeyError
        If a required column is missing or a label cannot be found when the
        tables are aligned with ``.loc``.
    """
    target_counts = pd.read_table(data_path + target_counts_file, index_col=0)
    probe_counts = pd.read_table(data_path + probe_counts_file, index_col=0)
    feature_annotations = pd.read_table(data_path + feature_annotations_file, index_col=0)
    sample_annotations = pd.read_table(data_path + sample_annotations_file)

    # Put the sample annotations in the same order as the count-matrix columns.
    sample_annotations.index = sample_annotations['SegmentDisplayName']
    sample_annotations = sample_annotations.loc[target_counts.columns,:]

    # Pull the negative-probe counts out of the probe-level matrix, with the
    # samples ordered like the target count matrix.
    negative_probe_names = np.array(feature_annotations['ProbeID'].loc[feature_annotations['Negative']])
    negative_probe_counts = probe_counts.loc[negative_probe_names,:]
    negative_probe_counts = negative_probe_counts.loc[:,target_counts.columns]

    # Keep only the non-negative features and align the count rows to them.
    feature_annotations.index = feature_annotations['TargetName']
    feature_annotations = feature_annotations[~feature_annotations['Negative']]
    target_counts = target_counts.loc[feature_annotations.index,:]

    # The count table is features x samples, so transpose it for X.
    adata = sc.AnnData(X = np.array(target_counts).T)
    adata.var_names = target_counts.index
    adata.var = feature_annotations
    adata.obs = sample_annotations
    adata.obsm['negProbes'] = np.array(negative_probe_counts.T)

    if saving:
        # Context manager guarantees the pickle is flushed and the handle is
        # closed even if dumping raises.
        with open(data_path + saving, "wb") as pickle_file:
            pickle.dump(adata, pickle_file)

    if returning:
        return adata

In [3]:
# Convert the kidney count/annotation tables into a pickled AnnData object.
# The pickle is written under data_path; with returning=False the call yields None.
wta_to_AnnData(
    data_path='/nfs/team283/aa16/data/Kidney-Nanostring/',
    target_counts_file='Kidney_Raw_TargetCountMatrix.txt',
    probe_counts_file='Kidney_Raw_BioProbeCountMatrix.txt',
    feature_annotations_file='Kidney_Feature_Annotations.txt',
    sample_annotations_file='Kidney_Sample_Annotations.txt',
    saving='Kidney_AnnData.p',
    returning=False,
)