In [28]:
"""
Data parsers for files generated from different types of assays:
- ELISA (Enzyme-linked immunosorbent assay):
    -> data files are generated by a microplate reader from 96-well plates
- Softmax:
    -> another microplate reader for ELISA assays that allows for more extensive 
       data analysis for 96 and 384-well plates
- FACS (Fluorescence-activated cell sorting):
    -> data files are generated by different FACS machines and preprocessed with FlowJo. 
    -> the output data files are parsed
- Biacore/Biacore4000
    -> a molecular interaction analysis system for rapid screening and characterization.
    -> the output plate data is parsed
- MSD:
    -> Electrochemiluminescence detection system
    -> the output plate data is parsed

Some of the machine-generated data is sometimes preprocessed with a specific software, i.e. FlowJo for FACS. 
"""

from StringIO import StringIO
import re
import os
import csv
import pprint

# init pprint
pp = pprint.PrettyPrinter(indent=2)

In [29]:
def straight_elisa_parse_file(data_file):
    """ Parsing Straight ELISA files. Used for a single and multiple plates in one file.
    Args:
        data_file: file to Parse
    Returns:
        plates: a dictionary of plate, position, values
    """
    data_file.seek(0)
    data_file = StringIO(data_file.read().replace('\r', '\n'))
    plate_pattern = re.compile(r"^Plate:\t([^\t]+)\t")
    values_pattern = re.compile(r"^\t[^\t()A-Za-z]*((\t\d+(\.\d+)?){%d})\s*$" % 12)
    plates = {}
    for line in data_file:
        plate_match = plate_pattern.match(line)
        if plate_match:
            # Reset count
            last_position = 1
            plate_name = plate_match.group(1).strip()
            plates[plate_name] = []
        else:
            values_match = values_pattern.match(line)
            if values_match:
                values = values_match.group(1).strip().split("\t")
                for position, value in zip( range(last_position, last_position + len(values)), values):
                    plates[plate_name].append(float(value))
                last_position += len(values)
    return plates

# test a single 96 ELISA plate
#file_name = 'Data/1_straight_elisa.txt'
# test multiple 96 ELISA plates | NOTE: make sure to test with an original file (not splitted) 
file_name = 'Data/9_splitted_384.txt'
with open(file_name) as data_file: 
    plates = straight_elisa_parse_file(data_file)
    for antigen, values in plates.iteritems():
        print antigen, "=>", values
        print "==="*30

A3792.Ag25-01 => [0.0468, 0.0438, 0.0449, 0.0439, 0.0439, 0.0467, 0.0439, 0.0433, 0.0446, 0.0445, 0.0441, 0.0457, 0.0449, 0.044, 0.0455, 0.0442, 0.0522, 0.045, 0.0442, 0.0441, 0.0539, 0.0459, 0.0463, 0.0446, 0.0452, 0.0459, 0.045, 0.0464, 0.0536, 0.0452, 0.0445, 0.0447, 0.0532, 0.0455, 0.0446, 0.0458, 0.0454, 0.0443, 0.0444, 0.0442, 0.0442, 0.0448, 0.0442, 0.0441, 0.0536, 0.0442, 0.0451, 0.0463, 0.0437, 0.0432, 0.0449, 0.0432, 0.0449, 0.0426, 0.0442, 0.0436, 0.054, 0.0437, 0.0446, 0.0444, 0.0472, 0.0477, 0.0443, 0.0439, 0.0461, 0.061, 0.0442, 0.0443, 0.0536, 0.0433, 0.0446, 0.047, 0.0464, 0.0454, 0.0449, 0.0441, 0.053, 0.0457, 0.0431, 0.0444, 0.0476, 0.0437, 0.0451, 0.0446, 0.0477, 0.0465, 0.0458, 0.0462, 0.0537, 0.0454, 0.0458, 0.0573, 0.0504, 0.0453, 0.0477, 0.0447]
A3792.Ag25-02 => [0.0439, 0.0448, 0.048, 0.0448, 0.0441, 0.0457, 0.0531, 0.0445, 0.0461, 0.0446, 0.0447, 0.0454, 0.0444, 0.0468, 0.0451, 0.0532, 0.0455, 0.0449, 0.0456, 0.0466, 0.0455, 0.0446, 0.0447, 0.0453, 0.0486, 0.04

In [30]:
def softmax_parse_file(data_file):
    """ Parsing 384 ELISA files. Used for a single and multiple plates in one file.
    Args:
        data_file: file to Parse
    Returns:
        plates: a dictionary of plate, position, values
    """
    data_file = StringIO(data_file.read().replace('\r', '\n'))
    cols = 24
    plate_pattern = re.compile(r"^Plate:\t([^\t]+)\t")
    values_pattern = re.compile(r"^\t[^\t()A-Za-z]*((\t\d+(\.\d+)?){%d})\s*$" % cols)
    plate_names = []
    col_headers = '123456789101112131415161718192021222324' # 1-24 numeric col headers
    plates = {}
    for line in data_file:
        plate_match = plate_pattern.match(line)
        value_match = values_pattern.match(line)
        if plate_match:
            last_position = 1
            plate_name = plate_match.group(1).strip()
            plate_names.append(plate_name)
        elif value_match:
            values = value_match.group(1).strip().split("\t")
            current_plate_name = plate_names[-1]
            plates.setdefault(current_plate_name, [])
            for value in values:
                plates[current_plate_name].append(float(value))
    return plates

# test a single 384 ELISA plate
#file_name = 'Data/2_elisa_384.txt'
file_name = 'Data/2a_multiple_elisa_384.txt'
#file_name = 'Data/5_elisa_384_no_temp.txt'
with open(file_name) as data_file:
    plates = softmax_parse_file(data_file)
    for plate_name, values in plates.iteritems():
        print plate_name, "=>", values
        print "==="*30

PTH cross screen S3210-01 and 02 => [0.0548, 0.0511, 0.0546, 0.0551, 0.117, 0.0568, 0.1388, 0.0957, 0.3267, 0.2228, 0.1439, 0.0761, 0.2641, 0.13, 0.0924, 0.1247, 0.1051, 0.0799, 0.1259, 0.1028, 0.453, 0.0716, 0.1445, 0.0636, 0.0516, 0.0488, 0.0522, 0.0523, 0.0984, 0.0537, 0.1241, 0.0803, 0.2265, 0.2136, 0.1365, 0.0668, 0.2241, 0.1297, 0.0914, 0.0818, 0.1106, 0.0853, 0.1208, 0.088, 0.4499, 0.0682, 0.0994, 0.0715, 0.1326, 0.0526, 0.0857, 0.0521, 0.0638, 0.0511, 0.0686, 0.0535, 0.0647, 0.0538, 0.1989, 0.0538, 0.1713, 0.0594, 0.0886, 0.0598, 0.1646, 0.0603, 0.099, 0.0647, 0.0974, 0.0668, 0.2022, 0.0689, 0.1276, 0.052, 0.0883, 0.0518, 0.063, 0.0521, 0.0666, 0.0522, 0.0647, 0.0533, 0.2082, 0.0552, 0.1865, 0.0617, 0.0931, 0.0627, 0.1534, 0.0617, 0.1001, 0.063, 0.0948, 0.0657, 0.2291, 0.0712, 0.0636, 0.0508, 0.0617, 0.0525, 0.0571, 0.052, 0.0564, 0.0525, 0.0945, 0.0537, 0.3076, 0.0547, 0.0759, 0.0595, 0.0748, 0.0577, 0.098, 0.0633, 0.1012, 0.0655, 0.096, 0.0673, 0.0724, 0.0697, 0.0635, 0.0511,

In [31]:
def get_matrix_pos(col, row):
    """ From col and row show position in the matrix. """
    return col + 24 * (row - 1)

def split_4_quadrants(data_file):
    """ Split 384 file into 4 plates represented by each quadrant: 
            UL (Upper Left) | UR (Upper Right)
            LL (Lower Left) | LR (Lower Right)
    Args:
        data_file: ELISA 384-well plate
    Returns:
        An ordered dictionary of quadrant_platename_1: {pos_1: value, ...},
                                 quadrant_platename_2: {pos_1: value, ...}
    """
    plates = softmax_parse_file(data_file)
    quadrants = {'UL': {'x': [1, 12], 'y': [1, 8]},
                 'UR': {'x': [13, 24], 'y': [1, 8]},
                 'LL': {'x': [1, 12], 'y': [9, 16]},
                 'LR': {'x': [13, 24], 'y': [9, 16]}
                 }
    quadrant_plates = {}
    for plate, values in plates.iteritems():
        for quadrant, coordinates in quadrants.iteritems():
            # plate name is: "quadrant_plate"
            plate_name = "{0}_{1}".format(quadrant, plate)
            # get the list of predefined coordinates on x and y axis
            cols_coordinates = coordinates['x']
            rows_coordinates = coordinates['y']
            quadrant_plates.setdefault(plate_name, [])
            # walk through coordinates, get the position on the matrix, and show the correspoinding value
            for row in range(rows_coordinates[0], rows_coordinates[1] + 1):
                for col in range(cols_coordinates[0], cols_coordinates[1] + 1):
                    pos = get_matrix_pos(col, row)
                    quadrant_plates[plate_name].append(values[pos-1])
    return quadrant_plates

# test a single 384 ELISA - splitting into 4 quadrants
file_name = 'Data/2_elisa_384.txt'
with open(file_name) as data_file: 
    plates = split_4_quadrants(data_file)
    for plate_name, values in plates.iteritems():
        print plate_name, "=>", values
        print "==="*30

UR_PTH cross screen S3210-01 and 02 => [0.2641, 0.13, 0.0924, 0.1247, 0.1051, 0.0799, 0.1259, 0.1028, 0.453, 0.0716, 0.1445, 0.0636, 0.2241, 0.1297, 0.0914, 0.0818, 0.1106, 0.0853, 0.1208, 0.088, 0.4499, 0.0682, 0.0994, 0.0715, 0.1713, 0.0594, 0.0886, 0.0598, 0.1646, 0.0603, 0.099, 0.0647, 0.0974, 0.0668, 0.2022, 0.0689, 0.1865, 0.0617, 0.0931, 0.0627, 0.1534, 0.0617, 0.1001, 0.063, 0.0948, 0.0657, 0.2291, 0.0712, 0.0759, 0.0595, 0.0748, 0.0577, 0.098, 0.0633, 0.1012, 0.0655, 0.096, 0.0673, 0.0724, 0.0697, 0.0756, 0.0609, 0.0758, 0.0571, 0.1073, 0.0634, 0.0935, 0.0653, 0.0943, 0.0659, 0.0734, 0.0695, 0.5475, 0.0613, 0.0908, 0.0588, 0.2709, 0.0621, 0.1356, 0.0621, 0.0927, 0.0634, 0.2799, 0.0655, 0.4668, 0.0698, 0.0874, 0.0584, 0.2609, 0.0629, 0.1374, 0.0613, 0.0847, 0.0637, 0.307, 0.0683]
LL_PTH cross screen S3210-01 and 02 => [0.083, 0.0516, 0.0504, 0.0519, 0.0531, 0.0517, 0.0736, 0.0519, 0.0553, 0.0527, 0.0763, 0.0558, 0.0844, 0.0494, 0.051, 0.0502, 0.0537, 0.0527, 0.0747, 0.053, 0.05

In [32]:
def prepare_default_dict(plates):
    """ Quadrants are used to separate plate names; user-defined plate names will be used in the future.
    Args:
        plates: a list of plate names being parsed
    """
    quadrants = ['UL', 'UR', 'LL', 'LR']
    quadrant_plates = {}
    for plate in plates:
        for quadrant in quadrants:
            plate_name = "{0}_{1}".format(quadrant, plate)
            quadrant_plates.setdefault(plate_name, [])
    return quadrant_plates

def get_matrix(rows, cols):
    """
    Args:
        rows: a number of rows in the matrix
        cols: a number of cols in the matrix
    Return:
        A matrix for rows x cols and take into account offset of 1.
    """
    return [(i, j) for i in range(1, cols + 1) for j in range(1, rows + 1)]

def update_pos_value(plate_name, quadrant, pos, values, quadrant_plates):
    """ Add pos:value to a plate.
    """
    plate_name = "{0}_{1}".format(quadrant, plate_name)
    return quadrant_plates[plate_name].append(values[pos-1])

def split_96_quadrants(data_file):
    """ Split 384 plate into 96 quadrant in the following format:
          1 2 3 4 ...
        1 A B A B ...
        2 C D C D ...
        .
        .
        where A == Plate 1, B == Plate 2, C == Plate 3, D == Plate 4
    """
    # get the plates
    plates = softmax_parse_file(data_file)
    # generate default dict
    quadrant_plates = prepare_default_dict(plates.keys())
    # generate the matrix for the 384 plates
    matrix = get_matrix(24, 16)
    # add the approapiate data to each plate
    for plate, values in plates.iteritems():
        for pos in matrix:
            pos_y = pos[0]
            pos_x = pos[1]
            # algorithm to determine which data is related to which plate
            # based on (x, y) coordinates in the 384 plate matrix
            if pos_x % 2 != 0 and pos_y % 2 != 0:
                pos = get_matrix_pos(pos_x, pos_y)
                update_pos_value(plate, 'UL', pos, values, quadrant_plates)
            elif pos_x % 2 != 0 and pos_y % 2 == 0:
                pos = get_matrix_pos(pos_x, pos_y)
                update_pos_value(plate, 'UR', pos, values, quadrant_plates)
            elif pos_x % 2 == 0 and pos_y % 2 != 0:
                pos = get_matrix_pos(pos_x, pos_y)
                update_pos_value(plate, 'LL', pos, values, quadrant_plates)
            elif pos_x % 2 == 0 and pos_y % 2 == 0:
                pos = get_matrix_pos(pos_x, pos_y)
                update_pos_value(plate, 'LR', pos, values, quadrant_plates)

    return quadrant_plates

file_name = 'Data/2_elisa_384.txt'
with open(file_name) as data_file:
    plates = split_96_quadrants(data_file)
    for plate_name, values in plates.iteritems():
        print plate_name, "=>", values
        print "***"*10

UR_PTH cross screen S3210-01 and 02 => [0.0516, 0.0522, 0.0984, 0.1241, 0.2265, 0.1365, 0.2241, 0.0914, 0.1106, 0.1208, 0.4499, 0.0994, 0.1276, 0.0883, 0.063, 0.0666, 0.0647, 0.2082, 0.1865, 0.0931, 0.1534, 0.1001, 0.0948, 0.2291, 0.0635, 0.0547, 0.0562, 0.0555, 0.0789, 0.4249, 0.0756, 0.0758, 0.1073, 0.0935, 0.0943, 0.0734, 0.0561, 0.0808, 0.0578, 0.1509, 0.0708, 0.0817, 0.4668, 0.0874, 0.2609, 0.1374, 0.0847, 0.307, 0.0844, 0.051, 0.0537, 0.0747, 0.056, 0.0611, 0.1386, 0.3768, 0.1061, 0.0711, 0.0731, 0.0983, 0.0555, 0.0555, 0.0831, 0.057, 0.2429, 0.1296, 0.0632, 0.0668, 0.113, 0.1095, 0.1374, 0.1444, 0.0777, 0.0563, 0.8789, 0.1444, 0.0712, 0.0807, 0.0754, 0.0782, 0.0673, 0.0682, 0.0788, 0.0818, 0.0598, 0.0804, 0.149, 0.1509, 0.1851, 0.0844, 0.1712, 0.0914, 0.1033, 0.1804, 0.0797, 0.1054]
******************************
LL_PTH cross screen S3210-01 and 02 => [0.0511, 0.0551, 0.0568, 0.0957, 0.2228, 0.0761, 0.13, 0.1247, 0.0799, 0.1028, 0.0716, 0.0636, 0.0526, 0.0521, 0.0511, 0.0535, 0.

In [33]:
FACS_PATTERN = re.compile(r'[\w\d_-]+_[A-Z]\d+_(?P<well>[A-Z]\d{2})')
ANTIGEN_PATTERN = re.compile(r'(Ag\d+)')
ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"


def well_name_position(well_name, columns=12):
    '''Convert a well name (A01-K12) or (1-96) to its position (1-96)
    '''
    match = re.match( r"^(?P<row>[A-W])?(?P<column>\d{1,2})$", well_name.upper())
    if match:
        # If there is an alphanumeric row position
        if match.group("row"):
            row = ALPHABET.index(match.group("row"))
            column = int(match.group("column"))
            return (row * columns) + column
        # Otherwise the only position is an absolute one
        else:
            return int(match.group("column"))
    else:
        raise ValueError("Invalid well_name: %s" % str(well_name))


def antigen_header(antigen_header):
    """ Return antigen name
    """
    match = ANTIGEN_PATTERN.search(antigen_header)
    return match.group(1) if match else antigen_header


def facs_parse_file(data):
    """ Parsing ACCURI and FLOWJO FACS files.
    Args:
        data_file: file to Parse
    Returns:
        plates: a dictionary of plate, position, values
    """
    data_file.seek(0)
    filename = os.path.split(data_file.name)[1]
    data = data_file.read().replace('\r', '\n')
    reader = csv.reader(StringIO(data), dialect=csv.Sniffer().sniff(data))
    rows = [ row for row in reader if row ]
    default_name = os.path.splitext(filename)[0]
    antigen_names = [antigen_header(c) for c in rows[0][1:]]
    plates = {}
    for row in rows[1:]:
        match = FACS_PATTERN.search(row[0])
        if match:
            well = match.groupdict()['well']
            position = well_name_position(well)
            for column, ag_name in zip(row[1:], antigen_names):
                plate_name = match.groupdict().get('plate', default_name)
                full_plate_name = "{0} - {1}".format(plate_name, ag_name)
                plates.setdefault(full_plate_name, []).append(float(column))
    return plates

# file_name = 'Data/6_facs_96.txt'
file_name = 'Data/7_facs_384.txt'
with open(file_name) as data_file:
    plates = facs_parse_file(data_file)
    for plate_name, values in plates.iteritems():
        print plate_name, "=>", values
        print "==="*30

7_facs_384 - Ratio => [2.05, 2.05, 283.0, 392.0, 1.36, 1.43, 2.06, 8.77, 1.66, 0.58, 1.16, 1.62, 1.19, 1.97, 1.67, 2.94, 1.2, 1.17, 1.96, 1.33, 1.29, 1.27, 1.55, 1.32, 1.88, 2.25, 282.0, 246.0, 1.55, 1.21, 1.32, 2.14, 1.69, 1.47, 1.2, 1.16, 1.36, 1.5, 1.45, 2.04, 1.65, 1.08, 1.23, 1.38, 1.42, 1.44, 1.34, 1.09, 1.52, 1.22, 1.1, 1.47, 1.43, 1.54, 1.38, 1.41, 2.87, 1.18, 1.78, 3.03, 1.37, 1.35, 2.0, 1.22, 1.46, 1.33, 1.46, 1.34, 1.28, 2.03, 1.37, 1.58, 1.82, 1.3, 1.34, 1.44, 1.39, 1.18, 1.32, 1.24, 1.04, 1.39, 1.35, 1.72, 1.76, 2.77, 1.16, 2.57, 1.31, 1.26, 0.68, 1.14, 1.14, 1.43, 1.4, 1.54, 1.52, 2.98, 4.57, 14.3, 1.83, 0.79, 1.32, 1.32, 2.31, 3.09, 3.0, 1.94, 1.5, 1.29, 1.37, 1.25, 1.23, 1.34, 0.92, 1.62, 1.78, 1.07, 2.6, 2.0, 1.67, 3.14, 0.71, 1.6, 1.06, 1.1, 0.89, 1.49, 1.17, 1.17, 1.53, 1.29, 2.77, 2.42, 1.39, 1.38, 1.99, 2.04, 0.85, 1.27, 1.27, 0.93, 1.89, 1.34, 1.58, 1.25, 1.16, 0.67, 1.33, 1.35, 1.36, 1.62, 1.36, 1.72, 1.29, 2.78, 1.65, 3.08, 1.79, 1.78, 1.37, 1.19, 1.45, 1.24, 1.

In [34]:
def biacore_parse_file(data_file):
    """ Parsing Biacore files.
    Args:
        data_file: file to Parse
    Returns:
        plates: a dictionary of plate, position, values
    """
    data_file.seek(0)
    data = data_file.read().replace('\r', '\n')
    reader = csv.reader(StringIO(data), dialect=csv.Sniffer().sniff(data))
    rows = [row for row in reader if row]
    plates = {}
    for row in rows:
        if len(row) != 3:
            raise ValueError("Every row should have 3 columns")
        name = row[0].strip()
        position = well_name_position(row[1])
        value = float(row[2])
        plates.setdefault(name, []).append(value)
    return plates


def biacore4000_parse_file(data_file):
    """ Parsing Biacore4000 files.
    Args:
        data_file: file to Parse
    Returns:
        plates: a dictionary of plate, position, values
    """
    data_file.seek(0)
    data = data_file.read().replace('\r', '\n')
    reader = csv.reader(StringIO(data), dialect=csv.Sniffer().sniff(data))
    rows = [row for row in reader if row]
    plates = {}
    # skip the first line
    for row in rows[1:]:
        sample_name = row[7]
        antigen = row[5]
        value = row[3]
        match = re.match(r'^(.+)_(\w\d{2})$', sample_name)
        if match:
            plate_name = '{0} - {1}'.format(match.group(1), antigen)
            plates.setdefault(plate_name, []).append(float(value))
    return plates


print "Biacore: " + "==="*30
#file_name = 'Data/3_biacore.csv'
file_name_biacore = 'Data/3a_biacore_multiple.csv'
with open(file_name_biacore) as data_file:
    plates = biacore_parse_file(data_file)
    for plate_name, values in plates.iteritems():
        print plate_name, "=>", values
        print "***"*10
    print "\n\n"


print "Biacore4000: " + "==="*30

file_name_biacore_4000 = 'Data/4_biacore_4000.txt'
with open(file_name_biacore_4000) as data_file:
    plates = biacore4000_parse_file(data_file)
    for plate_name, values in plates.iteritems():
        print plate_name, "=>", values
        print "***"*10
    
    

260.B1.8B2.Ag112-02 => [12.8, 24.9, 69.2, 30.3, 103.3, 15.7, 12.4, 56.2, 14.5, 44.0, 46.5, 76.3, 60.0, 31.7, 23.3, 33.0, 14.3, 15.8, 12.7, 59.5, 12.9, 39.8, 10.7, 52.3]
******************************
260.B1.8B2.Ag104-01 => [7.7, 8.0, 470.5, 856.9, 871.9, 818.7, 716.5, 514.9, 432.4, 594.8, 587.4, 938.0, 245.0, 428.5, 587.7, 226.9, 195.9, 400.3, 329.1, 452.0, 436.3, 449.8, 739.6, 827.5, 543.9, 269.4, 147.1, 125.2, 412.8, 258.8]
******************************



428_B1_1_01 - Ag124-Fc => [-3.3, -5.0, -0.1, -5.3, -3.0, -3.9, -1.3, -4.7, -3.9, -2.2, -2.8, -4.3, -2.5, -2.5, -1.1, -3.2, -1.3, -3.1, -0.6, -3.9, -1.6, 0.8, -1.0, -2.9, 26.3, -2.5, 25.4, -2.4, -5.6, -1.6, -4.4, 30.2, -3.3, -2.2, -0.6, -7.0, -1.3, -0.1, -0.7, -3.3, 26.3, -3.6, -0.9, -0.7, -4.6, -2.7, 1.7, 0.1, -2.7, 1.4, 0.8, -1.2, -3.3, -2.6, -1.2, -1.4, -2.0, 4.2, 0.7, 18.6, -1.2, -3.3, 13.0, -3.5, 0.9, -2.2, 1.2, -1.3, 12.5, -0.6, 6.6, -1.5, -2.6, -1.2, -1.0, -0.9, -2.4, -1.4, -0.6, -1.2, -1.6, 2.9, 18.7, -0.7, -2.6, 2.9, -1.1,

In [35]:
def msd_parse_file(data_file):
    """ Parsing MSD files.
    Args:
        data_file: file to Parse
    Returns:
        plates: a dictionary of plate, position, values
    """
    data_file = StringIO(data_file.read().replace('\r', '\n'))
    plate_pattern = re.compile(r"^Plate #\s+:\s+([^\s]+)\s*$")
    values_pattern = re.compile(r"^[A-Z]+((\s+-?\d+){%d})\s*$" % 12)
    plate_names = []
    plates = {}
    for line in data_file:
        plate_match = plate_pattern.match(line)
        if plate_match:
            plate_name = plate_match.group(1)
            plate_names.append(plate_name)
        else:
            values_match = values_pattern.match(line)
            if values_match:
                values = values_match.group(1).strip().split()
                current_plate_name = plate_names[-1]
                plates.setdefault(current_plate_name, [])
                for value in values:
                    plates[current_plate_name].append(float(value))
    return plates

# file_name = 'Data/8_msd_single_plate.txt'
file_name = 'Data/8a_msd_multiple_plates.txt'
with open(file_name) as data_file:
    plates = msd_parse_file(data_file)
    for plate, values in plates.iteritems():
        print plate, " : ", len(values)
        print values

test-plate-1  :  96
[46.0, 41.0, 44.0, 58.0, 39.0, 49.0, 41.0, 36.0, 42.0, 40.0, 43.0, 34.0, 32.0, 38.0, 36.0, 27.0, 34.0, 37.0, 40.0, 29.0, 263.0, 41.0, 85.0, 63.0, 51.0, 68.0, 52.0, 81.0, 69.0, 57.0, 48.0, 33.0, 93.0, 97.0, 38.0, 79.0, 936.0, 138.0, 224.0, 42.0, 123.0, 46.0, 216.0, 42.0, 227.0, 80.0, 52.0, 37.0, 62.0, 291.0, 42.0, 46.0, 53.0, 36.0, 35754.0, 37.0, 32.0, 37.0, 40.0, 34.0, -2.0, 3.0, 2.0, -3.0, 0.0, -2.0, 4.0, -7.0, -7.0, 1.0, -3.0, -8.0, -5.0, -3.0, -2.0, -2.0, -7.0, -1.0, -3.0, -7.0, -9.0, -6.0, -7.0, -10.0, 1.0, -2.0, 0.0, 1.0, -6.0, -2.0, -2.0, 2.0, -4.0, -5.0, 1.0, -4.0]
test-plate-2  :  96
[52.0, 100.0, 52.0, 81.0, 57.0, 79.0, 47.0, 49.0, 43.0, 39.0, 44.0, 42.0, 45.0, 48.0, 49.0, 70.0, 45.0, 44.0, 48.0, 46.0, 201.0, 39.0, 71.0, 60.0, 64.0, 67.0, 64.0, 78.0, 69.0, 55.0, 56.0, 46.0, 79.0, 69.0, 43.0, 59.0, 774.0, 94.0, 90.0, 48.0, 76.0, 51.0, 93.0, 49.0, 83.0, 63.0, 54.0, 42.0, 59.0, 100.0, 42.0, 50.0, 46.0, 38.0, 70.0, 42.0, 38.0, 41.0, 40.0, 43.0, 5.0, 0.0, 9.0, 7