From 29294fe33fc302ab4f030cae87a80c87d9b7089c Mon Sep 17 00:00:00 2001
From: Maxmilian Bertsch <max.bertsch@gmx.de>
Date: Mon, 15 May 2023 15:12:48 +0200
Subject: [PATCH 01/10] use bitmask to only read meaningful bits for integer
 values

---
 flowio/flowdata.py | 57 +++++++++++++++++++++++++++++++++++-----------
 1 file changed, 44 insertions(+), 13 deletions(-)

diff --git a/flowio/flowdata.py b/flowio/flowdata.py
index 60b7ec2..6b6e2ee 100644
--- a/flowio/flowdata.py
+++ b/flowio/flowdata.py
@@ -4,6 +4,7 @@
 from warnings import warn
 import os
 import re
+import math
 from functools import reduce
 from .create_fcs import create_fcs
 from .exceptions import FCSParsingError, DataOffsetDiscrepancyError, MultipleDataSetsError
@@ -307,16 +308,19 @@ def __parse_data(self, offset, start, stop, text):
             order = '@'
             # from here on out we assume mode "l" (list)
 
-        bit_width = []
+        bit_width_by_channel = {}
+        max_range_by_channel = {}
         for i in range(1, int(text['par']) + 1):
-            bit_width.append(int(text['p%db' % i]))
+            bit_width_by_channel[i] = int(text['p%db' % i])
+            max_range_by_channel[i] = int(text['p%dr' % i])
 
         if data_type.lower() == 'i':
             data = self.__parse_int_data(
                 offset,
                 start,
                 stop,
-                bit_width,
+                bit_width_by_channel,
+                max_range_by_channel,
                 order
             )
         else:
@@ -368,42 +372,69 @@ def __calc_data_item_count(self, start, stop, data_type_size):
 
         return num_items, stop
 
-    def __parse_int_data(self, offset, start, stop, bit_width, order):
+    def __parse_int_data(self, offset, start, stop, bit_width_by_channel, 
+                         max_range_by_channel, order):
         """Parse out and return integer list data from FCS file"""
 
-        if reduce(and_, [item in [8, 16, 32] for item in bit_width]):
+        if reduce(and_, [item in [8, 16, 32] for item in bit_width_by_channel.values()]):
             # We have a uniform bit width for all parameters,
             # use the first value to determine the number of actual events
-            if len(set(bit_width)) == 1:
-                data_type_size = bit_width[0] / 8
+            if len(set(bit_width_by_channel.values())) == 1:
+                bit_width = list(bit_width_by_channel.values())[0]
+                data_type_size = bit_width / 8
                 num_items, stop = self.__calc_data_item_count(start, stop, data_type_size)
 
                 self._fh.seek(offset + start)
-                tmp = array.array(self.__format_integer(bit_width[0]))
+                tmp = array.array(self.__format_integer(bit_width))
                 tmp.fromfile(self._fh, int(num_items))
                 if order == '>':
                     tmp.byteswap()
-
+                # acording to the FCS standard the PnR value of Integer values
+                # determines how many bit of the max bit_width are actually used
+                # for the data
+                if any(2**bit_width_by_channel[c] > max_range_by_channel[c] for 
+                       c in bit_width_by_channel.keys()) :
+                    # for i in range(len(tmp)):
+                    #     value = tmp.pop(0)
+                    #     channel = i % len(bit_width_by_channel) + 1
+                    #     tmp.append(value % max_range_by_channel[channel])
+                    amount_data_points = int(num_items / len(max_range_by_channel))
+                    #create bit mask for extracting the right amount of bits
+                    bit_mask = array.array(self.__format_integer(bit_width), 
+                                           [mr -1 for mr in max_range_by_channel.values()]*amount_data_points)
+                    new_tmp = array.array(self.__format_integer(bit_width))
+                    new_tmp.frombytes(bytes(map(lambda a,b: a&b, tmp.tobytes(), bit_mask.tobytes())))
+                    tmp = new_tmp
             # parameter sizes are different
             # e.g. 8, 8, 16, 8, 32 ...
             else:
                 # can't use array for heterogeneous bit widths
-                tmp = self.__extract_var_length_int(bit_width, offset, order, start, stop)
+                tmp = self.__extract_var_length_int(bit_width_by_channel, max_range_by_channel,
+                                                    offset, order, start, stop)
 
         else:  # non standard bit width...  Does this happen?
             warn('Non-standard bit width for data segments')
             return None
         return tmp
 
-    def __extract_var_length_int(self, bit_width, offset, order, start, stop):
+    def __extract_var_length_int(self, bit_width_by_channel, max_range_by_channel, 
+                                 offset, order, start, stop):
         data_format = order
-        for cur_width in bit_width:
+        for cur_width in bit_width_by_channel.values():
             data_format += '%s' % self.__format_integer(cur_width)
 
         # array module doesn't have a function to heterogeneous bit widths,
         # so fall back to the slower unpack approach
         tuple_tmp = iter_unpack(data_format, self.__read_bytes(offset, start, stop))
-        tmp = [ti for t in tuple_tmp for ti in t]
+        if any(2**bit_width_by_channel[c] > max_range_by_channel[c] for 
+               c in bit_width_by_channel.keys()) :
+            tmp = []
+            for data_tuple in tuple_tmp:
+                for channel, max_range in max_range_by_channel.items():
+                    tmp.append(data_tuple[channel-1] % max_range)
+        else:
+            tmp = [ti for t in tuple_tmp for ti in t]
+        
         return tmp
 
     def __parse_non_int_data(self, offset, start, stop, data_type, order):

From 50caf6ad6877312b6c9e698c0458665b45281bd0 Mon Sep 17 00:00:00 2001
From: Maxmilian Bertsch <max.bertsch@gmx.de>
Date: Mon, 15 May 2023 15:13:01 +0200
Subject: [PATCH 02/10] add test for fcs3.0 data of lmd file

---
 flowio/tests/flowdata_lmd_tests.py | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/flowio/tests/flowdata_lmd_tests.py b/flowio/tests/flowdata_lmd_tests.py
index a1393e7..24c512a 100644
--- a/flowio/tests/flowdata_lmd_tests.py
+++ b/flowio/tests/flowdata_lmd_tests.py
@@ -1,6 +1,7 @@
 import unittest
 import warnings
-from flowio import FlowData
+import array
+from flowio import FlowData, read_multiple_data_sets
 from flowio.exceptions import FCSParsingError
 
 
@@ -8,10 +9,9 @@ class FlowDataLMDTestCase(unittest.TestCase):
     def setUp(self):
         with warnings.catch_warnings():
             warnings.simplefilter('ignore')
-            self.flow_data = FlowData(
+            self.flow_data, self.fcs3_data = read_multiple_data_sets(
                 'examples/fcs_files/coulter.lmd',
-                ignore_offset_error=True,
-                nextdata_offset=0
+                ignore_offset_error=True
             )
         
     def test_event_count(self):
@@ -26,3 +26,11 @@ def test_get_text(self):
     def test_fail_data_offset_error(self):
         with self.assertRaises(FCSParsingError):     
             FlowData('examples/fcs_files/coulter.lmd', nextdata_offset=0)
+
+    def test_right_integer_reading(self):
+        self.assertEqual(
+            self.fcs3_data.events[:24], 
+            array.array("I", [61056, 131840, 46, 324, 10309, 104, 11912, 0,
+                            257280, 378656, 139, 1728, 58688, 354, 58720, 0,
+                            164128, 305376, 159, 924, 29024, 208, 29728, 0])
+        )
\ No newline at end of file

From 9bb1f86f63ed3fe5ffe53bc8323e703a541f0566 Mon Sep 17 00:00:00 2001
From: Maxmilian Bertsch <max.bertsch@gmx.de>
Date: Mon, 15 May 2023 15:13:19 +0200
Subject: [PATCH 03/10] dirty fix for var int file test

---
 flowio/tests/flowdata_tests.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/flowio/tests/flowdata_tests.py b/flowio/tests/flowdata_tests.py
index f22902e..a46aac5 100644
--- a/flowio/tests/flowdata_tests.py
+++ b/flowio/tests/flowdata_tests.py
@@ -65,10 +65,10 @@ def test_parse_var_int_data(self):
         event_values = [
             49135, 61373, 48575, 49135, 61373, 48575, 7523, 598, 49135, 61373,
             48575, 49135, 61373, 48575, 28182, 61200, 48575, 49135, 32445, 30797,
-            19057, 49135, 61373, 48575, 5969, 142482809,
+            19057, 49135, 61373, 48575, 5969, 7967621,
             61266, 48575, 49135, 20925, 61265, 48575, 27961, 25200, 61287, 48575, 9795,
             49135, 29117, 49135, 61373, 48575, 61228, 48575, 22, 21760, 49135,
-            20413, 49135, 23997, 19807, 3220139858
+            20413, 49135, 23997, 19807, 2984945
         ]
 
         fcs_file = "examples/fcs_files/variable_int_example.fcs"

From efd4727a171c9666d003195d39566298ab9fd46b Mon Sep 17 00:00:00 2001
From: whitews <whitews@gmail.com>
Date: Tue, 16 May 2023 17:35:07 -0400
Subject: [PATCH 04/10] bump version to beta patch v1.2.1b0

---
 flowio/_version.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/flowio/_version.py b/flowio/_version.py
index ad194f9..1f5be26 100644
--- a/flowio/_version.py
+++ b/flowio/_version.py
@@ -1,4 +1,4 @@
 """
 FlowIO version
 """
-__version__ = "1.2.0"
+__version__ = "1.2.1b0"

From e003e32d7abf21aa5d60723e71b521920351bce4 Mon Sep 17 00:00:00 2001
From: whitews <whitews@gmail.com>
Date: Tue, 16 May 2023 17:36:33 -0400
Subject: [PATCH 05/10] remove unused import

---
 flowio/flowdata.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/flowio/flowdata.py b/flowio/flowdata.py
index 6b6e2ee..798096a 100644
--- a/flowio/flowdata.py
+++ b/flowio/flowdata.py
@@ -4,7 +4,6 @@
 from warnings import warn
 import os
 import re
-import math
 from functools import reduce
 from .create_fcs import create_fcs
 from .exceptions import FCSParsingError, DataOffsetDiscrepancyError, MultipleDataSetsError

From fca169eccf6ce22a39e198fdb8b26fb04cfaca7c Mon Sep 17 00:00:00 2001
From: whitews <whitews@gmail.com>
Date: Tue, 16 May 2023 17:45:35 -0400
Subject: [PATCH 06/10] update comments & minor reformatting

---
 flowio/flowdata.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/flowio/flowdata.py b/flowio/flowdata.py
index 798096a..b0b9679 100644
--- a/flowio/flowdata.py
+++ b/flowio/flowdata.py
@@ -383,6 +383,8 @@ def __parse_int_data(self, offset, start, stop, bit_width_by_channel,
                 data_type_size = bit_width / 8
                 num_items, stop = self.__calc_data_item_count(start, stop, data_type_size)
 
+                # Here, we're reading the initial data array, but some channel
+                # data may still need bit-masking correction using max range
                 self._fh.seek(offset + start)
                 tmp = array.array(self.__format_integer(bit_width))
                 tmp.fromfile(self._fh, int(num_items))
@@ -404,9 +406,9 @@ def __parse_int_data(self, offset, start, stop, bit_width_by_channel,
                     new_tmp = array.array(self.__format_integer(bit_width))
                     new_tmp.frombytes(bytes(map(lambda a,b: a&b, tmp.tobytes(), bit_mask.tobytes())))
                     tmp = new_tmp
-            # parameter sizes are different
-            # e.g. 8, 8, 16, 8, 32 ...
             else:
+                # parameter sizes are different
+                # e.g. 8, 8, 16, 8, 32 ...
                 # can't use array for heterogeneous bit widths
                 tmp = self.__extract_var_length_int(bit_width_by_channel, max_range_by_channel,
                                                     offset, order, start, stop)
@@ -414,6 +416,7 @@ def __parse_int_data(self, offset, start, stop, bit_width_by_channel,
         else:  # non standard bit width...  Does this happen?
             warn('Non-standard bit width for data segments')
             return None
+
         return tmp
 
     def __extract_var_length_int(self, bit_width_by_channel, max_range_by_channel, 

From 8b6721ac18476858f952ac731023e649178c866d Mon Sep 17 00:00:00 2001
From: whitews <whitews@gmail.com>
Date: Tue, 16 May 2023 17:48:11 -0400
Subject: [PATCH 07/10] refactor a few vars w/shorter names, remove commented
 code, comments & PEP8 changes

---
 flowio/flowdata.py | 61 +++++++++++++++++++++++++++++-----------------
 1 file changed, 39 insertions(+), 22 deletions(-)

diff --git a/flowio/flowdata.py b/flowio/flowdata.py
index b0b9679..5259e7d 100644
--- a/flowio/flowdata.py
+++ b/flowio/flowdata.py
@@ -371,15 +371,24 @@ def __calc_data_item_count(self, start, stop, data_type_size):
 
         return num_items, stop
 
-    def __parse_int_data(self, offset, start, stop, bit_width_by_channel, 
-                         max_range_by_channel, order):
+    def __parse_int_data(
+            self,
+            offset,
+            start,
+            stop,
+            bit_width_lut,
+            max_range_lut,
+            order
+    ):
         """Parse out and return integer list data from FCS file"""
 
-        if reduce(and_, [item in [8, 16, 32] for item in bit_width_by_channel.values()]):
-            # We have a uniform bit width for all parameters,
-            # use the first value to determine the number of actual events
-            if len(set(bit_width_by_channel.values())) == 1:
-                bit_width = list(bit_width_by_channel.values())[0]
+        if reduce(and_, [item in [8, 16, 32] for item in bit_width_lut.values()]):
+            # Determine if we have uniform bit width values for all parameters.
+            # If so, use array.array for much faster parsing
+            if len(set(bit_width_lut.values())) == 1:
+                # We do have a uniform bit width, grab the 1st value to
+                # determine the number of actual events
+                bit_width = list(bit_width_lut.values())[0]
                 data_type_size = bit_width / 8
                 num_items, stop = self.__calc_data_item_count(start, stop, data_type_size)
 
@@ -390,19 +399,21 @@ def __parse_int_data(self, offset, start, stop, bit_width_by_channel,
                 tmp.fromfile(self._fh, int(num_items))
                 if order == '>':
                     tmp.byteswap()
-                # acording to the FCS standard the PnR value of Integer values
-                # determines how many bit of the max bit_width are actually used
-                # for the data
-                if any(2**bit_width_by_channel[c] > max_range_by_channel[c] for 
-                       c in bit_width_by_channel.keys()) :
-                    # for i in range(len(tmp)):
-                    #     value = tmp.pop(0)
-                    #     channel = i % len(bit_width_by_channel) + 1
-                    #     tmp.append(value % max_range_by_channel[channel])
-                    amount_data_points = int(num_items / len(max_range_by_channel))
-                    #create bit mask for extracting the right amount of bits
-                    bit_mask = array.array(self.__format_integer(bit_width), 
-                                           [mr -1 for mr in max_range_by_channel.values()]*amount_data_points)
+
+                # If a channel's PnR value is smaller than its bit width
+                # allows, the higher bits shall be ignored using a bit mask.
+                # If PnR is not a power of 2, the next power of 2 is used.
+                if any(2 ** bit_width_lut[c] > max_range_lut[c] for
+                       c in bit_width_lut.keys()) :
+
+                    amount_data_points = int(num_items / len(max_range_lut))
+                    
+                    # Create bit mask array matching length of our data array,
+                    # with the value at every position being max range - 1.
+                    bit_mask = array.array(
+                        self.__format_integer(bit_width),
+                        [mr - 1 for mr in max_range_lut.values()] * amount_data_points
+                    )
                     new_tmp = array.array(self.__format_integer(bit_width))
                     new_tmp.frombytes(bytes(map(lambda a,b: a&b, tmp.tobytes(), bit_mask.tobytes())))
                     tmp = new_tmp
@@ -410,8 +421,14 @@ def __parse_int_data(self, offset, start, stop, bit_width_by_channel,
                 # parameter sizes are different
                 # e.g. 8, 8, 16, 8, 32 ...
                 # can't use array for heterogeneous bit widths
-                tmp = self.__extract_var_length_int(bit_width_by_channel, max_range_by_channel,
-                                                    offset, order, start, stop)
+                tmp = self.__extract_var_length_int(
+                    bit_width_lut,
+                    max_range_lut,
+                    offset,
+                    order,
+                    start,
+                    stop
+                )
 
         else:  # non standard bit width...  Does this happen?
             warn('Non-standard bit width for data segments')

From 9d2754df8762dc6d8cb4cac7580856da4b63107d Mon Sep 17 00:00:00 2001
From: whitews <whitews@gmail.com>
Date: Tue, 16 May 2023 17:52:30 -0400
Subject: [PATCH 08/10] only do bit width/range collection for int data type,
 calc next power of 2 for PnR values

---
 flowio/flowdata.py | 27 +++++++++++++++++++++------
 1 file changed, 21 insertions(+), 6 deletions(-)

diff --git a/flowio/flowdata.py b/flowio/flowdata.py
index 5259e7d..c782900 100644
--- a/flowio/flowdata.py
+++ b/flowio/flowdata.py
@@ -16,6 +16,13 @@
     basestring = str
 
 
+def _next_power_of_2(x):
+    if x == 0:
+        return 1
+    else:
+        return 2 ** (x - 1).bit_length()
+
+
 class FlowData(object):
     """
     Object representing a Flow Cytometry Standard (FCS) file.
@@ -307,13 +314,21 @@ def __parse_data(self, offset, start, stop, text):
             order = '@'
             # from here on out we assume mode "l" (list)
 
-        bit_width_by_channel = {}
-        max_range_by_channel = {}
-        for i in range(1, int(text['par']) + 1):
-            bit_width_by_channel[i] = int(text['p%db' % i])
-            max_range_by_channel[i] = int(text['p%dr' % i])
-
         if data_type.lower() == 'i':
+            # For int data we need to check the bit width and range values.
+            # The PnR value specifies the max value for the channel. This
+            # value is exclusive, e.g. a value of 1024 means the highest
+            # integer value allowed is 1023. Integer data needs to be
+            # bit-masked according to this max range value.
+            bit_width_by_channel = {}
+            max_range_by_channel = {}
+            for i in range(1, int(text['par']) + 1):
+                bit_width_by_channel[i] = int(text['p%db' % i])
+
+                # PnR may not be a power of 2, so round up to the next one
+                tmp_max_range = int(text['p%dr' % i])
+                max_range_by_channel[i] = _next_power_of_2(tmp_max_range)
+
             data = self.__parse_int_data(
                 offset,
                 start,

From 89869be9308bb31c0c287b8b4c747aea032d100f Mon Sep 17 00:00:00 2001
From: whitews <whitews@gmail.com>
Date: Tue, 16 May 2023 17:55:08 -0400
Subject: [PATCH 09/10] update test for int data type w/correct bit-masked
 values & extensive comment explanation

---
 flowio/tests/flowdata_tests.py | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/flowio/tests/flowdata_tests.py b/flowio/tests/flowdata_tests.py
index a46aac5..97dffce 100644
--- a/flowio/tests/flowdata_tests.py
+++ b/flowio/tests/flowdata_tests.py
@@ -65,12 +65,30 @@ def test_parse_var_int_data(self):
         event_values = [
             49135, 61373, 48575, 49135, 61373, 48575, 7523, 598, 49135, 61373,
             48575, 49135, 61373, 48575, 28182, 61200, 48575, 49135, 32445, 30797,
-            19057, 49135, 61373, 48575, 5969, 7967621,
+            19057, 49135, 61373, 48575, 5969, 8265081,
             61266, 48575, 49135, 20925, 61265, 48575, 27961, 25200, 61287, 48575, 9795,
             49135, 29117, 49135, 61373, 48575, 61228, 48575, 22, 21760, 49135,
-            20413, 49135, 23997, 19807, 2984945
+            20413, 49135, 23997, 19807, 15691602
         ]
 
+        # To double-check our logic, let's use the 2 values from
+        # channel 26 where:
+        # PnB is 32
+        # PnR is 11209599
+        # The 1st value for chan 26, 32-bit, unmasked:  142482809
+        # The 2nd value for chan 26, 32-bit, unmasked: 3220139858
+        # The next power of 2 above 11209599 (PnR) is:   16777216
+        # Subtracting 1 from this power of 2, we can see what the
+        # new values should be from the binary:
+        #   1st value:
+        #   0000 1000 0111 1110 0001 1101 0111 1001     142482809 (orig 32-bit value)
+        #   0000 0000 1111 1111 1111 1111 1111 1111      16777215 (2 ** 24 - 1)
+        #   0000 0000 0111 1110 0001 1101 0111 1001       8265081 (new value)
+        #   2nd value:
+        #   1011 1111 1110 1111 0110 1111 0101 0010    3220139858 (orig 32-bit value)
+        #   0000 0000 1111 1111 1111 1111 1111 1111      16777215 (2 ** 24 - 1)
+        #   0000 0000 1110 1111 0110 1111 0101 0010      15691602 (new value)
+
         fcs_file = "examples/fcs_files/variable_int_example.fcs"
         sample = FlowData(fcs_file)
 

From e481694d43695edb14343ce9a1557f1dc15e7d05 Mon Sep 17 00:00:00 2001
From: whitews <whitews@gmail.com>
Date: Tue, 16 May 2023 17:56:02 -0400
Subject: [PATCH 10/10] close file before throwing error, better error message

---
 flowio/flowdata.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/flowio/flowdata.py b/flowio/flowdata.py
index c782900..ab9945d 100644
--- a/flowio/flowdata.py
+++ b/flowio/flowdata.py
@@ -108,7 +108,10 @@ def __init__(
         )
 
         if int(self.text.get("nextdata", "0")) != 0 and nextdata_offset is None:
-            raise MultipleDataSetsError()
+            self._fh.close()
+            raise MultipleDataSetsError(
+                "%s contains multiple data sets, use read_multiple_data_sets function" % self.name
+            )
 
         self.channel_count = int(self.text['par'])
         self.event_count = int(self.text['tot'])