In [1]:
import bokeh
from bokeh.plotting import show

import flowkit as fk

# Configure Bokeh so that figures passed to show() render inline in the notebook.
bokeh.io.output_notebook()

In [2]:
# Display the installed FlowKit version so readers can confirm they are running
# the same release (and therefore the same API) as this notebook.
fk.__version__

'1.2.3'

In [3]:
# help(fk.Sample)

In [4]:
# Location of the example FCS file, relative to the notebook's working directory.
fcs_path = './data_cytof/Tet2_194_VEH.fcs'

In [5]:
# Load the FCS file into a FlowKit Sample object.
sample = fk.Sample(fcs_path)

In [6]:
# Show the Sample repr: FCS version, file name, channel count and event count.
sample

Sample(v3.0, Tet2_194_VEH.fcs, 71 channels, 14994 events)

In [7]:
# Export the source='raw' events as a DataFrame; its columns are a two-level
# MultiIndex of (pnn, pns) channel labels.
df_events = sample.as_dataframe(source='raw')

In [8]:
# Preview the first five events.
df_events.head()

pnn,Time,Event_length,Y89Di,Pd102Di,Pd104Di,Pd105Di,Pd106Di,Pd108Di,Pd110Di,Cd111Di,...,Pt198Di,Pb208Di,Bi209Di,Center,Offset,Width,Residual,beadDist,bc_separation_dist,mahalanobis_dist
pns,Time,Event_length,89Y_CD45,102Pd,104Pd,105Pd,106Pd,108Pd,110Pd,111Cd_HLA-DR,...,198Pt_CD45,208Pb,209Bi_CD16,Center,Offset,Width,Residual,beadDist,bc_separation_dist,mahalanobis_dist
0,1685.29895,26.0,341.027374,671.08728,984.812683,934.105225,82.299583,32.618511,19.536291,3.382686,...,1.070791,0.0,0.0,869.389343,89.89122,70.802391,135.675247,86.621948,0.383767,3.733617
1,3874.73999,30.0,236.192307,215.467926,299.217926,315.343567,38.759453,41.688251,32.905548,0.568704,...,0.649648,23.595097,2.1475,1098.618408,85.144279,75.888794,117.579514,141.618637,0.298424,0.519812
2,4987.87793,27.0,513.242737,565.935974,712.255859,739.106506,50.505489,40.565819,38.075027,21.636871,...,0.0,0.0,2.326579,926.856628,76.30484,60.508003,79.655258,159.908218,0.440903,2.345677
3,5135.09082,19.0,434.569702,453.607147,677.94812,702.347778,46.099533,21.909271,15.151691,3.138445,...,0.0,0.0,0.0,608.7547,54.721352,29.800858,86.646614,111.293686,0.416754,2.8111
4,5714.817871,26.0,936.375183,718.643066,1048.463379,1053.404053,69.765152,45.399395,32.985199,132.26442,...,0.0,0.0,1.083686,858.364868,76.245644,59.434353,108.201691,170.788101,0.426315,2.13707


In [9]:
# Look at a slice of the column labels to see how pnn and pns names pair up.
df_events.columns[20:30]

MultiIndex([('Nd143Di', '143Nd_CD127'),
            ('Nd144Di',  '144Nd_CD38'),
            ('Nd145Di',  '145Nd_CD1c'),
            ('Nd146Di',   '146Nd_IgD'),
            ('Sm147Di',  '147Sm_IL-6'),
            ('Nd148Di',  '148Nd_CD14'),
            ('Sm149Di',  '149Sm_CD25'),
            ('Nd150Di',       '150Nd'),
            ('Eu151Di', '151Eu_CD123'),
            ('Sm152Di', '152Sm_CD62L')],
           names=['pnn', 'pns'])

In [None]:
# Build a new Sample directly from the events DataFrame, giving it an explicit ID.
sample_from_df = fk.Sample(df_events, sample_id='my_sample_from_dataframe')

In [None]:
# Show the repr of the DataFrame-backed Sample.
sample_from_df

In [None]:
# Collect the two ingredients for building a Sample from an array:
# the channel (PnN) labels and the source='raw' event data.
channel_labels = sample.pnn_labels
np_events = sample.get_events(source='raw')

In [None]:
# Build a Sample from the event array; the channel labels are passed explicitly
# alongside it.
sample_from_np = fk.Sample(
    np_events,
    sample_id='my_sample_from_numpy',
    channel_labels=channel_labels,
)

In [None]:
# Inspect the event indices stored on the sample for its subsample.
sample.subsample_indices

In [None]:
# Number of events in the subsample.
len(sample.subsample_indices)

In [None]:
# Histogram of the Cd114Di channel using source='raw' events.
p = sample.plot_histogram(
    'Cd114Di',
    source='raw',
)
show(p)

In [None]:
# Single-channel plot of Cd114Di using source='raw' events.
f = sample.plot_channel(
    'Cd114Di',
    source='raw',
)
show(f)

In [None]:
# Unfilled contour plot of Cd114Di vs Cd116Di with the event layer turned off.
# plot_contour works on the subsampled events by default, for performance.
# The figure is displayed by the next cell.
p = sample.plot_contour(
    'Cd114Di',
    'Cd116Di',
    source='raw',
    plot_events=False,
    fill=False,
)

In [None]:
# Render the figure currently bound to `p` (created in the preceding cell).
show(p)

In [None]:
# Same contour plot as before, but with filled contour levels.
# plot_contour works on the subsampled events by default, for performance.
p = sample.plot_contour(
    'Cd114Di',
    'Cd116Di',
    source='raw',
    plot_events=False,
    fill=True,
)
show(p)

In [None]:
# Contour plot with the event layer enabled (plot_events=True).
p = sample.plot_contour(
    'Cd114Di',
    'Cd116Di',
    plot_events=True,
    source='raw',
)
show(p)

In [None]:
# Density-colored scatter plot of Cd114Di vs Cd116Di with explicit axis limits.
# The figure is displayed by the next cell.
p = sample.plot_scatter(
    'Cd114Di',
    'Cd116Di',
    source='raw',
    x_min=0.,
    x_max=280,
    y_min=0.,
    y_max=450,
    color_density=True,
)

In [None]:
# Render the scatter figure created in the preceding cell.
show(p)

In [None]:
# Print the built-in documentation for AsinhTransform (parameter names and meaning).
help(fk.transforms.AsinhTransform)

In [10]:
# Define an inverse hyperbolic sine transform and apply it to `sample`, so that
# later cells can request source='xform'.
# NOTE(review): param_t=20 is far below the raw intensities displayed earlier
# (hundreds to >1000) -- confirm this top-of-scale value is intended.
# Alternative left by the author:
# LogicleTransform(param_t=1024, param_w=0.5, param_m=4.5, param_a=0)
xform = fk.transforms.AsinhTransform(
    param_t=20,
    param_m=5.0,
    param_a=0,
)
sample.apply_transform(xform)

In FlowKit, when applying the AsinhTransform (hyperbolic arcsine transform), the parameters param_t, param_m, and param_a correspond to:

param_t: Top of scale — the raw (untransformed) data value that is mapped to the top of the transformed scale. For conventional flow cytometry data this is often 262144 or similar (it depends on the instrument's resolution and dynamic range), which is why it is sometimes referred to as the "data range" or "maximum channel value". Choose a value that matches the range of your own data.

param_m: Number of decades — the number of logarithmic decades the transformed scale spans (it is not a median). It effectively determines the "scale width", i.e. how compressed or expanded the data appears after transformation. It is often set to values like 4.5 or 5.

param_a: Number of additional negative decades — extends the scale below zero, changing how the transformation behaves for negative and small values so that they are displayed more cleanly. It is not a cofactor. A value of 0 (as used here) adds no negative decades; it is often set between 0 and 1, e.g., 0.2 or 0.5.

In [11]:
# Scatter-plot matrix of four transformed channels, colored by event density.
# With this many panels, subsampling is usually a good idea.
spm = sample.plot_scatter_matrix(
    channel_labels_or_numbers=['Cd114Di', 'Cd116Di', 'Nd144Di', 'Nd146Di'],
    source='xform',
    color_density=True,
)
show(spm)

In [12]:
import flowkit as fk
import numpy as np
import pandas as pd

# # Load the FCS file
# sample = fk.Sample("input_file.fcs")


In [14]:

# Pull the source='raw' events into a DataFrame, then draw a random subset of
# 1000 events. The fixed random_state makes the selection repeatable.
events_df = sample.as_dataframe(source='raw')
subsampled_df = events_df.sample(
    random_state=42,
    n=1000,
)



In [17]:
# Display the subsample; the row index keeps the original event numbers.
subsampled_df

pnn,Time,Event_length,Y89Di,Pd102Di,Pd104Di,Pd105Di,Pd106Di,Pd108Di,Pd110Di,Cd111Di,...,Pt198Di,Pb208Di,Bi209Di,Center,Offset,Width,Residual,beadDist,bc_separation_dist,mahalanobis_dist
pns,Time,Event_length,89Y_CD45,102Pd,104Pd,105Pd,106Pd,108Pd,110Pd,111Cd_HLA-DR,...,198Pt_CD45,208Pb,209Bi_CD16,Center,Offset,Width,Residual,beadDist,bc_separation_dist,mahalanobis_dist
13409,8.235256e+06,28.0,579.454834,555.689819,886.415100,855.597900,40.799934,25.890133,30.348782,0.971897,...,0.725846,10.434646,0.000000,1382.483154,113.337296,96.657990,194.024384,86.502029,0.475728,3.315531
6472,4.303119e+06,29.0,500.191650,169.164444,247.036102,241.070175,22.743469,18.477108,21.978172,1.228370,...,0.000000,6.447477,0.997607,1229.580933,119.876724,101.862595,146.107651,87.171898,0.359473,1.112839
9967,6.402256e+06,23.0,257.317719,459.324219,772.646240,715.565491,43.902863,41.825493,19.522161,0.419662,...,0.000000,6.724018,0.887348,1003.174561,87.825394,61.992744,170.066315,83.424370,0.427778,2.626771
862,8.289456e+05,20.0,693.516235,676.385986,904.693481,908.912903,72.369583,13.973156,35.542976,0.000000,...,0.000000,0.000000,3.312334,759.568970,79.683449,47.299137,82.451477,154.654205,0.408566,7.086343
5967,3.993603e+06,25.0,301.670776,630.336792,962.359375,936.626587,79.404060,36.987846,29.871513,0.000000,...,0.000000,4.950260,0.000000,1001.018127,85.237625,63.476292,122.979164,136.804749,0.378804,2.158541
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10692,6.810192e+06,20.0,191.541214,686.973206,926.827942,893.091431,42.573341,25.492825,19.753675,11.443378,...,0.000000,8.291533,5.449792,944.729248,95.791397,57.109482,110.824043,108.750542,0.506996,4.286443
13786,8.433629e+06,25.0,661.802673,609.821838,853.902832,840.024902,77.320343,36.472897,19.670523,4.314567,...,0.000000,3.210822,2.938845,1203.205322,86.947937,63.682121,145.218765,162.259247,0.377576,3.153117
13856,8.471432e+06,30.0,253.254059,426.303650,547.958557,529.645935,52.424454,21.057680,23.692648,2.206322,...,0.000000,7.113031,3.294798,1426.570068,151.798004,134.837051,230.919266,84.957695,0.382306,2.653640
455,4.917812e+05,21.0,344.791443,146.167892,200.373596,190.241409,29.199572,14.453162,14.912625,8.721877,...,0.000000,3.847662,0.422050,755.830811,92.916656,57.018860,63.106121,84.205444,0.290110,0.872189


In [20]:
# Wrap the subsampled events in a new Sample. Only the DataFrame and an ID are
# passed to the constructor here.
# NOTE: this rebinds `sample_from_df`, which an earlier cell assigned to a
# Sample built from the full event table.
sample_from_df = fk.Sample(
    subsampled_df,
    sample_id='subsampled',
)

In [23]:
# Confirm the new Sample holds the 1000 subsampled events.
sample_from_df

Sample(v3.1, subsampled, 71 channels, 1000 events)

In [28]:
# List the channel (PnN) labels carried by the subsampled Sample.
sample_from_df.pnn_labels

['Time',
 'Event_length',
 'Y89Di',
 'Pd102Di',
 'Pd104Di',
 'Pd105Di',
 'Pd106Di',
 'Pd108Di',
 'Pd110Di',
 'Cd111Di',
 'Cd112Di',
 'Cd113Di',
 'Cd114Di',
 'Cd116Di',
 'I127Di',
 'Xe131Di',
 'Ba138Di',
 'Ce140Di',
 'Pr141Di',
 'Nd142Di',
 'Nd143Di',
 'Nd144Di',
 'Nd145Di',
 'Nd146Di',
 'Sm147Di',
 'Nd148Di',
 'Sm149Di',
 'Nd150Di',
 'Eu151Di',
 'Sm152Di',
 'Eu153Di',
 'Sm154Di',
 'Gd155Di',
 'Gd156Di',
 'Gd158Di',
 'Tb159Di',
 'Dy160Di',
 'Dy161Di',
 'Dy162Di',
 'Dy163Di',
 'Dy164Di',
 'Ho165Di',
 'Er166Di',
 'Er167Di',
 'Er168Di',
 'Tm169Di',
 'Er170Di',
 'Yb171Di',
 'Yb172Di',
 'Yb173Di',
 'Yb174Di',
 'Lu175Di',
 'Yb176Di',
 'Lu176Di',
 'BCKG190Di',
 'Os190Di',
 'Ir191Di',
 'Ir193Di',
 'Pt194Di',
 'Pt195Di',
 'Pt196Di',
 'Pt198Di',
 'Pb208Di',
 'Bi209Di',
 'Center',
 'Offset',
 'Width',
 'Residual',
 'beadDist',
 'bc_separation_dist',
 'mahalanobis_dist']

In [26]:
# Sample API reference: https://flowkit.readthedocs.io/en/latest/sample.html
# Write the subsampled Sample's source='raw' events to a new FCS file; the
# relative filename places it in the current working directory.
sample_from_df.export(
    source='raw',
    filename='subsampled_fcs.fcs',
)

In [None]:
# Inspect the FCS metadata dictionary of the originally loaded sample.
# This cell previously contained only the dangling expression `sample.`
# (a leftover tab-completion stub), which is a SyntaxError and stops
# "Restart Kernel -> Run All" at this point.
sample.get_metadata()

In [1]:
# sample.metadata

In [29]:
# Round-trip check: load the FCS file written by the export cell.
sample_sub = fk.Sample('subsampled_fcs.fcs')

In [30]:
# Show the repr of the re-loaded Sample (channel and event counts).
sample_sub

Sample(v3.1, subsampled_fcs.fcs, 71 channels, 1000 events)

In [31]:
# Export the re-loaded file's source='raw' events as a DataFrame for comparison.
df_sample_sub = sample_sub.as_dataframe(source='raw')

In [32]:
# Expect (1000, 71): the 1000 subsampled events across all 71 channels.
df_sample_sub.shape

(1000, 71)

In [33]:
# Display the re-imported events. The values match the subsample shown earlier,
# but the row index now runs 0..999 instead of the original event numbers.
df_sample_sub

pnn,Time,Event_length,Y89Di,Pd102Di,Pd104Di,Pd105Di,Pd106Di,Pd108Di,Pd110Di,Cd111Di,...,Pt198Di,Pb208Di,Bi209Di,Center,Offset,Width,Residual,beadDist,bc_separation_dist,mahalanobis_dist
pns,Time,Event_length,89Y_CD45,102Pd,104Pd,105Pd,106Pd,108Pd,110Pd,111Cd_HLA-DR,...,198Pt_CD45,208Pb,209Bi_CD16,Center,Offset,Width,Residual,beadDist,bc_separation_dist,mahalanobis_dist
0,8.235256e+06,28.0,579.454834,555.689819,886.415100,855.597900,40.799934,25.890133,30.348782,0.971897,...,0.725846,10.434646,0.000000,1382.483154,113.337296,96.657990,194.024384,86.502029,0.475728,3.315531
1,4.303119e+06,29.0,500.191650,169.164444,247.036102,241.070175,22.743469,18.477108,21.978172,1.228370,...,0.000000,6.447477,0.997607,1229.580933,119.876724,101.862595,146.107651,87.171898,0.359473,1.112839
2,6.402256e+06,23.0,257.317719,459.324219,772.646240,715.565491,43.902863,41.825493,19.522161,0.419662,...,0.000000,6.724018,0.887348,1003.174561,87.825394,61.992744,170.066315,83.424370,0.427778,2.626771
3,8.289456e+05,20.0,693.516235,676.385986,904.693481,908.912903,72.369583,13.973156,35.542976,0.000000,...,0.000000,0.000000,3.312334,759.568970,79.683449,47.299137,82.451477,154.654205,0.408566,7.086343
4,3.993603e+06,25.0,301.670776,630.336792,962.359375,936.626587,79.404060,36.987846,29.871513,0.000000,...,0.000000,4.950260,0.000000,1001.018127,85.237625,63.476292,122.979164,136.804749,0.378804,2.158541
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,6.810192e+06,20.0,191.541214,686.973206,926.827942,893.091431,42.573341,25.492825,19.753675,11.443378,...,0.000000,8.291533,5.449792,944.729248,95.791397,57.109482,110.824043,108.750542,0.506996,4.286443
996,8.433629e+06,25.0,661.802673,609.821838,853.902832,840.024902,77.320343,36.472897,19.670523,4.314567,...,0.000000,3.210822,2.938845,1203.205322,86.947937,63.682121,145.218765,162.259247,0.377576,3.153117
997,8.471432e+06,30.0,253.254059,426.303650,547.958557,529.645935,52.424454,21.057680,23.692648,2.206322,...,0.000000,7.113031,3.294798,1426.570068,151.798004,134.837051,230.919266,84.957695,0.382306,2.653640
998,4.917812e+05,21.0,344.791443,146.167892,200.373596,190.241409,29.199572,14.453162,14.912625,8.721877,...,0.000000,3.847662,0.422050,755.830811,92.916656,57.018860,63.106121,84.205444,0.290110,0.872189
