<a href="https://colab.research.google.com/github/x380kkm/APTPosEasySlicer/blob/main/APPosEasySlicer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Import, read

In [1]:
import numpy as np
import pandas as pd
from joblib import Parallel, delayed
from tqdm import tqdm
import matplotlib.pyplot as plt
from scipy.ndimage import gaussian_filter
import plotly.graph_objs as go
import plotly.express as px


In [2]:
# Mount Google Drive inside the Colab runtime at /content/drive.
# The example .pos path used in this notebook lives under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Read .pos file

**Upload the .pos file to Google Drive**

**Enter the path to the .pos file here**

In [None]:
# Path of the .pos file to load; the reader cell opens this path in binary mode.
# The right-hand side is intentionally left blank: fill it in before running the cell.
#e.g.: posfile ='/content/drive/MyDrive/dspacing/*****.POS'
posfile =

Pos file reader

In [None]:
file_name = posfile
print(f'pos file {file_name} is being read...')

# The file is read as consecutive records of four big-endian float32 values,
# which become the DataFrame columns x, y, z, m.
float_size = 4
record_size = 4 * float_size

with open(file_name, 'rb') as f:
    # Get the file size by seeking to the end, then rewind to the start.
    f.seek(0, 2)
    file_size = f.tell()
    f.seek(0)

    # Number of complete records in the file.
    total_floats = file_size // float_size
    nb = total_floats // 4

    # Bytes that do not form a complete record are not read; say so instead of
    # dropping them silently (this usually means a truncated or non-.pos file).
    leftover = file_size - nb * record_size
    if leftover:
        print(f'Warning: {leftover} trailing byte(s) do not form a complete record and are ignored.')

    flo = np.zeros((4, nb), dtype=np.float32)

    # Read in chunks of `update_step` records so tqdm can report progress.
    update_step = 10000
    with tqdm(total=nb, desc='Reading file', unit='group', mininterval=1.0) as pbar:
        # range(0, nb, update_step) yields exactly the non-empty chunks, so there is
        # no zero-length read when nb is a multiple of update_step.
        for start in range(0, nb, update_step):
            end = min(start + update_step, nb)
            expected = 4 * (end - start)
            data = np.fromfile(f, dtype='>f4', count=expected)
            if data.size != expected:
                # np.fromfile returns fewer items on a short read; fail with a clear
                # message rather than an obscure reshape error.
                raise IOError(
                    f'Unexpected end of file while reading {file_name}: '
                    f'expected {expected} values, got {data.size}.'
                )
            # Assigning into the float32 array converts the big-endian values to native order.
            flo[:, start:end] = data.reshape(end - start, 4).T
            pbar.update(end - start)

df = pd.DataFrame(flo.T, columns=['x', 'y', 'z', 'm'])
print('OK, file read, variables created.')
print(f'DataFrame created: {len(df)} rows x {df.shape[1]} columns')


Data info

In [None]:
print("*********************************************************")
print("data info")
# DataFrame.info() prints its report itself and returns None, so it is called
# directly; wrapping it in print() would add a stray "None" line to the output.
df.info()
print("*********************************************************")
print("data examples")
# Last expression of the cell: rendered by the notebook as a table.
df

# slice data by z

**If you want to use bins to slice, you can set the count of bins here.**

Each bin has the same number of ions.

You can also check the Z-axis information in the figure.

In [None]:
# --- Settings ---------------------------------------------------------------
bins = 20   # number of z slices (used for the overview plot and for sub_dfs)
step = 10   # x-axis tick spacing of the overview plot, expressed in bins

total_samples = len(df)
if total_samples == 0:
    raise ValueError('df contains no ions; read a .pos file before slicing.')

# --- Overview plot data: equal-width z intervals -----------------------------
bin_edges = np.linspace(df['z'].min(), df['z'].max(), bins+1)

bin_counts, _ = np.histogram(df['z'], bins=bin_edges)
bin_percentages = (bin_counts / total_samples) * 100
cumulative_percentages = np.cumsum(bin_percentages)

# Per-bin mean of z = (sum of z in the bin) / (ion count in the bin).
# The sums come from the same np.histogram binning as the counts, so both use
# identical bin boundaries (the minimum-z ions are included in the first bin),
# and no per-bin boolean mask over the whole frame is needed.
# float64 weights keep the accumulated sums accurate.
bin_sums, _ = np.histogram(df['z'], bins=bin_edges,
                           weights=df['z'].to_numpy(dtype=np.float64))
bin_means = np.full(bins, np.nan)   # empty bins stay NaN and show as gaps in the line
np.divide(bin_sums, bin_counts, out=bin_means, where=bin_counts > 0)

# z-sorted copy of the data; also used by later cells.
sorted_df = df.sort_values(by='z')

# --- Overview plot ------------------------------------------------------------
fig, ax1 = plt.subplots(figsize=(14, 8))

ax1.plot(cumulative_percentages, bin_means, marker='o', linestyle='-', color='b', label='Mean Z value per bin')

# np.linspace counts tick marks, not intervals, hence the +1: bins // step
# intervals need bins // step + 1 ticks. max(..., 1) keeps the 0 % and 100 %
# ticks even when bins < step.
tick_intervals = max(bins // step, 1)
xticks = np.linspace(0, 100, tick_intervals + 1)
ax1.set_xticks(xticks)
ax1.set_xticklabels(np.round(xticks, 2), rotation=90)

ax1.set_xlabel('Cumulative Ion Percentage')
ax1.set_ylabel('Mean Z value')
ax1.set_title('Mean Z value across different cumulative ion percentage bins')
ax1.grid(True)
ax1.legend(loc='upper right')

plt.show()

# --- Equal-count slices --------------------------------------------------------
# Split the z-sorted frame into `bins` consecutive pieces; the first `remainder`
# pieces get one extra row so that every row is assigned to exactly one piece.
sub_size, remainder = divmod(len(sorted_df), bins)
sub_dfs = []
start = 0
for i in range(bins):
    end = start + sub_size + (1 if i < remainder else 0)
    sub_dfs.append(sorted_df.iloc[start:end].reset_index(drop=True))
    start = end

# Drop the plot-only temporaries.
del bin_counts, bin_edges, bin_means, bin_percentages, bin_sums, cumulative_percentages

Lowest-z subset info (`sub_dfs[0]`, the first slice of the z-sorted data)

In [None]:
# sub_dfs[0] is the first slice of the ascending z-sorted data, i.e. the ions
# with the lowest z values.
print("*********************************************************")
print("data info")
# DataFrame.info() prints its report itself and returns None, so it is called
# directly; wrapping it in print() would add a stray "None" line to the output.
sub_dfs[0].info()
print("*********************************************************")
print("data examples")
sub_dfs[0]


Highest-z subset info (`sub_dfs[-1]`, the last slice of the z-sorted data)

In [None]:
# sub_dfs[-1] is the last slice of the ascending z-sorted data, i.e. the ions
# with the highest z values.
print("*********************************************************")
print("data info")
# DataFrame.info() prints its report itself and returns None, so it is called
# directly; wrapping it in print() would add a stray "None" line to the output.
sub_dfs[-1].info()
print("*********************************************************")
print("data examples")
sub_dfs[-1]

Drawer

In [None]:
def heatmap(dataset, res=280, sm=1, fsize=30, mi=None, mx=None):
    """Show an interactive 2D histogram (density map) of the x/y values in ``dataset``.

    Parameters
    ----------
    dataset : mapping or DataFrame
        Must provide ``dataset['x']`` and ``dataset['y']`` as equal-length sequences.
    res : int
        Number of bin *edges* per axis, so the map has ``res - 1`` bins per axis.
    sm : int
        Smoothing switch: a Gaussian filter with ``sigma=1`` is applied only when
        ``sm == 1``; any other value leaves the raw counts.
    fsize : int
        Currently unused; kept so existing calls keep working.
    mi, mx : float, optional
        Lower / upper bound shared by both axes. When omitted they are taken from
        the smallest / largest value found in x and y.

    Raises
    ------
    ValueError
        If ``mi`` or ``mx`` has to be derived from the data but ``dataset`` is empty.

    The figure is displayed with ``fig.show()``; nothing is returned.
    """
    # Convert once; the arrays are reused for the range and for the histogram.
    xs = np.asarray(dataset['x'])
    ys = np.asarray(dataset['y'])

    # Derive the missing bounds from the data (vectorised min/max rather than the
    # Python builtins, which iterate element by element).
    if mi is None or mx is None:
        if xs.size == 0 or ys.size == 0:
            raise ValueError(
                "heatmap: dataset contains no ions, so the plot range cannot be "
                "derived; choose a non-empty slice or pass mi and mx explicitly"
            )
        if mi is None:
            mi = min(xs.min(), ys.min())
        if mx is None:
            mx = max(xs.max(), ys.max())

    # The same edges are used on both axes so the bins are square.
    edges = np.linspace(mi, mx, res)

    # Compute the 2D histogram
    H, xedges, yedges = np.histogram2d(xs, ys, bins=[edges, edges])

    # Apply Gaussian smoothing if sm == 1
    if sm == 1:
        H = gaussian_filter(H, sigma=1)

    # histogram2d indexes H as [x_bin, y_bin]; the heatmap expects rows along y,
    # hence the transpose. x and y are given the bin edges.
    fig = go.Figure(data=go.Heatmap(
        z=H.T,
        x=xedges,
        y=yedges,
        colorscale='Viridis'
    ))

    # Equal axis scaling and a fixed 800x800 canvas.
    fig.update_layout(
        title='heatmap',
        xaxis_title='X Axis',
        yaxis_title='Y Axis',
        xaxis=dict(
            scaleanchor="y",
            scaleratio=1,
            range=[mi, mx]
        ),
        yaxis=dict(
            range=[mi, mx]
        ),
        width=800,
        height=800
    )

    fig.show()



**Choose the bins to display if you want to use the bin slicer**

In [None]:
#e.g.:choosed = [0,10,19]
choosed=

z_mean = lambda x: sub_dfs[x]['z'].mean()

for x in choosed:
    mean_value = z_mean(x)
    print(f'The mean of z for sub_df {x}/{bins} is {mean_value}')

Bin slicer

In [None]:
# For every selected slice: draw its x/y heatmap, then report the z range it covers.
separator = "*******************************"
for bin_no in choosed:
    selected = sub_dfs[bin_no]
    print(separator)
    print(f'sub_df {bin_no}/{bins}')
    heatmap(selected)
    z_column = selected['z']
    z_min, z_max = z_column.min(), z_column.max()
    print(f"Minimum value of 'z': {z_min}")
    print(f"Maximum value of 'z': {z_max}")
    print(separator)

for note in ("Note Z do not means position here",
             'try put your mouse on the pictures'):
    print(note)

**Traditional spinner**

In [None]:
def spinner_maker(zmin,zmax,df):
    """Return the rows of ``df`` whose 'z' value lies in the closed interval [zmin, zmax]."""
    # Series.between is inclusive on both ends by default.
    in_range = df['z'].between(zmin, zmax)
    return df[in_range]

#e.g.: spinnerchoosed=[[-20,0],[-200,-180],[-410,-390]]
spinnerchoosed =
for x in spinnerchoosed:
  spinner = spinner_maker(x[0],x[1],sorted_df)
  heatmap(spinner)
  print("*******************************")
  print(f"Minimum value of 'z': {x[0]}")
  print(f"Maximum value of 'z': {x[1]}")
  print("*******************************")





Collect your data here if you need to.

Double-click a table to edit it.

| bin no | pole x | pole y | z min | z max |
|----------|----------|----------|----------|----------|
| - | - | - | - | - |


| bin no | pole x | pole y | z min | z max |
|----------|----------|----------|----------|----------|
| - | - | - | - | - |
