In [1]:
import sys
import pickle
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image, ImageDraw

In [2]:
%%time
# Load the wafer-map dataset into `df`; every later cell depends on it.
# NOTE(security): unpickling can execute arbitrary code, so only load this
# file if it comes from a source you trust.
try:
    df = pd.read_pickle(r"local_data/LSWMD.pkl")
except Exception as e:
    print(f"Error reading pickle file: {e}")
    # Re-raise so the notebook stops here instead of failing later with a
    # confusing NameError on `df`.
    raise
else:
    print("Pickle file read successfully")


Pickle file read successfully
CPU times: user 34.9 s, sys: 2.5 s, total: 37.4 s
Wall time: 37.7 s


In [3]:
# Number of rows (wafers) in the raw frame.
len(df)

811457

In [4]:
# Number of columns in the raw frame.
len(df.columns)

6

In [5]:
# Non-null entries per column.
df.notna().sum()

waferMap          811457
dieSize           811457
lotName           811457
waferIndex        811457
trianTestLabel    811457
failureType       811457
dtype: int64

In [6]:
# Print the index range, per-column dtypes and non-null counts, and the
# approximate memory footprint of the raw frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 811457 entries, 0 to 811456
Data columns (total 6 columns):
 #   Column          Non-Null Count   Dtype  
---  ------          --------------   -----  
 0   waferMap        811457 non-null  object 
 1   dieSize         811457 non-null  float64
 2   lotName         811457 non-null  object 
 3   waferIndex      811457 non-null  float64
 4   trianTestLabel  811457 non-null  object 
 5   failureType     811457 non-null  object 
dtypes: float64(2), object(4)
memory usage: 37.1+ MB


In [7]:
# Render the raw frame with the notebook's rich display; the output is
# truncated to the first and last rows.
display(df)

Unnamed: 0,waferMap,dieSize,lotName,waferIndex,trianTestLabel,failureType
0,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",1683.0,lot1,1.0,[[Training]],[[none]]
1,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",1683.0,lot1,2.0,[[Training]],[[none]]
2,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",1683.0,lot1,3.0,[[Training]],[[none]]
3,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",1683.0,lot1,4.0,[[Training]],[[none]]
4,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",1683.0,lot1,5.0,[[Training]],[[none]]
...,...,...,...,...,...,...
811452,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 1, 1,...",600.0,lot47542,23.0,[[Test]],[[Edge-Ring]]
811453,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 1, 1,...",600.0,lot47542,24.0,[[Test]],[[Edge-Loc]]
811454,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 1, 1,...",600.0,lot47542,25.0,[[Test]],[[Edge-Ring]]
811455,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,...",600.0,lot47543,1.0,[],[]


In [None]:
# Report the size of the raw frame (fixes the "Datafram" typo in the label).
print(f"Total records: {len(df)}")
print(f"DataFrame shape: {df.shape}")

In [None]:
%%time
# Split the raw wafers into two frames:
#   df_labeled   - both annotation cells ('trianTestLabel', 'failureType')
#                  hold a value at [0][0]
#   df_unlabeled - at least one annotation cell cannot be indexed at [0][0]
#                  (for example the empty `[]` cells)
# In both frames the nested annotation is flattened to a plain value, and a
# falsy value is stored as None.
_SPLIT_COLUMNS = ['waferMap', 'dieSize', 'lotName', 'waferIndex', 'label', 'failureType']

labeled = []
unlabeled = []

# Walk the columns in lockstep; this avoids building a Series per row the way
# DataFrame.iterrows does, which is slow on a frame this large.
wafer_fields = zip(
    df['waferMap'], df['dieSize'], df['lotName'], df['waferIndex'],
    df['trianTestLabel'], df['failureType'],
)
for wafer_map, die_size, lot_name, wafer_index, split_cell, failure_cell in wafer_fields:
    label = failureType = None
    try:
        label = split_cell[0][0] or None
        failureType = failure_cell[0][0] or None
    except (IndexError, TypeError):
        # Empty or non-indexable annotation cell: treat the wafer as unlabeled.
        # Only these two errors are expected; anything else (including
        # KeyboardInterrupt) now propagates instead of being swallowed.
        # `label` keeps its value if only the failure-type cell was missing.
        unlabeled.append([wafer_map, die_size, lot_name, wafer_index, label, failureType])
    else:
        labeled.append([wafer_map, die_size, lot_name, wafer_index, label, failureType])

df_labeled = pd.DataFrame(labeled, columns=_SPLIT_COLUMNS)
df_unlabeled = pd.DataFrame(unlabeled, columns=_SPLIT_COLUMNS)

In [None]:
# Non-null entries per column of the labeled subset.
df_labeled.notna().sum()

In [None]:
# Non-null entries per column of the unlabeled subset.
df_unlabeled.notna().sum()

In [None]:
# Sanity check: every wafer must land in exactly one of the two subsets.
# Computed from the frames instead of hand-copied counts (172950 + 638507),
# so the check stays correct if the data or the split logic changes.
assert len(df_labeled) + len(df_unlabeled) == len(df), \
    "labeled/unlabeled split lost or duplicated rows"
len(df_labeled) + len(df_unlabeled)

In [None]:
# Render the labeled subset (flattened 'label' / 'failureType' columns) with
# the notebook's rich display.
display(df_labeled)

In [None]:
# List each distinct failure type on its own line.
print("\n".join(map(str, df_labeled['failureType'].unique())))

In [None]:
# Number of labeled wafers per failure type, most frequent first.
df_labeled.loc[:, 'failureType'].value_counts()

In [None]:
# Show the raw frame ordered from the largest die count to the smallest.
largest_die_first = df.sort_values('dieSize', ascending=False)
largest_die_first

In [None]:
# Unwrap the nested annotation cells of the wafer with index label 42.
sample_wafer = df.loc[42]
print(sample_wafer['trianTestLabel'][0][0])
print(sample_wafer['failureType'][0][0])

In [None]:
# Print every field of the wafer at position 42.
wafer_at_42 = df.iloc[42]
print(wafer_at_42)

In [None]:
# Raw wafer-map array of the wafer with index label 42.
df.loc[42, 'waferMap']

In [None]:
# Render the wafer map of index label 42 as an image with the 'cividis' colormap.
plt.imshow(df.loc[42, 'waferMap'], cmap='cividis')

In [None]:
# Dump the wafer map of index label 42 as text, one space-separated line per
# row of dies.
for die_row in df.loc[42, 'waferMap']:
    print(" ".join(map(str, die_row)))

In [None]:
# Map each distinct failure type to the labeled wafers carrying that type.
wafer_failures = {
    failure_name: df_labeled[df_labeled['failureType'] == failure_name]
    for failure_name in df_labeled['failureType'].unique()
}

In [None]:
def show_samples(wafer, failureType, n_samples=10, random_state=None) -> None:
    """Display a row of randomly sampled wafer maps for one failure type.

    input:
      wafer: dict mapping a failure-type name to a DataFrame of wafers. Each
            DataFrame must provide the columns 'waferMap', 'lotName',
            'waferIndex' and 'failureType'.

      failureType: string key into `wafer`, for example:
            none
            Loc
            Edge-Loc
            Center
            Edge-Ring
            Scratch
            Random
            Near-full
            Donut

      n_samples: maximum number of wafers to draw (default 10). Fewer are
            shown when the failure type has fewer rows, instead of raising.

      random_state: optional seed forwarded to DataFrame.sample so a figure
            can be reproduced. None (default) draws a fresh random sample.

    output: none - displays images

    """
    candidates = wafer[failureType]

    # Never ask for more rows than exist: DataFrame.sample raises otherwise.
    n_shown = min(n_samples, len(candidates))
    if n_shown <= 0:
        print(f"No wafers to display for failure type: {failureType}")
        return

    picked = candidates.sample(n_shown, random_state=random_state)

    # squeeze=False keeps `axes` two-dimensional even for a single subplot,
    # so the indexing below works for any n_shown >= 1.
    fig, axes = plt.subplots(1, n_shown, figsize=(20, 20), squeeze=False)

    for ax, (_, row) in zip(axes[0], picked.iterrows()):
        wafer_info = str(row['lotName']) + "wfr" + str(row['waferIndex'])
        # The fixed extent/aspect pair draws every map in the same box on
        # screen, whatever the array dimensions are.
        ax.imshow(row['waferMap'], 'cividis', extent=[0, 100, 0, 1], aspect=100)
        ax.set_title(wafer_info)
        ax.set_xlabel(row['failureType'])
        ax.get_xaxis().set_ticks([])
        ax.get_yaxis().set_ticks([])
        ax.axis('on')

    plt.show()

In [None]:
# ANSI escape codes that switch bold text on and off in the cell output.
BOLD_ON = '\033[1m'
BOLD_OFF = '\033[0m'

# One bold heading plus one row of sample wafers per failure type.
for failure_name in df_labeled['failureType'].unique():
    print(f"{BOLD_ON}{failure_name}{BOLD_OFF}")
    show_samples(wafer_failures, failure_name)

In [None]:
# Count null cells per column. The empty `[]` annotation cells are array
# objects rather than nulls, so they are not counted here.
print(df.isnull().sum())

# dropna() and fillna() return new frames and leave `df` untouched, so the
# results must be bound to a name to have any effect. The two strategies are
# kept side by side under separate names; `df` itself stays unmodified.
df_dropped = df.dropna()  # rows with missing values removed
df_filled = df.fillna(0)  # missing values replaced by 0
df_filled