In [1]:
import fcalc
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import train_test_split

# Binarized data

In [2]:
# Nine board squares in row-major order, followed by the outcome label.
column_names = [
    f'{row}-{col}-square'
    for row in ('top', 'middle', 'bottom')
    for col in ('left', 'middle', 'right')
] + ['Class']

# The file is read with these names supplied explicitly.
df = pd.read_csv('data_sets/tic-tac-toe.data', names=column_names)

# Boolean target: True exactly where the raw label is 'positive'.
df['Class'] = df['Class'] == 'positive'
df.head()

Unnamed: 0,top-left-square,top-middle-square,top-right-square,middle-left-square,middle-middle-square,middle-right-square,bottom-left-square,bottom-middle-square,bottom-right-square,Class
0,x,x,x,x,o,o,x,o,o,True
1,x,x,x,x,o,o,o,x,o,True
2,x,x,x,x,o,o,o,o,x,True
3,x,x,x,x,o,o,o,b,b,True
4,x,x,x,x,o,o,b,o,b,True


In [3]:
# One-hot encode every board square (all columns except the trailing 'Class')
# and force the indicator columns to bool.
feature_columns = column_names[:-1]
X = pd.get_dummies(df[feature_columns], prefix=feature_columns).astype(bool)
y = df['Class']

# 70/30 train/test split with a fixed seed so the scores are reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.3
)


In [4]:
# Build the binarized classifier from the training split, passed as plain arrays.
bin_cls = fcalc.classifier.BinarizedBinaryClassifier(
    X_train.to_numpy(),
    y_train.to_numpy(),
    method="standard-support",
)

In [5]:
# Classify the held-out rows; the scoring cell reads the result from
# `bin_cls.predictions`.
bin_cls.predict(X_test.to_numpy())

In [6]:
# Score the binarized classifier on the held-out split: accuracy first, then F1.
# `accuracy_score` and `f1_score` are imported once in the notebook's import
# cell instead of being re-imported before every evaluation.
for metric in (accuracy_score, f1_score):
    print(metric(y_test, bin_cls.predictions))

0.9965277777777778
0.9974160206718347


# Pattern structures

In [13]:
# Reload the raw tic-tac-toe table for the pattern-structure experiments.
board_rows = ('top', 'middle', 'bottom')
board_cols = ('left', 'middle', 'right')
column_names = [f'{r}-{c}-square' for r in board_rows for c in board_cols]
column_names.append('Class')

df = pd.read_csv('data_sets/tic-tac-toe.data', names=column_names)

# Boolean target: True exactly where the raw label is 'positive'.
df['Class'] = df['Class'] == 'positive'
df.head()

Unnamed: 0,top-left-square,top-middle-square,top-right-square,middle-left-square,middle-middle-square,middle-right-square,bottom-left-square,bottom-middle-square,bottom-right-square,Class
0,x,x,x,x,o,o,x,o,o,True
1,x,x,x,x,o,o,o,x,o,True
2,x,x,x,x,o,o,o,o,x,True
3,x,x,x,x,o,o,o,b,b,True
4,x,x,x,x,o,o,b,o,b,True


In [14]:
# Keep the square columns as they are (no one-hot encoding in this section).
feature_columns = column_names[:-1]
X, y = df[feature_columns], df['Class']

# Same 70/30 split and seed as in the binarized section.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.3
)

In [25]:
# Every feature column index is passed as categorical.
every_column = np.arange(X_train.shape[1])
pat_cls = fcalc.classifier.PatternBinaryClassifier(
    X_train.to_numpy(),
    y_train.to_numpy(),
    categorical=every_column,
)

In [26]:
# Classify the held-out boards; the scoring cell reads the result from
# `pat_cls.predictions`.
pat_cls.predict(X_test.to_numpy())

In [27]:
# Score the pattern-structure classifier on the held-out split: accuracy, then F1.
# `accuracy_score` and `f1_score` are imported once in the notebook's import
# cell instead of being re-imported before every evaluation.
for metric in (accuracy_score, f1_score):
    print(metric(y_test, pat_cls.predictions))

0.9930555555555556
0.9948453608247423


In [28]:
# Four measurements followed by the species label.
iris_columns = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species']
df = pd.read_csv('data_sets/iris.data', names=iris_columns)

# Binary target: True for 'Iris-setosa', False for every other species.
df['species'] = df['species'] == 'Iris-setosa'

# Seed the preview so the displayed rows are the same on every run.
df.sample(10, random_state=42)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
110,6.5,3.2,5.1,2.0,False
87,6.3,2.3,4.4,1.3,False
126,6.2,2.8,4.8,1.8,False
59,5.2,2.7,3.9,1.4,False
20,5.4,3.4,1.7,0.2,True
119,6.0,2.2,5.0,1.5,False
6,4.6,3.4,1.4,0.3,True
91,6.1,3.0,4.6,1.4,False
112,6.8,3.0,5.5,2.1,False
54,6.5,2.8,4.6,1.5,False


In [29]:
# Features are every column but the last; the boolean `species` flag is the target.
X, y = df.iloc[:, :-1], df['species']

# 70/30 train/test split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.3
)

In [45]:
# No `categorical` argument is passed for the iris measurements.
pat_cls = fcalc.classifier.PatternBinaryClassifier(
    X_train.to_numpy(),
    y_train.to_numpy(),
)

In [46]:
# Classify the held-out iris rows; the scoring cell reads the result from
# `pat_cls.predictions`.
pat_cls.predict(X_test.to_numpy())

In [47]:
# Report accuracy and F1 on the held-out iris rows, rounded to four decimals.
# `accuracy_score` and `f1_score` are imported once in the notebook's import
# cell instead of being re-imported before every evaluation.
for label, metric in (("accuracy:", accuracy_score), ("f1 score:", f1_score)):
    print(label, round(metric(y_test, pat_cls.predictions), 4))

accuracy: 1.0
f1 score: 1.0


In [48]:
# Load the heart-failure clinical records; the header row comes from the file.
df = pd.read_csv('data_sets/heart_failure_clinical_records_dataset.csv')

# Seed the preview so the displayed rows are the same on every run.
df.sample(5, random_state=42)

Unnamed: 0,age,anaemia,creatinine_phosphokinase,diabetes,ejection_fraction,high_blood_pressure,platelets,serum_creatinine,serum_sodium,sex,smoking,time,DEATH_EVENT
224,58.0,0,582,1,25,0,504000.0,1.0,138,1,0,205,0
239,55.0,1,180,0,45,0,263358.03,1.18,137,1,1,211,0
52,60.0,0,3964,1,62,0,263358.03,6.8,146,0,0,43,1
201,45.0,0,308,1,60,1,377000.0,1.0,136,1,0,186,0
188,60.667,1,151,1,40,1,201000.0,1.0,136,0,0,172,0


In [49]:
# Features are every column but the last; `DEATH_EVENT` is the target.
# NOTE(review): these labels are 0/1 integers, whereas the earlier sections
# pass boolean labels to the classifier - confirm it accepts both.
X, y = df.iloc[:, :-1], df['DEATH_EVENT']

# 70/30 train/test split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.3
)

In [None]:
# Columns passed as categorical, named explicitly instead of hard-coding their
# positions. With the column order of this dataset they resolve to indices
# 1, 3, 5, 9 and 10; a missing column raises KeyError rather than silently
# pointing at the wrong feature.
categorical_columns = ['anaemia', 'diabetes', 'high_blood_pressure', 'sex', 'smoking']
categorical_idx = np.array(
    [X_train.columns.get_loc(name) for name in categorical_columns]
)

pat_cls = fcalc.classifier.PatternBinaryClassifier(
    X_train.to_numpy(),
    y_train.to_numpy(),
    categorical=categorical_idx,
)

In [None]:
# Classify the held-out patient records; the scoring cell reads the result
# from `pat_cls.predictions`.
pat_cls.predict(X_test.to_numpy())

In [None]:
# Report accuracy and F1 on the held-out patient records, rounded to four decimals.
# `accuracy_score` and `f1_score` are imported once in the notebook's import
# cell instead of being re-imported before every evaluation.
for label, metric in (("accuracy:", accuracy_score), ("f1 score:", f1_score)):
    print(label, round(metric(y_test, pat_cls.predictions), 4))