In [2]:
import numpy as np
import sklearn

## Generate Data

In [64]:
from sklearn.datasets import make_classification, make_blobs
# Cluster of points normally distributed (std=1) about vertices
# of an n_informative-dim hypercube with sides of length 2*class_sep

# Features are ordered: n_informative, n_redundant, n_repeated, 
# then random noise
N_INFORMATIVE = 3

X, y = make_classification(
    n_samples=100, 
    n_features=1000, 
    n_informative=N_INFORMATIVE,
    n_redundant=0, 
    n_repeated=0, 
    n_classes=2, 
    n_clusters_per_class=1, 
    flip_y=0.01, 
    class_sep=2, 
    random_state=42,
    shuffle=False
)


# Isotropic Gaussian blobs for clustering

X_iso, y_iso = make_blobs(
    n_samples=100, 
    n_features=1000, 
    random_state=42,
    centers=2
)

### Plot of 3 Dimensions

In [65]:
import plotly.express as px

In [66]:
px.scatter_3d(x=X[:,0], y=X[:,1], z=X[:,2], color=y)

In [54]:
px.scatter_3d(x=X_iso[:,0], y=X_iso[:,1], z=X_iso[:,2], color=y_iso)

## Linear Separator

In [55]:
from sklearn.svm import LinearSVC

clf = LinearSVC(random_state=42, max_iter=1000, C=1,
                penalty="l1", loss="squared_hinge", dual=False)
clf.fit(X, y)
print("Number of iterations", clf.n_iter_)

Number of iterations 411


In [56]:
clf.coef_

array([[ 7.87308762e-01,  1.37070849e-01,  0.00000000e+00,
         0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
         0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
         0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
         0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
         0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
         0.00000000e+00,  2.69187026e-02,  0.00000000e+00,
         0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
         0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
         0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
         0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
         0.00000000e+00,  0.00000000e+00,  1.87215482e-02,
         0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
         0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
         0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        -3.99893334e-02,  0.00000000e+00,  0.00000000e+00,
         0.00000000e+00,  0.00000000e+00,  0.00000000e+0

In [57]:
print("Mean accuracy:", clf.score(X, y))

Mean accuracy: 1.0


In [58]:
clf_iso = LinearSVC(random_state=42)
clf_iso.fit(X_iso, y_iso)
print("Number of iterations", clf_iso.n_iter_)

Number of iterations 204


In [59]:
print("Coefficients", clf_iso.coef_)

Coefficients [[-1.22741377e-04 -2.55617129e-04  6.62983815e-05  7.85023403e-05
   3.93938402e-04  3.15773142e-04  3.77868614e-04 -2.51336803e-06
  -2.32997787e-04 -1.45443577e-04  1.31878685e-04  1.26121619e-05
   6.76846176e-05 -7.75049950e-05  3.35316747e-04  4.17354937e-04
  -7.27878057e-05 -2.31387619e-06  2.59012407e-04 -1.45379905e-04
   5.46767028e-05  8.50232885e-05  3.83445178e-04  3.26634451e-04
   2.90225724e-04 -1.86701188e-04  3.94048900e-04  1.98262728e-04
  -1.48949943e-04  4.77164624e-04 -3.67010013e-04  2.74703636e-04
   1.03104169e-04 -5.25839969e-04 -5.21288520e-04 -7.81898200e-05
   4.12269322e-05  3.78146218e-04 -4.02755545e-04 -7.96656982e-05
   2.52596088e-04  1.65814866e-04  1.54166912e-04 -1.65759399e-04
   3.74860738e-04 -1.83126831e-05 -5.47260361e-05 -2.75983629e-04
   2.00820547e-04 -1.12040003e-04 -1.11532585e-05 -1.26161260e-04
   3.62557008e-05 -6.78633148e-05  2.53633803e-04 -3.41679450e-04
   4.27697760e-04  1.12949211e-04  2.23467266e-04  2.16485044e-

In [60]:
print("Mean accuracy:", clf_iso.score(X_iso, y_iso))

Mean accuracy: 1.0


### Distance from Point to Hyperplane

In [61]:
clf.decision_function(X)

array([-0.98127202, -1.03173125, -1.792845  , -3.03550954, -3.30380621,
       -1.15967159, -2.21092296, -1.8392983 , -2.29906692, -0.95880187,
       -1.27147893, -3.28963385, -0.97550506, -2.25512052, -1.35118878,
       -2.71078932, -0.97669541, -1.22739883, -1.02726875, -2.78060634,
       -0.94286773, -0.94908696, -1.03831697, -0.9505641 , -1.18073064,
       -3.03467833, -1.07801678, -0.98606773, -0.99499395, -1.10762388,
       -1.80996197, -1.01946503, -2.68501314, -2.06510903, -0.91877544,
       -1.68300991, -0.96637212, -2.08394802, -0.95253878, -2.46134168,
       -1.00238554, -2.05683392, -3.66499868, -1.23535945, -1.99479972,
       -1.3553871 , -2.4570428 , -1.84341508, -0.98936781, -0.97717749,
        2.10396924,  1.17521266,  1.2443096 ,  1.58964738,  1.4321618 ,
        0.94058227,  1.50605346,  1.94837368,  1.29634412,  0.99854264,
        1.55265634,  0.94555005,  1.11392459,  1.0086282 ,  1.65274336,
        1.0339941 ,  0.94717325,  1.23291956,  1.34873385,  2.03

In [63]:
clf_iso.decision_function(X)

array([ 1.05476994e-02, -3.15047089e-03,  2.42619572e-03, -1.98282968e-03,
       -4.53511897e-03,  5.73552309e-03, -7.66779305e-03,  5.28901302e-03,
        1.75773102e-03, -7.72370515e-03,  4.95300563e-04, -4.67416010e-03,
        1.26444856e-03, -5.28052237e-04,  7.98737867e-03,  2.78911600e-03,
       -4.10585129e-03,  6.00832312e-03,  3.18269719e-03, -2.32079777e-03,
        1.35780705e-02, -4.91421066e-04, -9.52374445e-03,  1.32586091e-03,
        3.70585593e-03, -1.64475597e-03,  7.02923254e-03, -1.86836431e-02,
        4.14966008e-03,  4.18713193e-03,  4.94761433e-03,  6.26706679e-03,
        1.97490074e-02,  2.40840114e-03,  1.97540617e-03, -1.11744723e-04,
        6.69708534e-03,  9.84894261e-03, -1.02224577e-03,  4.34519280e-03,
       -1.54557468e-03,  7.47904295e-03,  6.13915181e-03, -1.00606511e-02,
        2.91716696e-03,  1.81124237e-02,  4.25849420e-03,  9.33382820e-03,
        9.08407645e-03,  1.20449771e-03,  8.50575787e-03,  1.14153360e-02,
        4.36353511e-03,  