## Generating Data

#### Generating random values 

In [2]:
import numpy as np

np.random.seed(420) # set seed for reproductive values
np.random.random(3) # generate 3 random float between 0,1
np.random.randint(0,11,3) # generate 3 random num between 0,10
np.random.normal(0,1,3) # generate normal distribution with mean 0 and std of 1
np.random.logistic(0,1,3) # generate 3 rand numb from logistic reg with mean 0 and scale of 1
np.random.uniform(2,5,3) # generate 3 rand element between [2,5[

array([2.7113966 , 2.70621101, 4.32311415])

#### Generate dataset for regression

In [3]:
from sklearn.datasets import make_regression

features, target, coefficients = make_regression(
    n_samples = 100,
    n_features = 3,
    n_informative = 3,
    n_targets = 1,
    noise = 0,
    coef = True,
    random_state = 420
)

print('Features Matrix\n', features[:3])
print('Target Vector\n', target[:3])

# n_informative: number of features used to create target

Features Matrix
 [[ 0.88927717 -0.98514348  0.14545183]
 [ 0.57073084  0.0653164   0.67641123]
 [-0.12873466 -0.17049738  1.74133681]]
Target Vector
 [  9.82952567  74.59668467 108.37143506]


#### Generate dataset for classification

In [4]:
from sklearn.datasets import make_classification

features, target = make_classification(
    n_samples = 100,
    n_features = 3,
    n_informative =3,
    n_redundant = 0,
    n_classes = 2,
    weights =[.25, .75],
    random_state = 420
)

print('Features Matrix\n', features[:3])
print('Target Vector\n', target[:3])

# weights: create imbalance in classes

Features Matrix
 [[ 2.2062126  -0.09854505  0.43939749]
 [ 1.02605503 -0.84677005 -1.01515178]
 [ 0.61771459 -1.05462148  2.81549146]]
Target Vector
 [1 1 0]


#### Generate dataset for clustering

In [5]:
from sklearn.datasets import make_blobs

feature, target = make_blobs(
    n_samples = 100,
    n_features = 2,
    centers =3,
    cluster_std =0.5,
    shuffle = True,
    random_state = 420
)
print('Feature Matrix\n', features[:3])
print('Target Vector\n', target[:3])

# center: number of cluster generated

Feature Matrix
 [[ 2.2062126  -0.09854505  0.43939749]
 [ 1.02605503 -0.84677005 -1.01515178]
 [ 0.61771459 -1.05462148  2.81549146]]
Target Vector
 [0 0 1]
