# Generating Data with NumPy

In [1]:
import numpy as np

## np.empty(), np.zeros(), np.ones(), np.full()

In [6]:
# Quick array constructors. np.empty only allocates memory without
# initialising it, so the dtype is inspected rather than the contents.
array_empty = np.empty((2, 3))
array_empty.dtype

dtype('float64')

In [7]:
# 2x3 array of zeros; no dtype is given, so NumPy falls back to float64.
array_zeros = np.zeros((2, 3))
array_zeros

array([[0., 0., 0.],
       [0., 0., 0.]])

In [8]:
# Same zeros array, this time stored as 8-bit integers.
array_zeros = np.zeros((2, 3), dtype=np.int8)
array_zeros

array([[0, 0, 0],
       [0, 0, 0]], dtype=int8)

In [11]:
# Requesting a string dtype turns every 1 into the one-character string '1'.
array_ones = np.ones((2, 2), dtype=str)
array_ones

array([['1', '1'],
       ['1', '1']], dtype='<U1')

In [14]:
# 2x3 array where every element is the given fill value.
array_full = np.full((2, 3), 2)
array_full

array([[2, 2, 2],
       [2, 2, 2]])

## `_like` functions

In [15]:
# 3x5 integer matrix that serves as the prototype for the *_like constructors.
matrix_a = np.array([
    [1, 1, 1, 2, 0],
    [3, 6, 6, 7, 4],
    [4, 5, 3, 8, 0],
])
matrix_a

array([[1, 1, 1, 2, 0],
       [3, 6, 6, 7, 4],
       [4, 5, 3, 8, 0]])

In [23]:
# Uninitialised array that copies matrix_a's shape and dtype. The contents are
# whatever happened to be in memory, so the displayed values carry no meaning.
array_empty_like = np.empty_like(matrix_a)
array_empty_like

array([[0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0]])

In [20]:
# Zeros with the same shape and dtype as matrix_a.
array_zeros_like = np.zeros_like(matrix_a)
array_zeros_like

array([[0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0]])

In [24]:
# Ones with the same shape and dtype as matrix_a.
array_ones_like = np.ones_like(matrix_a)
array_ones_like

array([[1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1]])

In [26]:
# Constant-filled array that borrows matrix_a's shape and dtype.
array_full_like = np.full_like(matrix_a, 4)
array_full_like

array([[4, 4, 4, 4, 4],
       [4, 4, 4, 4, 4],
       [4, 4, 4, 4, 4]])

## np.arange()

In [29]:
# Built-in range for comparison: integers from 0 up to, but not including, 10.
list(range(10))

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [32]:
# NumPy counterpart of range(): a single positional argument is the exclusive stop.
np.arange(30)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29])

In [33]:
# Same array as above, with the stop value passed by keyword.
np.arange(stop=30)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29])

In [36]:
# Explicit bounds: start is included, stop is excluded.
np.arange(start=21, stop=30)

array([21, 22, 23, 24, 25, 26, 27, 28, 29])

In [37]:
# A non-integer step produces a float array. For fractional steps the NumPy
# docs recommend np.linspace, because the output length of arange can be
# affected by floating-point rounding.
np.arange(start=21, stop=30, step=2.5)

array([21. , 23.5, 26. , 28.5])

In [38]:
# Same sequence, stored as half-precision floats.
np.arange(start=21, stop=30, step=2.5, dtype=np.float16)

array([21. , 23.5, 26. , 28.5], dtype=float16)

In [39]:
# Gotcha: with an integer dtype the fractional step is not applied as written.
# The length (4) is still derived from the float step, but the step actually
# used is int(21 + 2.5) - int(21) = 2, so the result is [21, 23, 25, 27]
# rather than truncated versions of 21, 23.5, 26, 28.5.
np.arange(start=21, stop=30, step=2.5, dtype=np.int32)

array([21, 23, 25, 27], dtype=int32)

## Random generators

### Defining Random Generators and Seeds

In [43]:
from numpy.random import Generator as gen, PCG64 as pcg

In [47]:
# Build a Generator on top of a PCG64 bit generator. No seed is given, so the
# values differ on every run. Despite its name, array_random_generated holds
# the Generator object, not an array.
array_random_generated = gen(pcg())
# Draw a single sample from the standard normal distribution.
array_random_generated.normal()

0.6178731124829107

In [50]:
# Five standard-normal samples returned as a 1-D array.
array_random_generated.normal(size=5)

array([-0.22497992,  0.32052955,  1.15017732, -0.40356222, -0.35273315])

In [51]:
# A tuple for size yields a 5x5 matrix of standard-normal samples.
array_random_generated.normal(size=(5, 5))

array([[ 0.08480043, -0.89798777,  0.29083938, -0.03576859,  0.54288791],
       [ 2.86655945, -0.97068979,  0.59367643,  0.38745249,  1.18464076],
       [-0.24715454, -0.74664557, -0.60468742, -1.18495438,  1.71211197],
       [ 0.21223978, -1.01612239,  0.29559703, -0.58114463,  1.51646607],
       [-0.28358397,  0.1288424 , -2.02004341,  1.73117923,  1.75747294]])

In [61]:
# Fixing the seed makes the draw reproducible: re-running this cell always
# yields the same 5x5 matrix.
array_random_generated = gen(pcg(365))
array_random_generated.normal(size=(5, 5))

array([[-0.13640899,  0.09414431, -0.06300442,  1.05391641, -0.6866818 ],
       [-0.50922173, -0.7999526 ,  0.73041825,  0.08825439, -2.1177576 ],
       [ 0.65526774, -0.48095012, -0.5519114 , -0.58578662, -0.98257896],
       [ 1.12378166, -1.30984316, -0.04703774,  0.955272  ,  0.26071745],
       [-0.20023668, -1.50172484, -1.4929163 ,  0.96535084,  1.18694633]])

## Generating Integers, Probabilities and Random Choices

In [70]:
# Re-seed so the result is reproducible, then draw integers from [0, 4):
# a single bound is treated as the exclusive upper limit.
array_random_generated = gen(pcg(seed=365))
array_random_generated.integers(low=0, high=4, size=(2, 2))

array([[0, 3],
       [2, 3]])

In [71]:
# Integers drawn from [10, 100): low is inclusive, high is exclusive.
array_random_generated = gen(pcg(seed=365))
array_random_generated.integers(10, 100, size=(5, 5))

array([[18, 78, 64, 78, 84],
       [66, 67, 28, 10, 69],
       [45, 15, 37, 74, 96],
       [19, 21, 89, 73, 54],
       [53, 84, 66, 51, 92]])

In [73]:
# Uniform floats in [0, 1), usable as probabilities.
array_random_generated = gen(pcg(seed=365))
array_random_generated.random((5, 5))

array([[0.75915734, 0.7662218 , 0.6291028 , 0.20336599, 0.66501486],
       [0.06559111, 0.71326309, 0.10812106, 0.87969046, 0.49405844],
       [0.82472673, 0.45652944, 0.07367232, 0.69628564, 0.36690736],
       [0.29787156, 0.4996155 , 0.4865245 , 0.62740703, 0.54952637],
       [0.64894629, 0.04411757, 0.7206516 , 0.84594003, 0.17159792]])

In [77]:
# Sample from an explicit list of candidate values; every value is equally likely.
array_random_generated = gen(pcg(seed=365))
array_random_generated.choice(a=[1, 2, 3, 4, 5], size=(5, 5))

array([[4, 4, 2, 3, 1],
       [2, 5, 2, 2, 3],
       [2, 5, 2, 2, 5],
       [5, 4, 2, 2, 3],
       [4, 3, 2, 5, 1]])

In [79]:
# Sample from a list of values with one probability per value (p must sum to 1).
array_random_generated = gen(pcg(seed=365))
array_random_generated.choice(
    [1, 2, 3, 4, 5],
    size=(5, 5),
    p=[0.1, 0.2, 0.3, 0.2, 0.2],
)

array([[4, 4, 4, 2, 4],
       [1, 4, 2, 5, 3],
       [5, 3, 1, 4, 3],
       [2, 3, 3, 4, 3],
       [4, 1, 4, 5, 2]])

## Generating Arrays From Known Distributions

In [81]:
# Poisson samples using the default rate (lam is left unspecified).
array_random_generated = gen(pcg(seed=365))
array_random_generated.poisson(size=(5, 5))

array([[2, 0, 1, 1, 2],
       [1, 1, 0, 1, 1],
       [1, 2, 1, 1, 0],
       [0, 1, 0, 2, 1],
       [0, 1, 0, 0, 2]])

In [82]:
# Poisson samples with an expected value (lam) of 10.
array_random_generated = gen(pcg(seed=365))
array_random_generated.poisson(10, size=(5, 5))

array([[11, 12, 12, 14, 13],
       [ 9, 10, 11, 11,  8],
       [11,  8, 10,  9, 14],
       [ 7,  8,  9, 15, 15],
       [13,  8,  8,  7,  9]])

In [87]:
# Binomial samples: number of successes in 100 trials with success probability 0.8.
array_random_generated = gen(pcg(seed=365))
array_random_generated.binomial(100, 0.8, size=(5, 5))

array([[77, 77, 79, 83, 78],
       [86, 78, 85, 75, 80],
       [76, 81, 86, 78, 81],
       [82, 80, 80, 79, 80],
       [79, 87, 78, 76, 84]])

In [88]:
# Logistic samples centred on 9 with scale 1.2.
array_random_generated = gen(pcg(seed=365))
array_random_generated.logistic(9, 1.2, size=(5, 5))

array([[10.37767822, 10.42451863,  9.63404367,  7.36153427,  9.82286787],
       [ 5.81223125, 10.09354231,  6.46790532, 11.38740256,  8.97147918],
       [10.85844698,  8.79081317,  5.962079  ,  9.99560681,  8.34539118],
       [ 7.97105522,  8.9981544 ,  8.93530194,  9.6253307 ,  9.23850869],
       [ 9.73729284,  5.3090678 , 10.13723528, 11.04372782,  7.11078651]])

## Application of Random Generators

In [97]:
# Build five synthetic columns of 1000 values each from a single seeded
# generator. The draws are sequential, so their order determines the values.
array_random_generated = gen(pcg(seed=365))
array_column_1 = array_random_generated.normal(2, 3, size=1000)      # normal: loc=2, scale=3
array_column_2 = array_random_generated.normal(7, 2, size=1000)      # normal: loc=7, scale=2
array_column_3 = array_random_generated.logistic(11, 3, size=1000)   # logistic: loc=11, scale=3
array_column_4 = array_random_generated.exponential(4, size=1000)    # exponential: scale=4
array_column_5 = array_random_generated.geometric(0.7, size=1000)    # geometric: p=0.7

In [98]:
# Stack the five columns into one 2-D array. Each input becomes a row, so the
# result has shape (5, 1000).
random_test_data = np.array([
    array_column_1,
    array_column_2,
    array_column_3,
    array_column_4,
    array_column_5,
])
random_test_data

array([[ 1.59077303,  2.28243293,  1.81098674, ...,  0.1973629 ,
        -2.21015722,  2.91161235],
       [ 6.42174295,  8.57902322,  5.17270135, ...,  4.3465854 ,
         8.2176402 ,  7.90337695],
       [10.14698427, 15.93309953, -0.46878789, ...,  2.66485989,
        12.69328115, 11.79840961],
       [ 6.91500737,  6.243605  ,  2.44997251, ...,  0.80935387,
         0.50644607,  4.86816939],
       [ 1.        ,  1.        ,  1.        , ...,  1.        ,
         2.        ,  1.        ]], shape=(5, 1000))

In [99]:
# Check the layout of the stacked array as (rows, columns).
random_test_data.shape

(5, 1000)

In [102]:
# ndarray.transpose() returns a transposed view and leaves the original array
# unchanged, so the result must be assigned back for the shape to become
# (1000, 5), i.e. one row per observation and one column per variable.
# NOTE: this reassignment is not idempotent; running the cell a second time
# flips the array back to (5, 1000).
random_test_data = random_test_data.transpose()
random_test_data.shape

(1000, 5)

In [103]:
# Display the dataset that is written to disk in the next step.
random_test_data

array([[ 1.59077303,  6.42174295, 10.14698427,  6.91500737,  1.        ],
       [ 2.28243293,  8.57902322, 15.93309953,  6.243605  ,  1.        ],
       [ 1.81098674,  5.17270135, -0.46878789,  2.44997251,  1.        ],
       ...,
       [ 0.1973629 ,  4.3465854 ,  2.66485989,  0.80935387,  1.        ],
       [-2.21015722,  8.2176402 , 12.69328115,  0.50644607,  2.        ],
       [ 2.91161235,  7.90337695, 11.79840961,  4.86816939,  1.        ]],
      shape=(1000, 5))

In [105]:
# Write the dataset to a comma-separated text file; fmt="%s" formats each
# value through its plain string representation.
np.savetxt(
    "Random-Test-from-NumPy.csv",
    random_test_data,
    delimiter=",",
    fmt="%s",
)

In [106]:
# Read the file back to confirm that the saved data round-trips.
np.genfromtxt(
    "Random-Test-from-NumPy.csv",
    delimiter=",",
)

array([[ 1.59077303,  6.42174295, 10.14698427,  6.91500737,  1.        ],
       [ 2.28243293,  8.57902322, 15.93309953,  6.243605  ,  1.        ],
       [ 1.81098674,  5.17270135, -0.46878789,  2.44997251,  1.        ],
       ...,
       [ 0.1973629 ,  4.3465854 ,  2.66485989,  0.80935387,  1.        ],
       [-2.21015722,  8.2176402 , 12.69328115,  0.50644607,  2.        ],
       [ 2.91161235,  7.90337695, 11.79840961,  4.86816939,  1.        ]],
      shape=(1000, 5))