In [1]:
import numpy as np

## Numpy
### n X n matrix

In [36]:
def n_size_ndarray_creation(n, dtype=np.int):
    return np.arange(n**2).reshape(n,n)

In [37]:
n_size_ndarray_creation(5)

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24]])

### zero_or_one_or_empty_ndarray

In [38]:
def zero_or_one_or_empty_ndarray(shape, type, dtype):
    if type==0:
        return np.zeros(shape=shape, dtype=dtype)
    if type==1:
        return np.ones(shape=shape, dtype=dtype)
    if type==99:
        return np.empty(shape=shape, dtype=dtype)

In [39]:
zero_or_one_or_empty_ndarray(shape = (2,2), type=1,dtype=int)

array([[1, 1],
       [1, 1]])

In [34]:
zero_or_one_or_empty_ndarray(shape = (3,3), type=99,dtype=int)

array([[0, 0, 0],
       [0, 0, 0],
       [0, 0, 0]])

### change_shape_of_ndarray

In [69]:
def change_shape_of_ndarray(X, n_row):
    return X.flatten() if n_row==1 else X.reshape(n_row,-1)

In [70]:
X = np.ones((32,32), dtype=np.int)

change_shape_of_ndarray(X,1)

array([1, 1, 1, ..., 1, 1, 1])

In [71]:
change_shape_of_ndarray(X,512)

array([[1, 1],
       [1, 1],
       [1, 1],
       ...,
       [1, 1],
       [1, 1],
       [1, 1]])

### concat_ndarray

In [4]:
def concat_ndarray(X_1, X_2, axis):
    try:
        if X_1.ndim == 1:
            X_1 = X_1.reshape(1,-1)
        if X_2.ndim == 1:
            X_2 = X_2.reshape(1,-1)
        return np.concatenate((X_1,X_2), axis=axis)
    except ValueError as e:
        return False
    
a = np.array([[1, 2], [3, 4]])
b = np.array([[5, 6]])
concat_ndarray(a, b, 0)

array([[1, 2],
       [3, 4],
       [5, 6]])

In [14]:
np.concatenate((a,b), axis=0)

array([[1, 2],
       [3, 4],
       [5, 6]])

In [94]:
a = np.array([[1, 2], [3, 4]])
b = np.array([[5, 6]])
concat_ndarray(a, b, 0)


array([[1, 2],
       [3, 4],
       [5, 6]])

In [75]:
concat_ndarray(a, b, 1)

False

In [91]:
a = np.array([1, 2])
b = np.array([5, 6, 7])
concat_ndarray(a, b, 1)


0.5

In [77]:
concat_ndarray(a, b, 0)

False

### normalize_ndarray

In [126]:
def normalize_ndarray(X, axis=99, dtype=np.float32):
    if axis == 1:
        x_mean = np.mean(X,1).reshape(-1,1)
        x_std = np.std(X,1).reshape(-1,1)
        Z = (X - x_mean) / x_std
    if axis == 0:
        x_mean = np.mean(X,0)
        x_std = np.std(X,0)
        Z = (X - x_mean) / x_std
    else:
        x_mean = np.mean(X)
        x_std = np.std(X)
        Z = (X - x_mean) / x_std
    return Z

In [127]:
X = np.arange(12, dtype=np.float32).reshape(6,2)
normalize_ndarray(X)

array([[-1.593255  , -1.3035723 ],
       [-1.0138896 , -0.7242068 ],
       [-0.4345241 , -0.14484136],
       [ 0.14484136,  0.4345241 ],
       [ 0.7242068 ,  1.0138896 ],
       [ 1.3035723 ,  1.593255  ]], dtype=float32)

In [128]:
normalize_ndarray(X, 1)

array([[-1.593255  , -1.3035723 ],
       [-1.0138896 , -0.7242068 ],
       [-0.4345241 , -0.14484136],
       [ 0.14484136,  0.4345241 ],
       [ 0.7242068 ,  1.0138896 ],
       [ 1.3035723 ,  1.593255  ]], dtype=float32)

In [129]:
normalize_ndarray(X, 0)

array([[-1.46385, -1.46385],
       [-0.87831, -0.87831],
       [-0.29277, -0.29277],
       [ 0.29277,  0.29277],
       [ 0.87831,  0.87831],
       [ 1.46385,  1.46385]], dtype=float32)

### save_ndarray

In [149]:
def save_ndarray(X, filename="test.npy"):
    file_test = open(filename,"wb")
    np.save(X,file_test)

In [150]:
X = np.arange(32, dtype=np.float32).reshape(4, -1)
filename = "test.npy"
save_ndarray(X, filename)

TypeError: expected str, bytes or os.PathLike object, not numpy.ndarray

### boolean_index

In [169]:
def boolean_index(X, condition):
    condition = eval(str("X") + condition) == 1
    return np.where(condition)

In [170]:
X = np.arange(32, dtype=np.float32).reshape(4, -1)
boolean_index(X, "== 3")

(array([0], dtype=int64), array([3], dtype=int64))

### find_nearest_value

In [179]:
def find_nearest_value(X, target_value):
    return X[np.argmin(np.abs(X - target_value))]

In [181]:
X = np.random.uniform(0, 1, 100)
print(X)
target_value = 0.3
find_nearest_value(X, target_value)

[0.44182213 0.25502409 0.74972348 0.05831013 0.61920294 0.70662908
 0.12411368 0.65568702 0.35941233 0.36709125 0.12261389 0.71978705
 0.03378425 0.29180237 0.45434175 0.18619834 0.62102721 0.72913206
 0.75956237 0.16747628 0.11401423 0.81341183 0.21927813 0.9214057
 0.86907756 0.92307254 0.08242895 0.99499849 0.8802616  0.23776162
 0.74531954 0.23934539 0.66424226 0.89690577 0.52844245 0.15952445
 0.9993787  0.50918228 0.65540707 0.80867507 0.89897586 0.45791163
 0.11245392 0.52789616 0.49214655 0.86882384 0.73670576 0.03586156
 0.99011527 0.89470718 0.36236512 0.7358793  0.38005689 0.93675096
 0.8183859  0.88951976 0.51036545 0.71909919 0.77154948 0.66157148
 0.28259618 0.44139553 0.13088315 0.11469986 0.27983015 0.34156674
 0.52510696 0.17083696 0.84752933 0.60624689 0.84948579 0.3118716
 0.68981322 0.70694938 0.95757809 0.61419567 0.09942558 0.01121086
 0.20175955 0.71015355 0.16490519 0.90477669 0.73754224 0.88528279
 0.51073503 0.76532051 0.91264933 0.6132991  0.89106933 0.316876

0.29180236726517783

### get_n_largest_values

In [200]:
def get_n_largest_values(X, n):
    return X[np.argsort(X)[::-1][:n]]

In [201]:
X = np.random.uniform(0, 1, 100)
get_n_largest_values(X, 3)

array([0.98188972, 0.97898659, 0.97868012])

In [199]:
X[np.argsort(X[::-1])]

array([0.87903997, 0.2852035 , 0.38638126, 0.47207264, 0.56581892,
       0.67427353, 0.89488965, 0.52530376, 0.45470521, 0.6956676 ,
       0.60888481, 0.50352416, 0.19939584, 0.60258536, 0.81682244,
       0.16008888, 0.43966713, 0.75760316, 0.58877865, 0.7308327 ,
       0.39823471, 0.90559746, 0.77417196, 0.8571986 , 0.42137334,
       0.01501171, 0.87311126, 0.4366294 , 0.50190608, 0.40889909,
       0.45814173, 0.50566666, 0.75462491, 0.91653081, 0.01756067,
       0.21875916, 0.32444719, 0.27119788, 0.84795797, 0.30121537,
       0.20038661, 0.88630267, 0.14379885, 0.32715445, 0.96952572,
       0.02015501, 0.70507019, 0.630676  , 0.82377328, 0.31489001,
       0.15937166, 0.32875083, 0.82868427, 0.61095521, 0.88124178,
       0.09470324, 0.92955253, 0.04567717, 0.79520347, 0.2037086 ,
       0.17137815, 0.1493502 , 0.52212684, 0.86454533, 0.6655483 ,
       0.48489271, 0.93683526, 0.71522833, 0.62762491, 0.05062625,
       0.14887955, 0.73909686, 0.48289202, 0.71398948, 0.70978

## Build Matrix

In [2]:
import pandas as pd

### get_rating_matrix

In [30]:
df = pd.read_csv("/Data/movie_rating.csv")
df.head()

Unnamed: 0,critic,title,rating
0,Jack Matthews,Lady in the Water,3.0
1,Jack Matthews,Snakes on a Plane,4.0
2,Jack Matthews,You Me and Dupree,3.5
3,Jack Matthews,Superman Returns,5.0
4,Jack Matthews,The Night Listener,3.0


In [33]:
df.groupby(["critic","title"])["rating"].sum().unstack()

title,Just My Luck,Lady in the Water,Snakes on a Plane,Superman Returns,The Night Listener,You Me and Dupree
critic,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Claudia Puig,3.0,,3.5,4.0,4.5,2.5
Gene Seymour,1.5,3.0,3.5,5.0,3.0,3.5
Jack Matthews,,3.0,4.0,5.0,3.0,3.5
Lisa Rose,3.0,2.5,3.5,3.5,3.0,2.5
Mick LaSalle,2.0,3.0,4.0,3.0,3.0,2.0
Toby,,,4.5,4.0,,1.0


In [49]:
def get_rating_matrix(filename):
    df = pd.read_csv(filename)
    return df.groupby(["critic","title"])["rating"].sum().unstack().fillna(0).values
    
    
get_rating_matrix("/Data/movie_rating.csv")

array([[3. , 0. , 3.5, 4. , 4.5, 2.5],
       [1.5, 3. , 3.5, 5. , 3. , 3.5],
       [0. , 3. , 4. , 5. , 3. , 3.5],
       [3. , 2.5, 3.5, 3.5, 3. , 2.5],
       [2. , 3. , 4. , 3. , 3. , 2. ],
       [0. , 0. , 4.5, 4. , 0. , 1. ]])

### get_frequent_matrix

In [50]:
df = pd.read_csv("/Data/1000i.csv", dtype=np.float32)
df.head()

Unnamed: 0,source,target
0,3.0,7.0
1,4.0,15.0
2,2.0,49.0
3,5.0,44.0
4,1.0,1.0


In [69]:
df["rating"] = 1
df.groupby(["source","target"])["rating"].count().unstack()

target,0.0,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,...,40.0,41.0,42.0,43.0,44.0,45.0,46.0,47.0,48.0,49.0
source,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1.0,19,17,14,11,17,25,7,22,5,18,...,15,14,20,9,12,16,11,9,11,12
2.0,20,16,10,15,17,18,10,13,5,19,...,13,12,15,9,13,16,16,10,16,9
3.0,12,16,13,19,23,19,5,14,5,18,...,10,14,10,17,15,16,11,17,9,11
4.0,14,14,19,11,11,18,7,16,7,17,...,9,16,18,12,16,16,26,16,12,20
5.0,13,7,8,15,13,16,3,19,11,12,...,11,10,16,8,13,20,14,18,21,3


In [63]:
def get_frequent_matrix(filename):
    df = pd.read_csv(filename)
    return df.groupby(["source","target"])["target"].count().unstack().fillna(0).values


get_frequent_matrix("/Data/1000i.csv")

array([[19, 17, 14, 11, 17, 25,  7, 22,  5, 18, 10, 13, 13,  8, 20, 10,
         9, 10, 16, 15,  9, 11, 17, 15, 14,  8,  6, 12, 18, 12,  6, 18,
         9, 24,  7, 19, 14,  6,  4, 12, 15, 14, 20,  9, 12, 16, 11,  9,
        11, 12],
       [20, 16, 10, 15, 17, 18, 10, 13,  5, 19,  8, 14, 14,  9, 15, 14,
        13,  8, 12,  9,  5, 10, 28, 18,  7,  8,  6, 19, 14, 13, 11, 12,
        18, 15,  7, 11, 17,  9,  5,  5, 13, 12, 15,  9, 13, 16, 16, 10,
        16,  9],
       [12, 16, 13, 19, 23, 19,  5, 14,  5, 18,  7,  6, 14,  8, 20, 17,
        14, 11, 16, 12,  7,  9, 23, 12, 12,  8,  7, 23, 26, 10,  9, 20,
        16, 11,  4, 19, 12, 12,  5, 10, 10, 14, 10, 17, 15, 16, 11, 17,
         9, 11],
       [14, 14, 19, 11, 11, 18,  7, 16,  7, 17,  6, 19, 18, 12, 13, 13,
        14,  9, 21, 16,  6,  6, 19, 14, 19,  5, 12, 14, 18, 11, 11, 21,
        15, 10, 11, 14, 17, 21,  6, 14,  9, 16, 18, 12, 16, 16, 26, 16,
        12, 20],
       [13,  7,  8, 15, 13, 16,  3, 19, 11, 12,  7, 10, 13, 14, 16, 