# Packages for ML

1.   NumPy
2.   Pandas
3.   Matplotlib
4.   TensorFlow/Keras

---


## 1. numpy

np.array()

In [None]:
import numpy as np

# Build arrays from (nested) Python lists: one 1-D array and two 2-D arrays.
arr1 = np.array([1, 2, 3])
arr2 = np.array([[1, 2, 3],
                 [4, 5, 6]])
arr3 = np.array([[1, 2],
                 [4, 5]])

# Print each array on the line below its label.
print('arr1 :\n', arr1)
print('arr2 :\n', arr2)
print('arr3 :\n', arr3)

In [None]:
# Indexing and slicing work independently along each dimension.
print('Indexing :', arr2[0, 0], arr2[1, 2])  # single elements: [row, column]
print('Slicing :', arr2[0, :], arr2[:, 1])   # whole first row, whole second column

array 정의

In [None]:
# Zero matrix, all-ones matrix and identity matrix (each 4x4 with dtype uint8).
print('영행렬 :\n', np.zeros((4, 4), dtype=np.uint8))
print('\n1로 초기화된 행렬 :\n', np.ones((4, 4), dtype=np.uint8))
print('\n단위 행렬 :\n', np.eye(N=4, dtype=np.uint8))

In [None]:
# np.arange(start, stop[, step]) yields evenly spaced values; stop is excluded.
print('arange1 :\n', np.arange(0, 10))
print('arange2 :\n', np.arange(10, 20, 2))

array 연산

In [None]:
# Element-wise arithmetic between two arrays of the same shape.
arr4 = np.array([10, 20, 30, 40, 50])
arr5 = np.array([1, 2, 3, 4, 5])

print('arr4 + arr5\t:', arr4 + arr5)
print('arr4 - arr5\t:', arr4 - arr5)
print('arr4 * arr5\t:', arr4 * arr5)
# `/` is true division, so the result is a float array even for integer inputs.
print('arr4 / arr5\t:', arr4 / arr5)

In [None]:
# Common linear-algebra operations on a 3x3 matrix.
arr6 = np.array([[1, 2, 3],
                 [4, 5, 6],
                 [7, 8, 9]])

# np.diag on a 2-D array extracts the main diagonal as a 1-D array
# (it does not build a diagonal matrix here), hence the label.
print('Diagonal elements :\n', np.diag(arr6))
# The function form and the attribute/method form are printed side by side.
print('\nTranspose :\n', np.transpose(arr6), '\n', arr6.T)
print('\nDot product :\n', np.dot(arr6, arr6.T), '\n', arr6.dot(arr6.T))
print('\nTrace :\n', np.trace(arr6), arr6.trace())
# arr6 is singular (row 0 + row 2 == 2 * row 1), so the exact determinant is 0;
# the floating-point result may print as a tiny non-zero value instead.
print('\nDeterminant :\n', np.linalg.det(arr6))

array 변형

In [None]:
# Reshaping: present the same 8 values under different shapes.
arr7 = np.array([[1, 2, 3, 4],
                 [5, 6, 7, 8]])

print('shape :', arr7.shape)
print('\nreshape :\n', arr7.reshape(4, 2), arr7.reshape(1, 8))
print('\nflatten :\n', arr7.flatten())

array 연결

In [None]:
# Joining arrays with hstack, vstack and stack (along an explicit axis).
arr8 = np.array([1, 2, 3])
print('arr8 :', arr8)

print('\nhstack :\n', np.hstack([arr8, arr8]))         # joined end to end
print('\nvstack :\n', np.vstack([arr8, arr8]))         # one input per row
print('\nstack :\n', np.stack([arr8, arr8], axis=0))   # new leading axis
print('\nstack :\n', np.stack([arr8, arr8], axis=1))   # new trailing axis

In [None]:
# Sorting a 1-D array.
arr9 = np.array([0, 2, 1, -2, -3, 3, -1])
print('arr9\t\t:', arr9)

# Sort ascending once; the reversed view of that result is the descending order.
arr9_sorted = np.sort(arr9)
print('sorted(asc)\t:', arr9_sorted)
print('sorted(desc)\t:', arr9_sorted[::-1])

In [None]:
# Sorting a 2-D array along each axis.
arr10 = np.array([[6, 9, 7],
                  [4, 5, 3],
                  [2, 1, 8]])
print('arr10 :\n', arr10)

within_rows = np.sort(arr10, axis=1)  # every row sorted left to right
within_cols = np.sort(arr10, axis=0)  # every column sorted top to bottom

print('\nsorted(axis=1, asc) :\n', within_rows)
print('\nsorted(axis=0, asc) :\n', within_cols)
# Reversing along the sorted axis turns ascending into descending order.
print('\nsorted(axis=1, desc) :\n', within_rows[:, ::-1])
print('\nsorted(axis=0, desc) :\n', within_cols[::-1])

## 2. pandas

pd.DataFrame()

In [None]:
# pandas: DataFrames make it possible to manage tabular data efficiently.
import pandas as pd

# One record per patient: name, age, height.
patient = [
    ['Kim', 31, 178],
    ['Lee', 23, 162],
    ['Park', 25, 160],
    ['Kang', 52, 173],
    ['Song', 15, 142],
]
df_a = pd.DataFrame(patient, columns=['name', 'age', 'height'])

df_a

In [None]:
# Column labels, then the table contents as a NumPy array.
print('columns :', df_a.columns)
print('values :\n', df_a.to_numpy())

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max); by default only
# the numeric columns are described.
df_a.describe()

dataframe indexing (columns)

In [None]:
# Select a single column by its label.
df_a['name']

In [None]:
# Select several columns at once by passing a list of labels.
df_a[['name', 'age']]

In [None]:
# Add new columns (and their values) by assigning to a new column label.
df_a['fee'] = [5000, 7000, 16000, 2500, 9000]
df_a['paid'] = [5000, 7000, 10000, 0, 0]
df_a

In [None]:
# Arithmetic and comparisons between columns are applied element-wise.
df_a['unpaid'] = df_a['fee'].sub(df_a['paid'])
df_a['fully_paid'] = df_a['unpaid'].eq(0)
df_a

dataframe indexing (rows)

In [None]:
# First row, selected by integer position.
df_a.iloc[0]

In [None]:
# Rows at positions 0, 1 and 2: iloc slices by position and excludes the end.
df_a.iloc[0:3]

In [None]:
# Rows labelled 0 through 3: loc slices by index label and includes the end
# label, so this returns one more row than iloc[0:3].
df_a.loc[0:3]

In [None]:
# Rows whose bill is not fully paid. The boolean column is negated with `~`
# instead of being compared against the literal False.
df_a.loc[~df_a['fully_paid']]

In [None]:
# Drop specific rows by index label. The result is not assigned back, so df_a
# itself is left unchanged.
df_a.drop([0,3])

In [None]:
# Drop the rows matching a condition (age < 20) by passing their index labels
# to drop().
df_a.drop(df_a[df_a['age'] < 20].index)

## 3. matplotlib

In [None]:
# matplotlib: a tool for visualizing graphs, images and similar data.
import matplotlib.pyplot as plt

# NOTE: this rebinds arr1 (the array from the NumPy section) to 10 random values.
arr1 = np.random.rand(10)
plt.plot(arr1)  # with a single argument the values are plotted against their index
plt.show()

In [None]:
# Draw two series on the same axes by calling plot() twice before show().
# NOTE: this rebinds arr2 (the array from the NumPy section) to 10 random values.
arr2 = np.random.rand(10)
plt.plot(arr1)
plt.plot(arr2)
plt.show()

In [None]:
# Histogram of the values in arr1, grouped into 5 bins.
plt.hist(arr1, bins=5)
plt.show()

In [None]:
# Scatter plot with arr1 on the x-axis and arr2 on the y-axis.
plt.scatter(arr1, arr2)
plt.show()

In [None]:
# Adjusting the plot style.
plt.figure(figsize=(10,3))  # wide, short figure: (width, height) in inches
plt.plot(arr1, 'k.-', label='arr1')  # black, point markers, solid line
plt.plot(arr2, 'bo--', label='arr2')  # blue, circle markers, dashed line
plt.title('Random variables')
plt.xlabel('index')
plt.ylabel('value')
plt.legend()  # entries come from the label= given to each plot() call
plt.show()

In [None]:
# Display a 2-D array as an image: 10 identical rows holding 0..9, drawn with
# the grayscale colormap.
image = np.stack([np.arange(10)] * 10)
plt.imshow(image, cmap='gray')
plt.show()

## 4. tensorflow/keras

In [None]:
# Import TensorFlow and print the installed version.
import tensorflow as tf
print(tf.__version__)

In [None]:
# Use the TensorFlow 1.x-style API.
# NOTE: this rebinds the name `tf` to the compat.v1 module for all code below.
import tensorflow.compat.v1 as tf

In [None]:
# A model built only from constants.

with tf.Session() as sess:
  x = tf.constant(1.0)
  y = tf.constant(2.0)
  # Named `total` rather than `sum` so the built-in sum() is not shadowed for
  # the rest of the notebook session.
  total = x + y

  # `total` is a graph node; sess.run() evaluates it to a concrete value.
  output = sess.run(total)
  print(output)

In [None]:
# Supply data to placeholders through feed_dict.

with tf.Session() as sess:
  a = tf.placeholder(tf.float32)
  x = tf.placeholder(tf.float32)
  b = tf.placeholder(tf.float32)
  # Named `total` rather than `sum` so the built-in sum() is not shadowed for
  # the rest of the notebook session.
  total = a * x + b

  # Every placeholder the fetched node depends on gets its value from feed_dict.
  output = sess.run(total, feed_dict={a: 5.0, x: 1.0, b: 0.0})
  print(output)

In [None]:
# Training example: fit y = W * x + b.

# Training data (these points satisfy y = 2x + 1).
x_data = [1, 2, 3]
y_data = [3, 5, 7]

with tf.Session() as sess:

  # Trainable parameters, initialised from a random normal distribution.
  # (Alternative: start W from a fixed value, e.g. W = tf.Variable(5.0).)
  W = tf.Variable(tf.random_normal([1]), name='weight')
  b = tf.Variable(tf.random_normal([1]), name='bias')

  # Inputs and targets are fed in at run time.
  X = tf.placeholder(tf.float32, shape=[None], name='x')
  Y = tf.placeholder(tf.float32, shape=[None], name='y')

  # Model: linear regression.
  hypothesis = W * X + b

  # Cost function: mean squared error between prediction and target.
  cost = tf.reduce_mean(tf.square(hypothesis - Y))

  # One gradient-descent update of the variables per run of `train`.
  lr = 0.05
  optimizer = tf.train.GradientDescentOptimizer(learning_rate=lr)
  train = optimizer.minimize(cost)

  # Variables must be initialised before they are used.
  sess.run(tf.global_variables_initializer())

  for step in range(2001):
    cost_val, W_val, b_val, _ = sess.run(
        [cost, W, b, train],
        feed_dict={X: x_data, Y: y_data},
    )
    # Report progress every 100 steps.
    if step % 100 == 0:
      print(step, '\tcost: ', cost_val, '\tW :', W_val, '\tb :', b_val)

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras import optimizers

In [None]:
# The same linear regression (y = W * x + b) as a single-unit Keras model.
model = Sequential()
model.add(Dense(1, input_dim=1, activation='linear'))
# Track mean absolute error: 'accuracy' is a classification metric and carries
# no meaning for continuous regression targets trained with an MSE loss.
model.compile(loss='mse', optimizer=optimizers.SGD(learning_rate=0.05), metrics=['mae'])

# Pass explicit float32 column vectors of shape (n_samples, 1) instead of plain
# Python lists. NOTE(review): newer Keras releases are reported to reject plain
# lists as training data - confirm against the installed version.
x_train = np.asarray(x_data, dtype='float32').reshape(-1, 1)
y_train = np.asarray(y_data, dtype='float32').reshape(-1, 1)

history = model.fit(x_train, y_train, epochs=100, batch_size=1, shuffle=False)

# A Dense layer's weights are [kernel, bias]; kernel[0] is the row for the single input.
kernel, bias = model.layers[0].get_weights()
print('W :', kernel[0], '\tb :', bias)