# Data Preprocessing

In [1]:
import os

# Create ../data if needed and write a tiny housing dataset to it. Missing
# entries are spelled "NA" in the CSV text.
data_dir = os.path.join('..', 'data')
os.makedirs(data_dir, exist_ok=True)
data_file = os.path.join(data_dir, 'house_tiny.csv')

csv_text = '''NumRooms,RoofType,Price
NA,NA,127500
2,NA,106000
4,Slate,178100
NA,NA,140000'''
with open(data_file, 'w') as csv_out:
    csv_out.write(csv_text)

In [2]:
# Echo the relative path the CSV was written to.
data_file

'../data/house_tiny.csv'

In [3]:
import pandas as pd

# Load the CSV written above into a DataFrame; the "NA" entries are parsed
# as missing values (NaN), which turns NumRooms into a float column.
data = pd.read_csv(data_file)
print(data)

   NumRooms RoofType   Price
0       NaN      NaN  127500
1       2.0      NaN  106000
2       4.0    Slate  178100
3       NaN      NaN  140000


In [7]:
## test pd.get_dummies function

import pandas as pd

# One-hot encode a small categorical Series: one indicator column per
# distinct value ('a', 'b', 'c').
con = pd.Series(list('abcba'))
# pandas >= 2.0 returns boolean indicators by default; request integers
# explicitly so the table prints as 0/1 on every pandas version.
dummies = pd.get_dummies(con, dtype=int)
print(dummies)

   a  b  c
0  1  0  0
1  0  1  0
2  0  0  1
3  0  1  0
4  1  0  0


In [8]:
# Split the frame by position: the first two columns are the features and
# the last column (Price) is the prediction target.
inputs, targets = data.iloc[:, 0:2], data.iloc[:, 2]
# One-hot encode the categorical RoofType column; dummy_na=True adds a
# separate RoofType_nan indicator for missing entries. dtype=int keeps the
# indicators numeric (pandas >= 2.0 defaults to bool), so the frame stays
# all-numeric for the mean imputation and tensor conversion that follow.
inputs = pd.get_dummies(inputs, dummy_na=True, dtype=int)
print(inputs)

   NumRooms  RoofType_Slate  RoofType_nan
0       NaN               0             1
1       2.0               0             1
2       4.0               1             0
3       NaN               0             1


In [9]:
# Column-wise means. Missing values are skipped, so NumRooms averages only
# its two observed entries (2 and 4).
inputs.mean()

NumRooms          3.00
RoofType_Slate    0.25
RoofType_nan      0.75
dtype: float64

In [10]:
# Mean imputation: replace each remaining NaN with the mean of its column.
column_means = inputs.mean()
inputs = inputs.fillna(column_means)
print(inputs)

   NumRooms  RoofType_Slate  RoofType_nan
0       3.0               0             1
1       2.0               0             1
2       4.0               1             0
3       3.0               0             1


* Conversion to the Tensor format

In [11]:
import tensorflow as tf

# Convert explicitly to a float64 array rather than relying on `.values`: if
# the indicator columns are boolean (the get_dummies default in pandas >= 2.0),
# `.values` on the mixed float/bool frame yields an object array, which
# tf.constant cannot convert.
X = tf.constant(inputs.to_numpy(dtype=float))
# The targets are a single integer column, so no dtype coercion is needed.
y = tf.constant(targets.to_numpy())
X, y

2023-02-05 23:35:37.818053: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-02-05 23:35:45.975007: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


(<tf.Tensor: shape=(4, 3), dtype=float64, numpy=
 array([[3., 0., 1.],
        [2., 0., 1.],
        [4., 1., 0.],
        [3., 0., 1.]])>,
 <tf.Tensor: shape=(4,), dtype=int64, numpy=array([127500, 106000, 178100, 140000])>)

In [12]:
# Both conversions produce TensorFlow eager tensors.
type(X), type(y)

(tensorflow.python.framework.ops.EagerTensor,
 tensorflow.python.framework.ops.EagerTensor)

In [14]:
# X is a 4x3 matrix (4 examples, 3 feature columns); y is a length-4 vector.
X.shape, y.shape

(TensorShape([4, 3]), TensorShape([4]))

# Linear Algebra

In [15]:
import tensorflow as tf

# Scalars are tensors with an empty shape; note that `y` is rebound here and
# no longer refers to the price targets from the preprocessing section.
x = tf.constant(3.0)
y = tf.constant(2.0)

# Sum, product, quotient and power of the two scalars.
x + y, x * y, x / y, x**y

(<tf.Tensor: shape=(), dtype=float32, numpy=5.0>,
 <tf.Tensor: shape=(), dtype=float32, numpy=6.0>,
 <tf.Tensor: shape=(), dtype=float32, numpy=1.5>,
 <tf.Tensor: shape=(), dtype=float32, numpy=9.0>)

In [17]:
# A vector is a one-axis tensor; tf.range(3) yields the int32 values 0, 1, 2.
x = tf.range(3)
x

<tf.Tensor: shape=(3,), dtype=int32, numpy=array([0, 1, 2], dtype=int32)>

In [18]:
# For a vector, len() and the single entry of .shape agree (both 3).
len(x), x.shape

(3, TensorShape([3]))

In [19]:
# Build a 3x2 matrix by laying the values 0..5 out row by row.
A = tf.reshape(tf.range(6), (3, 2))
A

<tf.Tensor: shape=(3, 2), dtype=int32, numpy=
array([[0, 1],
       [2, 3],
       [4, 5]], dtype=int32)>

In [20]:
# Transposing swaps rows and columns: the 3x2 matrix becomes 2x3.
tf.transpose(A)

<tf.Tensor: shape=(2, 3), dtype=int32, numpy=
array([[0, 2, 4],
       [1, 3, 5]], dtype=int32)>

In [21]:
# A symmetric matrix equals its own transpose, so the elementwise comparison
# is True in every position.
A = tf.constant([[1, 2, 3], [2, 0, 4], [3, 4, 5]])
A == tf.transpose(A)

<tf.Tensor: shape=(3, 3), dtype=bool, numpy=
array([[ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True]])>

In [22]:
# A tensor with three axes: 2 blocks, each a 3x4 matrix, filled with 0..23.
tf.reshape(tf.range(24), (2, 3, 4))

<tf.Tensor: shape=(2, 3, 4), dtype=int32, numpy=
array([[[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]],

       [[12, 13, 14, 15],
        [16, 17, 18, 19],
        [20, 21, 22, 23]]], dtype=int32)>

In [23]:
# Rebind A to a 2x3 float32 matrix holding 0..5.
A = tf.reshape(tf.range(6, dtype=tf.float32), (2, 3))
B = A  # `B` is a second name for the same tensor; no new memory is allocated
# Adding two tensors of the same shape is elementwise, so A + B doubles A.
A, A + B

(<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
 array([[0., 1., 2.],
        [3., 4., 5.]], dtype=float32)>,
 <tf.Tensor: shape=(2, 3), dtype=float32, numpy=
 array([[ 0.,  2.,  4.],
        [ 6.,  8., 10.]], dtype=float32)>)

In [24]:
# `*` is the elementwise (Hadamard) product, not matrix multiplication;
# since B is A, every entry is squared.
A * B

<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
array([[ 0.,  1.,  4.],
       [ 9., 16., 25.]], dtype=float32)>

In [26]:
# Adding or multiplying a tensor by a scalar applies the operation to every
# element and leaves the shape (2, 3, 4) unchanged. `X` is rebound here.
a = 2
X = tf.reshape(tf.range(24), (2, 3, 4))
X, a + X, (a * X).shape

(<tf.Tensor: shape=(2, 3, 4), dtype=int32, numpy=
 array([[[ 0,  1,  2,  3],
         [ 4,  5,  6,  7],
         [ 8,  9, 10, 11]],
 
        [[12, 13, 14, 15],
         [16, 17, 18, 19],
         [20, 21, 22, 23]]], dtype=int32)>,
 <tf.Tensor: shape=(2, 3, 4), dtype=int32, numpy=
 array([[[ 2,  3,  4,  5],
         [ 6,  7,  8,  9],
         [10, 11, 12, 13]],
 
        [[14, 15, 16, 17],
         [18, 19, 20, 21],
         [22, 23, 24, 25]]], dtype=int32)>,
 TensorShape([2, 3, 4]))

In [27]:
# Rebind x as a float32 vector; reduce_sum without an axis adds up all of
# its elements into a scalar.
x = tf.range(3, dtype=tf.float32)
x, tf.reduce_sum(x)

(<tf.Tensor: shape=(3,), dtype=float32, numpy=array([0., 1., 2.], dtype=float32)>,
 <tf.Tensor: shape=(), dtype=float32, numpy=3.0>)

In [29]:
# With no axis given, reduce_sum collapses the whole 2x3 matrix to a scalar.
A, A.shape, tf.reduce_sum(A)

(<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
 array([[0., 1., 2.],
        [3., 4., 5.]], dtype=float32)>,
 TensorShape([2, 3]),
 <tf.Tensor: shape=(), dtype=float32, numpy=15.0>)

In [38]:
# axis=0 sums down the rows (one total per column, shape (3,));
# axis=1 sums across the columns (one total per row, shape (2,)).
A, tf.reduce_sum(A, axis=0), tf.reduce_sum(A, axis=1)

(<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
 array([[0., 1., 2.],
        [3., 4., 5.]], dtype=float32)>,
 <tf.Tensor: shape=(3,), dtype=float32, numpy=array([3., 5., 7.], dtype=float32)>,
 <tf.Tensor: shape=(2,), dtype=float32, numpy=array([ 3., 12.], dtype=float32)>)

In [35]:
# Summing the (2, 3, 4) tensor over axis 0 adds its two 3x4 blocks together,
# leaving shape (3, 4).
X, tf.reduce_sum(X, axis=0)

(<tf.Tensor: shape=(2, 3, 4), dtype=int32, numpy=
 array([[[ 0,  1,  2,  3],
         [ 4,  5,  6,  7],
         [ 8,  9, 10, 11]],
 
        [[12, 13, 14, 15],
         [16, 17, 18, 19],
         [20, 21, 22, 23]]], dtype=int32)>,
 <tf.Tensor: shape=(3, 4), dtype=int32, numpy=
 array([[12, 14, 16, 18],
        [20, 22, 24, 26],
        [28, 30, 32, 34]], dtype=int32)>)

In [34]:
# Summing over axis 1 adds the three rows inside each block, leaving
# shape (2, 4).
X, tf.reduce_sum(X, axis=1)

(<tf.Tensor: shape=(2, 3, 4), dtype=int32, numpy=
 array([[[ 0,  1,  2,  3],
         [ 4,  5,  6,  7],
         [ 8,  9, 10, 11]],
 
        [[12, 13, 14, 15],
         [16, 17, 18, 19],
         [20, 21, 22, 23]]], dtype=int32)>,
 <tf.Tensor: shape=(2, 4), dtype=int32, numpy=
 array([[12, 15, 18, 21],
        [48, 51, 54, 57]], dtype=int32)>)

In [39]:
# Reducing over every axis explicitly gives the same scalar as reduce_sum
# with no axis argument.
tf.reduce_sum(A, axis=[0, 1]), tf.reduce_sum(A)

(<tf.Tensor: shape=(), dtype=float32, numpy=15.0>,
 <tf.Tensor: shape=(), dtype=float32, numpy=15.0>)

In [40]:
# Re-display A (the 2x3 float32 matrix) for reference.
A

<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
array([[0., 1., 2.],
       [3., 4., 5.]], dtype=float32)>

In [41]:
# keepdims=True retains the reduced axis with length 1, so the row sums come
# back with shape (2, 1) instead of (2,).
sum_A = tf.reduce_sum(A, axis=1, keepdims=True)
sum_A

<tf.Tensor: shape=(2, 1), dtype=float32, numpy=
array([[ 3.],
       [12.]], dtype=float32)>

In [42]:
# The (2, 1) row sums broadcast across the columns of A, dividing each row
# by its own total so that every row sums to 1.
A / sum_A

<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
array([[0.        , 0.33333334, 0.6666667 ],
       [0.25      , 0.33333334, 0.41666666]], dtype=float32)>

In [43]:
# Running total down axis 0: the shape is unchanged, and each row is the sum
# of itself and all rows above it.
tf.cumsum(A, axis=0)

<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
array([[0., 1., 2.],
       [3., 5., 7.]], dtype=float32)>

In [44]:
# Re-display x (the float32 vector 0., 1., 2.) for reference.
x

<tf.Tensor: shape=(3,), dtype=float32, numpy=array([0., 1., 2.], dtype=float32)>

In [45]:
# Dot product of two vectors: tensordot with axes=1 contracts their single
# axis, i.e. sums the elementwise products (0*1 + 1*1 + 2*1 = 3).
y = tf.ones(3, dtype=tf.float32)
x, y, tf.tensordot(x, y, axes=1)

(<tf.Tensor: shape=(3,), dtype=float32, numpy=array([0., 1., 2.], dtype=float32)>,
 <tf.Tensor: shape=(3,), dtype=float32, numpy=array([1., 1., 1.], dtype=float32)>,
 <tf.Tensor: shape=(), dtype=float32, numpy=3.0>)

In [46]:
# Matrix-vector product: the column count of A (3) must match the length of
# x; the result has one entry per row of A.
A.shape, x.shape, tf.linalg.matvec(A, x)

(TensorShape([2, 3]),
 TensorShape([3]),
 <tf.Tensor: shape=(2,), dtype=float32, numpy=array([ 5., 14.], dtype=float32)>)

In [47]:
# Display the operands of the matrix-vector product for reference.
A, x

(<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
 array([[0., 1., 2.],
        [3., 4., 5.]], dtype=float32)>,
 <tf.Tensor: shape=(3,), dtype=float32, numpy=array([0., 1., 2.], dtype=float32)>)

In [48]:
# tf.norm defaults to the Euclidean (L2) norm: sqrt(3^2 + (-4)^2) = 5.
u = tf.constant([3.0, -4.0])
tf.norm(u)

<tf.Tensor: shape=(), dtype=float32, numpy=5.0>