### Data access and indexing in PyTorch

In [1]:
import torch 
import torch.nn as nn
import numpy as np
import pandas as pd


In [7]:
# Build a 1-D integer tensor. torch.int64 is the dtype that Python's built-in
# `int` resolves to, spelled out explicitly here.
x = torch.tensor([1, 2, 3], dtype=torch.int64)
x.dtype

torch.int64

### Convert a NumPy array to a tensor

In [None]:
# A small 2-D NumPy array built from a nested list: two rows, three columns.
np_arr = np.array([[1, 2, 3], [4, 5, 6]])
np_arr.shape

(2, 3)

In [12]:
# Turn the NumPy array into a tensor; values and shape carry over unchanged.
# NOTE: per the PyTorch docs, torch.from_numpy returns a tensor that shares
# memory with the source array, so in-place edits to one are visible in the other.
torch_np_arr = torch.from_numpy(np_arr)
torch_np_arr

tensor([[1, 2, 3],
        [4, 5, 6]])

In [13]:
# The shape matches the source NumPy array: 2 rows, 3 columns.
torch_np_arr.shape

torch.Size([2, 3])

### Convert a pandas DataFrame to a torch tensor

In [23]:
# Column-oriented sample data: each column name maps row index -> value.
input_dict = {
    'distance_miles': {0: 1.6, 1: 13.09, 2: 6.97},
    'delivery_time_minutes': {0: 7.22, 1: 32.41, 2: 17.47},
}

# Three rows (index 0..2) and two float columns.
df_in = pd.DataFrame(data=input_dict)
df_in.shape

(3, 2)

In [None]:
# torch cannot read a DataFrame directly, so convert it to a NumPy array first.
# The row/column labels are dropped; only the values are kept.
num_input = df_in.to_numpy()
num_input.shape

(3, 2)

In [26]:
# Convert the NumPy values to a tensor. The float columns come through as
# float64 (see the dtype in the displayed output).
torch_df = torch.from_numpy(num_input)
torch_df

tensor([[ 1.6000,  7.2200],
        [13.0900, 32.4100],
        [ 6.9700, 17.4700]], dtype=torch.float64)

In [28]:
# Same (3, 2) shape as the DataFrame it was built from.
torch_df.shape

torch.Size([3, 2])

### Add / remove dimensions of the data

In [30]:
# Baseline shape before any dimension is added.
torch_df.shape

torch.Size([3, 2])

In [31]:
# To add one more dimension, use unsqueeze: unsqueeze(0) inserts a new size-1
# dimension at position 0, so (3, 2) becomes (1, 3, 2).

new_torch_df = torch_df.unsqueeze(0)

In [33]:
# A leading dimension of size 1 has been added in front of the original (3, 2).
new_torch_df.shape

torch.Size([1, 3, 2])

In [36]:
# To remove a dimension, use squeeze(): squeeze(0) drops dimension 0, which
# works here because that dimension has size 1.
new_torch_df_squeezed = new_torch_df.squeeze(0)

In [38]:
# Back to the original (3, 2) shape.
new_torch_df_squeezed.shape

torch.Size([3, 2])

### Reshaping
Conceptually, the data is flattened first and then regrouped into the new shape.

In [43]:
# Starting point: the (3, 2) tensor built from the DataFrame.
torch_df

tensor([[ 1.6000,  7.2200],
        [13.0900, 32.4100],
        [ 6.9700, 17.4700]], dtype=torch.float64)

In [44]:
# reshape keeps the elements in their row-by-row order and only regroups them:
# (3, 2) -> (2, 3). The total number of elements (6) must stay the same.
torch_df.reshape(2,3)

tensor([[ 1.6000,  7.2200, 13.0900],
        [32.4100,  6.9700, 17.4700]], dtype=torch.float64)

### Combining 


In [48]:
# Two 2x3 integer tensors to combine.
torh_a = torch.tensor([[1, 2, 3], [4, 5, 6]])
torh_b = torch.tensor([[7, 8, 9], [10, 11, 12]])

# dim=0 concatenates along the rows (stacks vertically): (2, 3) + (2, 3) -> (4, 3).
torch_cat = torch.cat((torh_a, torh_b), dim=0)
# A notebook cell only displays its LAST expression, so a bare `torch_cat` in
# the middle of the cell is silently discarded. Print it explicitly instead.
print(f'dim=0:\n {torch_cat}')

# dim=1 concatenates along the columns (joins horizontally): (2, 3) + (2, 3) -> (2, 6).
torch_cat_dim1 = torch.cat((torh_a, torh_b), dim=1)
torch_cat_dim1


tensor([[ 1,  2,  3,  7,  8,  9],
        [ 4,  5,  6, 10, 11, 12]])

### Indexing and slicing 

In [60]:
# 3x4 integer tensor used by every indexing example in this section.
x = torch.tensor(
    [
        [1, 2, 3, 4],
        [5, 6, 7, 8],
        [9, 10, 11, 12],
    ]
)

# Take all the selections first ...
single_element = x[1, 2]            # one position: row 1, column 2
row_0 = x[0, :]                     # entire first row
col_1 = x[:, 1]                     # entire second column
sub_tensor = x[0:2, 1:3]            # rows 0-1, columns 1-2
every_other_element = x[:, ::2]     # all rows, every 2nd column
every_third_element = x[:, ::3]     # all rows, every 3rd column

# ... then report them in order.
print(f'original tensor:\n {x}')
print(f'single element: {single_element}')
print(f'row 0: {row_0}')
print(f'col 1: {col_1}')
print(f'sub tensor: \n {sub_tensor}')
print(f'every other element: \n {every_other_element}')
print(f'every third element: \n {every_third_element}')

original tensor:
 tensor([[ 1,  2,  3,  4],
        [ 5,  6,  7,  8],
        [ 9, 10, 11, 12]])
single element: 7
row 0: tensor([1, 2, 3, 4])
col 1: tensor([ 2,  6, 10])
sub tensor: 
 tensor([[2, 3],
        [6, 7]])
every other element: 
 tensor([[ 1,  3],
        [ 5,  7],
        [ 9, 11]])
every third element: 
 tensor([[ 1,  4],
        [ 5,  8],
        [ 9, 12]])


In [61]:
# Indexing a single position still gives a tensor (not a Python int), and it
# keeps the dtype of the tensor it came from.
single_element.dtype

torch.int64

In [64]:
# Extract the scalar value from the tensor: .item() turns a single-element
# tensor into a plain Python number.
scalar_value = single_element.item()
scalar_value

7

### Advanced Masking

In [65]:
# Reuse the 3x4 tensor from the indexing section.
x

tensor([[ 1,  2,  3,  4],
        [ 5,  6,  7,  8],
        [ 9, 10, 11, 12]])

In [66]:
# Element-wise comparison: produces a boolean mask with the same shape as x.
x>5

tensor([[False, False, False, False],
        [False,  True,  True,  True],
        [ True,  True,  True,  True]])

In [70]:
# Boolean-mask indexing returns the matching values as a flat 1-D tensor.
# Note this uses >= 5 (unlike the `x>5` mask shown above), which selects
# exactly 8 values; reshape(2,4) relies on that count and would fail for a
# threshold that selects a different number of elements.
x[x >= 5].reshape(2,4)

tensor([[ 5,  6,  7,  8],
        [ 9, 10, 11, 12]])

In [75]:
# Dot product of two 1-D tensors, computed two equivalent ways.
a = torch.tensor([1, 2, 3])
b = torch.tensor([4, 5, 6])

# Option 1: the dedicated dot-product function.
c = torch.dot(a, b)

# Option 2: general matrix multiplication applied to the same two 1-D tensors.
d = torch.matmul(a, b)


In [76]:
# Both approaches give the same value; comparing the two results yields a
# boolean tensor rather than a Python bool.
c == d

tensor(True)

### Exercise

In [80]:
# Exercise data: one row per product, one column per month.
list1 = [
    [100, 120, 130, 110],
    [90, 95, 105, 125],
    [140, 115, 120, 150],
]
num_array = np.array(list1)

# Attach product and month labels so the table is readable when displayed.
df_input = pd.DataFrame(
    data=num_array,
    index=['prodA', 'prodB', 'prodC'],
    columns=['Jan', 'Feb', 'Mar', 'April'],
)
df_input

Unnamed: 0,Jan,Feb,Mar,April
prodA,100,120,130,110
prodB,90,95,105,125
prodC,140,115,120,150


In [83]:
# Build the tensor from the raw NumPy array (not the DataFrame). The product
# and month labels are not carried over, so rows and columns are addressed by
# position from here on.
torch_tensor = torch.from_numpy(num_array)
torch_tensor

tensor([[100, 120, 130, 110],
        [ 90,  95, 105, 125],
        [140, 115, 120, 150]])

In [84]:
# Total of row 1 (prodB) across all four months.
torch_tensor[1,:].sum()

tensor(415)

In [86]:
# Row 2 (prodC): keep only the values above 130, using a boolean mask built
# from that same row.
torch_tensor[2,:][torch_tensor[2,:] > 130]

tensor([140, 150])

In [90]:
# All rows, columns 1 and 2 (Feb and Mar), selected with a list of column indices.
torch_tensor[:,[1,2]]

tensor([[120, 130],
        [ 95, 105],
        [115, 120]])