# How to read an npz file and convert it to numpy arrays

In [1]:
import numpy as np

# Path of the .npz archive to load (the file name suggests this is the test split).
inputfile = './20210906_dataset/testSet.npz'
# Alternative input: uncomment the next line to load the trainSet archive instead.
#inputfile = './20210906_dataset/trainSet.npz'

# NOTE(review): allow_pickle=True lets np.load unpickle object arrays, which can
# execute arbitrary code from a malicious file. Only use it on trusted archives.
# TODO confirm whether any array in this archive actually requires pickling.
npzfile = np.load(inputfile, allow_pickle=True)

# List the names of all arrays stored in the archive.
npzfile.files

['x', 'y', 'location', 'weeks']

## 1. x_set

In [2]:
# Extract the array stored under the key 'x' and show its dimensions.
x_set = npzfile['x']
x_set.shape

(4000, 98)

In [3]:
# Display the array; as the last expression in the cell, its repr is rendered.
x_set

array([[0.35305171, 0.36305948, 0.38363946, ..., 0.25901657, 0.282713  ,
        0.28998319],
       [0.37794011, 0.392543  , 0.47613544, ..., 0.51391437, 0.46120672,
        0.41333216],
       [0.21129252, 0.23120572, 0.24029174, ..., 0.22886625, 0.21125735,
        0.21130195],
       ...,
       [0.6714039 , 0.68167707, 0.73334926, ..., 0.54456205, 0.53718712,
        0.53702632],
       [0.18861976, 0.19455567, 0.20285442, ..., 0.58536707, 0.57077086,
        0.55353971],
       [0.31341889, 0.26179435, 0.21742928, ..., 0.27786333, 0.28508746,
        0.29354012]])

## 2. y_set 
 - 0 :Other
 - 1 :Sugarcane
 - 2 :Rice
 

In [4]:
# Extract the array stored under the key 'y' and show its dimensions.
y_set = npzfile['y']
y_set.shape

(4000,)

In [5]:
# Display the label array.
y_set

array([1, 1, 1, ..., 0, 0, 0])

## 3. Location
Array of (lat, lon) coordinate pairs, one row per sample.

In [6]:
# Extract the array stored under the key 'location' and show its dimensions.
location = npzfile['location']
location.shape

(4000, 2)

In [7]:
# Display the coordinate array.
location

array([[16.4549737, 99.8181237],
       [16.4444173, 99.7224679],
       [16.4732159, 99.8221055],
       ...,
       [16.5121079, 99.8216425],
       [16.5502591, 99.8490521],
       [16.4999773, 99.8505337]])

## 4. weeks
Timestamp index for the NDVI values: an array of strings in `[year]-[week_of_year]` format.


In [8]:
# Extract the array stored under the key 'weeks' and show its dimensions.
weeks_of_year = npzfile['weeks']
weeks_of_year.shape

(98,)

In [9]:
# Display the week labels.
weeks_of_year

array(['2019-18', '2019-19', '2019-20', '2019-21', '2019-22', '2019-23',
       '2019-24', '2019-25', '2019-26', '2019-27', '2019-28', '2019-29',
       '2019-30', '2019-31', '2019-32', '2019-33', '2019-34', '2019-35',
       '2019-36', '2019-37', '2019-38', '2019-39', '2019-40', '2019-41',
       '2019-42', '2019-43', '2019-44', '2019-45', '2019-46', '2019-47',
       '2019-48', '2019-49', '2019-50', '2019-51', '2019-52', '2020-01',
       '2020-02', '2020-03', '2020-04', '2020-05', '2020-06', '2020-07',
       '2020-08', '2020-09', '2020-10', '2020-11', '2020-12', '2020-13',
       '2020-14', '2020-15', '2020-16', '2020-17', '2020-18', '2020-19',
       '2020-20', '2020-21', '2020-22', '2020-23', '2020-24', '2020-25',
       '2020-26', '2020-27', '2020-28', '2020-29', '2020-30', '2020-31',
       '2020-32', '2020-33', '2020-34', '2020-35', '2020-36', '2020-37',
       '2020-38', '2020-39', '2020-40', '2020-41', '2020-42', '2020-43',
       '2020-44', '2020-45', '2020-46', '2020-47', 

# How to reshape x_set
## 1. Original x_set from npz 
The data is stored as a 2-D array with shape (n_sample, ndvi_values).

In [10]:
# Re-check the dimensions of x_set before reshaping it.
x_set.shape

(4000, 98)

This means the data contains 4000 rows (samples), each holding 98 weekly NDVI values.

In [11]:
# Display the original 2-D array for comparison with the reshaped versions.
x_set

array([[0.35305171, 0.36305948, 0.38363946, ..., 0.25901657, 0.282713  ,
        0.28998319],
       [0.37794011, 0.392543  , 0.47613544, ..., 0.51391437, 0.46120672,
        0.41333216],
       [0.21129252, 0.23120572, 0.24029174, ..., 0.22886625, 0.21125735,
        0.21130195],
       ...,
       [0.6714039 , 0.68167707, 0.73334926, ..., 0.54456205, 0.53718712,
        0.53702632],
       [0.18861976, 0.19455567, 0.20285442, ..., 0.58536707, 0.57077086,
        0.55353971],
       [0.31341889, 0.26179435, 0.21742928, ..., 0.27786333, 0.28508746,
        0.29354012]])

## 2. Reshape to (n_sample, timestamp, ndvi)


In [12]:
# Append a trailing length-1 axis: (n_sample, n_weeks) -> (n_sample, n_weeks, 1).
# The target shape is derived from x_set itself instead of being hard-coded as
# (4000, 98, 1), so the cell also works for an archive with a different number
# of samples or weeks.
option1 = x_set.reshape(*x_set.shape, 1)

print("shape:", option1.shape)

# Display the reshaped array.
option1

shape: (4000, 98, 1)


array([[[0.35305171],
        [0.36305948],
        [0.38363946],
        ...,
        [0.25901657],
        [0.282713  ],
        [0.28998319]],

       [[0.37794011],
        [0.392543  ],
        [0.47613544],
        ...,
        [0.51391437],
        [0.46120672],
        [0.41333216]],

       [[0.21129252],
        [0.23120572],
        [0.24029174],
        ...,
        [0.22886625],
        [0.21125735],
        [0.21130195]],

       ...,

       [[0.6714039 ],
        [0.68167707],
        [0.73334926],
        ...,
        [0.54456205],
        [0.53718712],
        [0.53702632]],

       [[0.18861976],
        [0.19455567],
        [0.20285442],
        ...,
        [0.58536707],
        [0.57077086],
        [0.55353971]],

       [[0.31341889],
        [0.26179435],
        [0.21742928],
        ...,
        [0.27786333],
        [0.28508746],
        [0.29354012]]])

## 3. Reshape to (n_sample, ndvi, timestamp)

In [13]:
# Insert a length-1 middle axis: (n_sample, n_weeks) -> (n_sample, 1, n_weeks).
# Both outer dimensions are read from x_set.shape instead of being hard-coded as
# (4000, 1, 98), so the cell also works for an archive with a different number
# of samples or weeks.
option2 = x_set.reshape(x_set.shape[0], 1, x_set.shape[1])

print("shape:", option2.shape)

# Display the reshaped array.
option2

shape: (4000, 1, 98)


array([[[0.35305171, 0.36305948, 0.38363946, ..., 0.25901657,
         0.282713  , 0.28998319]],

       [[0.37794011, 0.392543  , 0.47613544, ..., 0.51391437,
         0.46120672, 0.41333216]],

       [[0.21129252, 0.23120572, 0.24029174, ..., 0.22886625,
         0.21125735, 0.21130195]],

       ...,

       [[0.6714039 , 0.68167707, 0.73334926, ..., 0.54456205,
         0.53718712, 0.53702632]],

       [[0.18861976, 0.19455567, 0.20285442, ..., 0.58536707,
         0.57077086, 0.55353971]],

       [[0.31341889, 0.26179435, 0.21742928, ..., 0.27786333,
         0.28508746, 0.29354012]]])