In [1]:
import numpy as np

In [2]:
#NumPy I/O -
#np.save()  saves a single array to a .npy file
#np.savez() saves several arrays into one .npz file
#np.load()  reads a .npy or .npz file back

In [3]:
# Sample data: the integers 0..9 laid out as a 2x5 array.
array = np.arange(10).reshape(2, 5)
array

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])

In [4]:
#np.save(file, arr, allow_pickle=True, fix_imports=True)
# Write the 2x5 sample array to test.npy.
np.save('test.npy',array)

In [5]:
# Read the array back from test.npy and display it.
array_read = np.load('test.npy')
array_read

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])

In [6]:
# np.save is given an open binary file object here instead of a file name.
with open('one_array.npy', 'wb') as f:
    np.save(f, np.array([1, 2]))

In [7]:
# Load the array that was written through the file object.
np.load('one_array.npy')

array([1, 2])

In [8]:
# Write four small arrays back-to-back into the same open file
# (this overwrites the test.npy created earlier).
with open('test.npy', 'wb') as f:
    for values in ([1, 2], [1, 3], [1, 4], [1, 3]):
        np.save(f, np.array(values))

In [9]:
# Each np.load() call on the open file reads exactly one array, so four
# calls are needed to get back the four arrays written above.
with open('test.npy', 'rb') as f:
    a, b, c, d = (np.load(f) for _ in range(4))

print(a, b, c, d)

[1 2] [1 3] [1 4] [1 3]


In [10]:
x = np.arange(10)
y = np.array([1, 2, 3])
z = np.random.rand(10)  # no seed is set, so these values differ on every run

with open('multi_array.npz', 'wb') as f:
    # Save the arrays under explicit keyword names (array1, array2, ...).
    # Arrays passed without a keyword get the default names arr_0, arr_1, ...
    np.savez(f, array1=x, array2=y, array3=z)

In [11]:
# Loading a .npz archive returns an NpzFile object rather than an array.
npzfile = np.load('multi_array.npz')
type(npzfile)

numpy.lib.npyio.NpzFile

In [12]:
# .files is a list holding the names of the arrays stored in the archive
npzfile.files

['array1', 'array2', 'array3']

In [13]:
# Print each stored array on its own line, looked up by its keyword name.
print(*(npzfile[key] for key in ('array1', 'array2', 'array3')), sep='\n')

[0 1 2 3 4 5 6 7 8 9]
[1 2 3]
[0.25670646 0.92481425 0.44235386 0.61003224 0.03296771 0.77497883
 0.28761106 0.57799762 0.0200823  0.93581271]


In [14]:
#np.savetxt() saves a 1-D or 2-D array to a text file
#np.savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\n', header='', footer='', comments='# ', encoding=None)
#newline: the string that ends each line
#delimiter: the string that separates columns

In [15]:
# 1-D float array used by the savetxt examples that follow.
x = np.arange(0.0,5.0,1.0)
x

array([0., 1., 2., 3., 4.])

In [16]:
# 2-D (2x5) integer array used by the savetxt examples that follow.
y = np.arange(10).reshape(2, 5)
y

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])

In [17]:
np.savetxt('test.out', [x], delimiter=',')# wrapping x in [] writes the 1-D array as a single comma-separated row

In [18]:
# %load test.out
0.000000000000000000e+00,1.000000000000000000e+00,2.000000000000000000e+00,3.000000000000000000e+00,4.000000000000000000e+00


(0.0, 1.0, 2.0, 3.0, 4.0)

In [19]:
np.savetxt('test.gz', [x], delimiter=',')# [x] again writes the 1-D array as one row; NOTE(review): the .gz suffix presumably makes savetxt gzip the output - confirm
np.savetxt('test.csv', y, delimiter=',')# 2-D array: one line of text per row

In [20]:
# Same call as in the previous cell; it rewrites test.csv with the same data.
np.savetxt('test.csv', y, delimiter=',')

In [21]:
# %load test.csv
0.000000000000000000e+00,1.000000000000000000e+00,2.000000000000000000e+00,3.000000000000000000e+00,4.000000000000000000e+00
5.000000000000000000e+00,6.000000000000000000e+00,7.000000000000000000e+00,8.000000000000000000e+00,9.000000000000000000e+00


(5.0, 6.0, 7.0, 8.0, 9.0)

In [22]:
#header and footer
# x is passed without [], so each element is written on its own line.
# Every header/footer line is prefixed with the default comments string '# '.
np.savetxt('test.out', x, fmt='%1.4e', delimiter=',', header='this is,\nheader', footer='this is footer')

In [23]:
# %load test.out
# this is,
# header
0.0000e+00
1.0000e+00
2.0000e+00
3.0000e+00
4.0000e+00
# this is footer


4.0

In [24]:
#numpy.loadtxt(fname, dtype=<class 'float'>, comments='#', delimiter=None, converters=None, skiprows=0, 
#usecols=None, unpack=False, ndmin=0, encoding='bytes', max_rows=None)
# Read test.out back as float32; the '#'-prefixed header and footer lines
# are skipped because '#' is the default comments marker.
np.loadtxt('test.out', delimiter=',', dtype='f4')#f4 == float32

array([0., 1., 2., 3., 4.], dtype=float32)

In [25]:
#numpy.genfromtxt(fname, dtype=<class 'float'>, comments='#', delimiter=None, skip_header=0, skip_footer=0,
#converters=None, missing_values=None, filling_values=None, usecols=None,
#names=None, excludelist=None, deletechars=" !#$%&'()*+, -./:;<=>?@[\]^{|}~",
#replace_space='_', autostrip=False, case_sensitive=True, defaultfmt='f%i',
#unpack=None, usemask=False, loose=True, 
#invalid_raise=True, max_rows=None, encoding='bytes')
# Read the two-row CSV written earlier back into a 2-D float array.
np.genfromtxt("test.csv", delimiter=",")

array([[0., 1., 2., 3., 4.],
       [5., 6., 7., 8., 9.]])

In [26]:
# test.gz, written by savetxt earlier, is read back directly by file name.
np.genfromtxt("test.gz", delimiter=",")

array([0., 1., 2., 3., 4.])

In [27]:
# A list of strings is accepted as input; the non-numeric "abc" comes back as nan.
np.genfromtxt(["1", "2", "abc", "4", "5"])

array([ 1.,  2., nan,  4.,  5.])

In [28]:
from io import StringIO

data = u"  1  2  3\n  4  5 67\n890123  4"
np.genfromtxt(StringIO(data), delimiter=3)# integer delimiter=3: fixed-width fields of 3 characters each (spaces included)

array([[  1.,   2.,   3.],
       [  4.,   5.,  67.],
       [890., 123.,   4.]])

In [29]:
data = u"123456789\n   4  7 9\n   4567 9"
np.genfromtxt(StringIO(data), delimiter=(4, 3, 2))# delimiter=(4, 3, 2): fixed-width fields of 4, then 3, then 2 characters

array([[1234.,  567.,   89.],
       [   4.,    7.,    9.],
       [   4.,  567.,    9.]])

In [30]:
data = u"1, 2 , 4\n 4, 5, 6"
np.genfromtxt(StringIO(data), delimiter=",", autostrip=True)# autostrip=True strips the whitespace around each field

array([[1., 2., 4.],
       [4., 5., 6.]])

In [31]:
# Lines that start with the comments marker "#" (the header and footer) are ignored.
np.genfromtxt("test.out", comments="#")

array([0., 1., 2., 3., 4.])

In [32]:
np.genfromtxt("test.out", comments=None, skip_footer=1, skip_header=2)# with comments=None, skip the 2 header lines (skip_header=2) and the 1 footer line (skip_footer=1) explicitly

array([0., 1., 2., 3., 4.])

In [33]:
# %load test.out
# this is,
# header
0.0000e+00
1.0000e+00
2.0000e+00
3.0000e+00
4.0000e+00
# this is footer


4.0

In [34]:
# %load names.txt
a,b,c
1,2,3
4,5,6
7,8,9

(7, 8, 9)

In [35]:
np.genfromtxt("names.txt", delimiter=",", names=True)# names=True: the first line of the file holds the column names

array([(1., 2., 3.), (4., 5., 6.), (7., 8., 9.)],
      dtype=[('a', '<f8'), ('b', '<f8'), ('c', '<f8')])

In [36]:
data = StringIO("1 2 3\n 4 5 6")
np.genfromtxt(data, names="a, b, c")# names="a, b, c": supply the column names explicitly

array([(1., 2., 3.), (4., 5., 6.)],
      dtype=[('a', '<f8'), ('b', '<f8'), ('c', '<f8')])

In [37]:
# Five comma-separated columns, each given an explicit name.
a = u"1,2,3,4,5\n6,7,8,9,10"
np.genfromtxt(StringIO(a),delimiter=',',names='a,b,c,d,e')

array([(1., 2., 3., 4.,  5.), (6., 7., 8., 9., 10.)],
      dtype=[('a', '<f8'), ('b', '<f8'), ('c', '<f8'), ('d', '<f8'), ('e', '<f8')])

In [38]:
a = u"1,2,3,4,5\n6,7,8,9,10"
np.genfromtxt(StringIO(a), delimiter=",", names="a, b, c", usecols=("a", "c"))# usecols=("a", "c"): keep only the columns named a and c

array([(1., 3.), (6., 8.)], dtype=[('a', '<f8'), ('c', '<f8')])

In [39]:
a = u"1 2 3 4 5\n6 7 8 9 10"
np.genfromtxt(StringIO(a), usecols=(1, -1))# without names, pick columns by index; here the second and the last column

array([[ 2.,  5.],
       [ 7., 10.]])

In [40]:
# Combining names="a, b, c" with the index-based usecols=(1, -1) on this
# five-column input raises IndexError.
# NOTE(review): likely because only three names are supplied while -1 refers
# to the fifth column - confirm against the genfromtxt documentation.
# The expected error is caught and printed so that Restart & Run All can
# continue past this demonstration cell.
a = u"1 2 3 4 5\n6 7 8 9 10"
try:
    result = np.genfromtxt(StringIO(a), names="a, b, c", usecols=(1, -1))
except IndexError as err:
    result = None
    print("IndexError:", err)
result

IndexError: list index out of range

In [41]:
a = StringIO("1 2 3\n 4 5 6")
np.genfromtxt(a, dtype=(int, float, int))# one dtype per column; the fields get the default names f0, f1, f2

array([(1, 2., 3), (4, 5., 6)],
      dtype=[('f0', '<i8'), ('f1', '<f8'), ('f2', '<i8')])

In [42]:
a = StringIO("1 2 3\n 4 5 6")
np.genfromtxt(a, dtype=(int, float, int), defaultfmt="var_%i")# defaultfmt sets the pattern used for the generated field names

array([(1, 2., 3), (4, 5., 6)],
      dtype=[('var_0', '<i8'), ('var_1', '<f8'), ('var_2', '<i8')])

In [43]:
# Handling missing values
a = u", 2, 3\n4, ,"
np.genfromtxt(StringIO(a), delimiter=",", filling_values=np.nan)# fill missing entries with nan

array([[nan,  2.,  3.],
       [ 4., nan, nan]])

In [44]:
a = u"N/A, 2, 3, ???\n 2, N/A, 3,4"
np.genfromtxt(StringIO(a), delimiter=",", 
              missing_values=["N/A", "N/A", "N/A", "???"], # per-column marker that is treated as a missing value
              filling_values=[1, -2, 0, -999])# per-column replacement for missing values

array([[   1.,    2.,    3., -999.],
       [   2.,   -2.,    3.,    4.]])

In [45]:
a = u"1, , \n , 5, 6"
np.genfromtxt(StringIO(a), delimiter=',', dtype="int, float, str")# the default fill value for a missing entry depends on the column dtype
#dtype	default fill value
#bool	False
#int	-1
#float	np.nan
#complex	np.nan+0j
#string		''

array([( 1, nan, ''), (-1,  5., '')],
      dtype=[('f0', '<i8'), ('f1', '<f8'), ('f2', '<U')])

In [46]:
#Transform the data while loading by using the converters parameter

In [47]:
# Read transform.txt unconverted: two int64 columns and two 3-character string columns.
# NOTE(review): transform.txt is not created by any cell shown here - it must already exist.
np.genfromtxt("transform.txt", delimiter=',', dtype="i8, i8, U3, U3")

array([(1, 2, 'Yes', '87%'), (3, 4, 'No', '3%'), (5, 6, 'Yes', '55%')],
      dtype=[('f0', '<i8'), ('f1', '<i8'), ('f2', '<U3'), ('f3', '<U3')])

In [48]:
def trans(s):
    """Convert a Yes/No field to 1/0, for use as a ``genfromtxt`` converter.

    The raw field may arrive as ``bytes`` or as ``str`` depending on the
    ``encoding`` in effect, so both are accepted. Surrounding whitespace is
    ignored.

    Parameters
    ----------
    s : bytes or str
        Raw field value.

    Returns
    -------
    int
        1 if the field is exactly ``Yes`` (after stripping), otherwise 0.
    """
    if isinstance(s, bytes):
        # Undecodable bytes cannot spell "Yes"; replace them rather than raise.
        s = s.decode('utf-8', errors='replace')
    if isinstance(s, str) and s.strip() == 'Yes':
        return 1
    return 0

In [49]:
def conversion(x):
    """Convert a percentage field such as ``87%`` to a fraction (0.87).

    Intended as a ``genfromtxt`` converter. The raw field may arrive as
    ``bytes`` or as ``str`` depending on the ``encoding`` in effect, so both
    are accepted.

    Parameters
    ----------
    x : bytes or str
        Raw field value, with or without a percent sign.

    Returns
    -------
    float
        The numeric value divided by 100.

    Raises
    ------
    ValueError
        If what remains after removing ``%`` is not a valid number.
    """
    if isinstance(x, bytes):
        x = x.decode('utf-8')
    # Strip whitespace first so a trailing "% " is still recognised.
    return float(x.strip().strip('%')) / 100

In [50]:
np.genfromtxt("transform.txt", delimiter=',', converters={2:trans, 3:conversion})# converters maps a column index to the function applied to that column's raw values

array([(1., 2., 1, 0.87), (3., 4., 0, 0.03), (5., 6., 1, 0.55)],
      dtype=[('f0', '<f8'), ('f1', '<f8'), ('f2', '<i8'), ('f3', '<f8')])

# HW

In [51]:
#Save the following two arrays to an npz file: array1 = np.array(range(30)) and array2 = np.array([2,3,5])
#Load that npz file, then save its arrays together with the array given below into a new npz file

In [52]:
import numpy as np

In [53]:
# Homework inputs: the integers 0..29 and a short 1-D array.
array1 = np.arange(30)
array2 = np.array((2, 3, 5))

In [54]:
# Save array1 on its own as ar1.npy.
np.save('ar1.npy',array1)

In [55]:
# Save array2 on its own as ar2.npy.
np.save('ar2.npy',array2)

In [56]:
# Read both arrays back from their .npy files.
array1 = np.load('ar1.npy')
array2 = np.load('ar2.npy')

In [57]:
# Bundle both arrays into one .npz archive; passed positionally, they are
# stored under the default names arr_0 and arr_1.
np.savez('hw.npz',array1,array2)

In [58]:
# Reopen the archive and list the names of the arrays stored in it.
npzfile = np.load('hw.npz')
npzfile.files

['arr_0', 'arr_1']

In [59]:
# First positional array (array1), stored under the default name arr_0.
npzfile['arr_0']

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29])

In [60]:
# Second positional array (array2), stored under the default name arr_1.
npzfile['arr_1']

array([2, 3, 5])

In [None]:
# 1. Save the two arrays below into one .npz archive.
#    Passed positionally, they are stored under the default names arr_0 and arr_1.
array1 = np.array(range(30))
array2 = np.array([2,3,5])
np.savez('array.npz',array1,array2)
# 2. Reload that archive and save its arrays together with a new array.
#    np.load returns an NpzFile here; using it as a context manager closes the
#    underlying file as soon as the new archive has been written.
array3 = np.array([[4,5,6],[1,2,3]])
with np.load('array.npz') as load_array:
    np.savez('new_array.npz',load_array['arr_0'],load_array['arr_1'],array3)