#### 文件读写及相关操作  
CSV文件是一种文本文件，可以使用文本编辑器进行编辑，同时支持追加模式，节省内存的开销。

1.使用read_csv函数读取csv文件  
read_csv()函数的作用就是将csv文件中的数据读取出来，并转为DataFrame对象。  
pd.read_csv(filepath_or_buffer,sep,header)  
filepath_or_buffer:文件路径  
sep：文件分割符  
header：指定用作列名的行号，默认为'infer'（未传入names时等价于header=0，即把第一行作为列名）；如果读取的文件没有列名行，则应设置为None。
names：用于结果的列名列表。如果文件不包含标题行，需要同时显式设置header=None  
index_col:用作行索引的列标签（或列序号），如果是给定一个序列，则表示用多个列构成多级行索引（MultiIndex）

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the CSV into a DataFrame; the path is relative to the working directory.
df = pd.read_csv(filepath_or_buffer='数据源/股票数据.csv')
# Display the loaded frame.
df

Unnamed: 0.1,Unnamed: 0,股票0,股票1,股票2,股票3,股票4
0,2022-05-08,0.72,-1.33,4.56,0.48,-6.17
1,2022-05-09,-0.03,1.31,-1.35,0.23,-1.27
2,2022-05-10,1.54,2.92,2.88,-0.95,4.78
3,2022-05-11,-4.45,-0.41,0.37,4.62,3.55


In [3]:
# read_table splits on tabs ('\t') by default, so the comma delimiter
# has to be passed explicitly to parse a CSV file.
pd1 = pd.read_table(filepath_or_buffer='数据源/股票数据.csv', sep=',')
pd1

Unnamed: 0.1,Unnamed: 0,股票0,股票1,股票2,股票3,股票4
0,2022-05-08,0.72,-1.33,4.56,0.48,-6.17
1,2022-05-09,-0.03,1.31,-1.35,0.23,-1.27
2,2022-05-10,1.54,2.92,2.88,-0.95,4.78
3,2022-05-11,-4.45,-0.41,0.37,4.62,3.55


In [4]:
# header=None: the first line of the file is treated as data rather than as
# column names, so the columns receive default integer labels.
pd.read_csv(
    '数据源/exa1.csv',
    header=None,
)

Unnamed: 0,0,1,2,3,4
0,1,2,3,4,hello
1,5,6,7,8,python
2,9,10,11,12,sixstar


In [5]:
# Supply the column names explicitly, then use column 'E' as the row index.
pd.read_csv(
    '数据源/exa1.csv',
    names=['A', 'B', 'C', 'F', 'E'],
    index_col='E',
)

Unnamed: 0_level_0,A,B,C,F
E,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
hello,1,2,3,4
python,5,6,7,8
sixstar,9,10,11,12


In [6]:
# Passing a list to index_col builds a two-level row index from k1 and k2.
pd.read_csv(
    '数据源/mutiindex.csv',
    index_col=['k1', 'k2'],
)

Unnamed: 0_level_0,Unnamed: 1_level_0,v1,v2
k1,k2,Unnamed: 2_level_1,Unnamed: 3_level_1
one,a,2,11
one,b,3,12
one,c,4,13
one,d,5,14
two,a,6,15
two,b,7,16
two,c,8,17
two,d,9,18


In [7]:
# Parse a whitespace-delimited text file. The separator is a regular
# expression; it is written as a raw string so the backslash in \s reaches
# the regex engine unchanged instead of being an invalid string escape.
pd.read_table(
    '数据源/exa2.txt',
    sep=r'\s+',
)

Unnamed: 0,A,B,C
a,-0.264438,-1.026059,-0.6195
b,0.927272,0.302904,-0.032399
c,-0.264273,-0.386314,-0.217601
d,-0.871858,-0.348382,1.100491


In [8]:
# Skip file lines 0 and 1 (the header line and the first data row) and let
# header=None assign default integer column labels to what remains.
# The regex separator is a raw string so \s is not an invalid string escape.
pd.read_table(
    '数据源/exa2.txt',
    sep=r'\s+',
    skiprows=[0, 1],
    header=None,
)

Unnamed: 0,0,1,2,3
0,b,0.927272,0.302904,-0.032399
1,c,-0.264273,-0.386314,-0.217601
2,d,-0.871858,-0.348382,1.100491


In [9]:
# Skip only file line 1 (the first data row); line 0 still supplies the
# column names. The regex separator is a raw string so \s is not an invalid
# string escape.
pd.read_table(
    '数据源/exa2.txt',
    sep=r'\s+',
    skiprows=[1],
)

Unnamed: 0,A,B,C
b,0.927272,0.302904,-0.032399
c,-0.264273,-0.386314,-0.217601
d,-0.871858,-0.348382,1.100491


In [10]:
# Decode the file as GBK (presumably the encoding exa3.csv was saved with).
pd.read_csv(
    '数据源/exa3.csv',
    encoding='gbk',
)

Unnamed: 0,Nickname,Age,Gender,Comment
0,小枫,18,1.0,A
1,托尼,19,1.0,B
2,云归,0,,C


In [11]:
# Treat the value 0 as missing: every matching cell is read as NaN.
pd.read_csv(
    '数据源/exa3.csv',
    encoding='gbk',
    na_values=0,
)

Unnamed: 0,Nickname,Age,Gender,Comment
0,小枫,18.0,1.0,A
1,托尼,19.0,1.0,B
2,云归,,,C


In [12]:
# A dict for na_values sets the missing-value marker per column:
# '托尼' becomes NaN only in Nickname, and 'C' only in Comment.
pd.read_csv(
    '数据源/exa3.csv',
    encoding='gbk',
    na_values={'Nickname': '托尼', 'Comment': 'C'},
)

Unnamed: 0,Nickname,Age,Gender,Comment
0,小枫,18,1.0,A
1,,19,1.0,B
2,云归,0,,


#### 逐块读取文本文件

In [13]:
# 1. Pass chunksize to read the file piece by piece, 100 rows at a time.
# read_csv then returns a reader that yields DataFrames instead of returning
# one DataFrame. Using the reader as a context manager closes the underlying
# file handle when the loop finishes or fails, instead of leaving it open.
with pd.read_csv('数据源/exa4.csv', chunksize=100) as data:
    # The reader is single-pass: each chunk can be consumed only once.
    for chunk in data:
        print(chunk)

    Unnamed: 0   股票0   股票1   股票2   股票3   股票4
0   2020-05-08  3.21  1.82  2.51 -3.61  4.03
1   2020-05-09  4.18 -3.25  1.34  0.58 -3.35
2   2020-05-10  4.52  3.95 -0.29 -5.73  1.81
3   2020-05-11  0.90  2.31 -0.74 -1.69 -3.10
4   2020-05-12  3.42  5.05 -3.42  2.35 -2.12
..         ...   ...   ...   ...   ...   ...
95  2020-08-11  2.17  4.39  2.96 -1.07  1.46
96  2020-08-12  0.64 -1.52  2.97  0.56 -0.47
97  2020-08-13 -3.68 -1.91 -1.88  1.05 -4.18
98  2020-08-14  3.59 -2.19 -0.20 -2.43 -1.41
99  2020-08-15  0.70  5.56 -6.17 -5.64 -1.54

[100 rows x 6 columns]
     Unnamed: 0   股票0   股票1   股票2   股票3   股票4
100  2020-08-16  4.81 -4.33  1.66  0.44 -2.93
101  2020-08-17 -1.55 -5.51  2.07  0.21  2.02
102  2020-08-18 -3.02  0.67  1.06 -4.91  2.42
103  2020-08-19  3.65 -5.51 -1.11 -0.59  2.24
104  2020-08-20  0.48 -1.30  0.45 -3.67 -3.01
..          ...   ...   ...   ...   ...   ...
195  2020-11-19 -0.55  3.63 -0.29 -3.84  3.72
196  2020-11-20 -2.84  0.57  1.03  0.10  2.31
197  2020-11-21 -1.55 

      Unnamed: 0   股票0   股票1   股票2   股票3   股票4
3600  2030-03-17 -0.69 -5.00  4.60  2.02 -5.22
3601  2030-03-18 -4.22  0.05 -0.14  1.75  5.60
3602  2030-03-19 -0.49  5.19 -5.12 -1.08  1.25
3603  2030-03-20  2.84 -2.44  0.63 -1.12  3.17
3604  2030-03-21 -0.96 -1.07  5.45  1.00  1.53
...          ...   ...   ...   ...   ...   ...
3695  2030-06-20 -1.61 -4.93  0.87  3.32  1.56
3696  2030-06-21 -1.08 -4.53  3.29 -1.96 -0.98
3697  2030-06-22 -1.16 -0.15 -5.69 -1.25 -3.44
3698  2030-06-23  2.83  0.18  0.05  1.06 -2.95
3699  2030-06-24 -5.67 -4.41  3.76 -3.73 -3.30

[100 rows x 6 columns]
      Unnamed: 0    股票0   股票1   股票2   股票3   股票4
3700  2030-06-25   2.46  3.30  1.27  1.41 -5.15
3701  2030-06-26   4.98  1.54  0.03 -1.46  0.03
3702  2030-06-27   4.51 -0.67 -3.13 -5.88  4.27
3703  2030-06-28  10.49 -0.98 -0.06 -1.10 -1.65
3704  2030-06-29  -1.33  0.26  1.14 -2.56 -0.32
...          ...    ...   ...   ...   ...   ...
3795  2030-09-28  -2.04 -2.63 -2.50 -5.82 -3.77
3796  2030-09-29   0.44  0.4

      Unnamed: 0   股票0   股票1   股票2   股票3   股票4
7000  2039-07-08  1.42 -1.01 -0.09  2.90 -2.52
7001  2039-07-09  0.18 -3.17 -0.01  0.68  0.96
7002  2039-07-10  1.65  3.34 -0.62 -1.01  2.93
7003  2039-07-11  0.88 -4.22 -0.06 -3.39 -2.75
7004  2039-07-12  0.35 -0.14  4.80  0.15 -5.04
...          ...   ...   ...   ...   ...   ...
7095  2039-10-11 -2.46  1.12 -5.44 -0.01  4.41
7096  2039-10-12  2.61 -1.75 -4.66  0.88 -4.20
7097  2039-10-13  0.88  5.18 -0.27  0.13 -0.68
7098  2039-10-14  1.72  2.85  2.13 -1.43  0.25
7099  2039-10-15 -2.64 -2.62  1.80 -0.71  5.61

[100 rows x 6 columns]
      Unnamed: 0   股票0   股票1   股票2   股票3   股票4
7100  2039-10-16  4.06 -1.11  5.12 -3.66 -0.17
7101  2039-10-17 -2.60  2.81 -5.63  0.51  1.55
7102  2039-10-18 -0.16  8.26 -0.11 -1.95 -5.33
7103  2039-10-19  5.93 -0.99  3.46 -1.23 -2.46
7104  2039-10-20  0.52 -5.48 -3.08  1.49 -0.08
...          ...   ...   ...   ...   ...   ...
7195  2040-01-19 -1.56 -0.39  1.46  0.18 -0.58
7196  2040-01-20  2.77  3.11 -0.39 -

In [14]:
# Read only the first 100 data rows, using column 0 as the row index.
pd.read_csv('数据源/exa4.csv', index_col=0, nrows=100)

Unnamed: 0,股票0,股票1,股票2,股票3,股票4
2020-05-08,3.21,1.82,2.51,-3.61,4.03
2020-05-09,4.18,-3.25,1.34,0.58,-3.35
2020-05-10,4.52,3.95,-0.29,-5.73,1.81
2020-05-11,0.90,2.31,-0.74,-1.69,-3.10
2020-05-12,3.42,5.05,-3.42,2.35,-2.12
...,...,...,...,...,...
2020-08-11,2.17,4.39,2.96,-1.07,1.46
2020-08-12,0.64,-1.52,2.97,0.56,-0.47
2020-08-13,-3.68,-1.91,-1.88,1.05,-4.18
2020-08-14,3.59,-2.19,-0.20,-2.43,-1.41


In [15]:
# Load the whole file, using column 0 as the row index.
data1 = pd.read_csv('数据源/exa4.csv', index_col=0)

In [16]:
# head() returns the first 5 rows by default; here 10 are requested.
data1.head(n=10)

Unnamed: 0,股票0,股票1,股票2,股票3,股票4
2020-05-08,3.21,1.82,2.51,-3.61,4.03
2020-05-09,4.18,-3.25,1.34,0.58,-3.35
2020-05-10,4.52,3.95,-0.29,-5.73,1.81
2020-05-11,0.9,2.31,-0.74,-1.69,-3.1
2020-05-12,3.42,5.05,-3.42,2.35,-2.12
2020-05-13,5.1,-3.86,6.3,-6.05,-0.9
2020-05-14,1.69,-6.45,-0.64,2.81,0.12
2020-05-15,-0.4,4.79,3.5,-0.26,1.88
2020-05-16,-3.52,2.63,1.34,-1.29,3.91
2020-05-17,-2.93,3.39,5.9,1.39,2.42


三种读取方式的区别：  
* chunksize：将整个数据根据参数的值进行划分，分成 文件总行数/chunksize（向上取整）块数据，返回的是一个可迭代对象，每次迭代时才读取下一块；如果希望查看到中间的数据可以使用循环遍历，不过该对象只能遍历一次。  
* nrows：返回指定行数的数据，如果希望读取更多的行需要重新修改参数并运行读取代码。
* head()函数：DataFrame对象的head()方法，默认返回已读入数据的前5行，如果希望查看其他行数只需要修改里面的参数即可，不需要重新读取文件。

In [17]:
# tail() returns rows from the end of the frame (the last 5 here);
# it takes the same row-count argument as head().
data1.tail()

Unnamed: 0,股票0,股票1,股票2,股票3,股票4
2047-09-19,-1.86,0.55,-1.02,-0.73,3.18
2047-09-20,0.02,2.72,0.15,2.85,-1.32
2047-09-21,1.19,-2.27,-2.64,2.7,-1.86
2047-09-22,1.5,-3.39,1.93,-2.34,2.33
2047-09-23,-4.22,-0.97,1.17,4.06,-2.19


In [18]:
# Display the whole frame; the notebook abbreviates the middle rows with "...".
data1

Unnamed: 0,股票0,股票1,股票2,股票3,股票4
2020-05-08,3.21,1.82,2.51,-3.61,4.03
2020-05-09,4.18,-3.25,1.34,0.58,-3.35
2020-05-10,4.52,3.95,-0.29,-5.73,1.81
2020-05-11,0.90,2.31,-0.74,-1.69,-3.10
2020-05-12,3.42,5.05,-3.42,2.35,-2.12
...,...,...,...,...,...
2047-09-19,-1.86,0.55,-1.02,-0.73,3.18
2047-09-20,0.02,2.72,0.15,2.85,-1.32
2047-09-21,1.19,-2.27,-2.64,2.70,-1.86
2047-09-22,1.50,-3.39,1.93,-2.34,2.33


In [19]:
# Per-column summary statistics: count, mean, std, min, quartiles and max.
data1.describe()

Unnamed: 0,股票0,股票1,股票2,股票3,股票4
count,10000.0,10000.0,10000.0,10000.0,10000.0
mean,-0.008562,-0.029201,0.014023,-0.046922,0.078394
std,2.99099,3.004099,3.000648,2.982153,3.006461
min,-10.97,-11.28,-10.46,-12.21,-11.91
25%,-1.97,-2.05,-2.0,-2.03,-1.94
50%,-0.04,-0.04,0.01,-0.01,0.08
75%,1.96,2.03,2.02,1.91,2.09
max,11.18,11.82,10.79,10.83,11.15


In [20]:
# Print the index range, column dtypes, non-null counts and memory usage.
data1.info()

<class 'pandas.core.frame.DataFrame'>
Index: 10000 entries, 2020-05-08 to 2047-09-23
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   股票0     10000 non-null  float64
 1   股票1     10000 non-null  float64
 2   股票2     10000 non-null  float64
 3   股票3     10000 non-null  float64
 4   股票4     10000 non-null  float64
dtypes: float64(5)
memory usage: 468.8+ KB


#### 写出数据

In [21]:
# Column labels '股票0' .. '股票99', one per simulated stock.
col = ['股票' + str(number) for number in range(100)]
col

['股票0',
 '股票1',
 '股票2',
 '股票3',
 '股票4',
 '股票5',
 '股票6',
 '股票7',
 '股票8',
 '股票9',
 '股票10',
 '股票11',
 '股票12',
 '股票13',
 '股票14',
 '股票15',
 '股票16',
 '股票17',
 '股票18',
 '股票19',
 '股票20',
 '股票21',
 '股票22',
 '股票23',
 '股票24',
 '股票25',
 '股票26',
 '股票27',
 '股票28',
 '股票29',
 '股票30',
 '股票31',
 '股票32',
 '股票33',
 '股票34',
 '股票35',
 '股票36',
 '股票37',
 '股票38',
 '股票39',
 '股票40',
 '股票41',
 '股票42',
 '股票43',
 '股票44',
 '股票45',
 '股票46',
 '股票47',
 '股票48',
 '股票49',
 '股票50',
 '股票51',
 '股票52',
 '股票53',
 '股票54',
 '股票55',
 '股票56',
 '股票57',
 '股票58',
 '股票59',
 '股票60',
 '股票61',
 '股票62',
 '股票63',
 '股票64',
 '股票65',
 '股票66',
 '股票67',
 '股票68',
 '股票69',
 '股票70',
 '股票71',
 '股票72',
 '股票73',
 '股票74',
 '股票75',
 '股票76',
 '股票77',
 '股票78',
 '股票79',
 '股票80',
 '股票81',
 '股票82',
 '股票83',
 '股票84',
 '股票85',
 '股票86',
 '股票87',
 '股票88',
 '股票89',
 '股票90',
 '股票91',
 '股票92',
 '股票93',
 '股票94',
 '股票95',
 '股票96',
 '股票97',
 '股票98',
 '股票99']

In [22]:
# Build 100 consecutive calendar days starting at 2023-02-17.
# freq='D' is the daily alias; the original note lists 'M' as the monthly one
# (NOTE: newer pandas releases spell month-end as 'ME' -- verify for your version).
index_ = pd.date_range(start='2023-02-17', periods=100, freq='D')
index_

DatetimeIndex(['2023-02-17', '2023-02-18', '2023-02-19', '2023-02-20',
               '2023-02-21', '2023-02-22', '2023-02-23', '2023-02-24',
               '2023-02-25', '2023-02-26', '2023-02-27', '2023-02-28',
               '2023-03-01', '2023-03-02', '2023-03-03', '2023-03-04',
               '2023-03-05', '2023-03-06', '2023-03-07', '2023-03-08',
               '2023-03-09', '2023-03-10', '2023-03-11', '2023-03-12',
               '2023-03-13', '2023-03-14', '2023-03-15', '2023-03-16',
               '2023-03-17', '2023-03-18', '2023-03-19', '2023-03-20',
               '2023-03-21', '2023-03-22', '2023-03-23', '2023-03-24',
               '2023-03-25', '2023-03-26', '2023-03-27', '2023-03-28',
               '2023-03-29', '2023-03-30', '2023-03-31', '2023-04-01',
               '2023-04-02', '2023-04-03', '2023-04-04', '2023-04-05',
               '2023-04-06', '2023-04-07', '2023-04-08', '2023-04-09',
               '2023-04-10', '2023-04-11', '2023-04-12', '2023-04-13',
      

In [23]:
# Draw a 100 x 100 matrix from a normal distribution with mean 0 and
# standard deviation 3 (not the standard normal, whose deviation would be 1).
data = np.random.normal(loc=0, scale=3, size=(100, 100))
# Rows are labelled by date, columns by stock name.
df2 = pd.DataFrame(data, index=index_, columns=col)
df2

Unnamed: 0,股票0,股票1,股票2,股票3,股票4,股票5,股票6,股票7,股票8,股票9,...,股票90,股票91,股票92,股票93,股票94,股票95,股票96,股票97,股票98,股票99
2023-02-17,1.518998,0.365074,-4.611402,-0.696404,-2.219209,-6.663645,5.283073,3.430802,-1.350827,-3.192552,...,2.747531,-4.007901,4.440820,2.694782,-2.404305,-3.100652,-0.553856,1.652679,2.191796,0.433682
2023-02-18,-1.145414,-2.354493,0.374685,2.496891,-1.021139,-3.349666,0.490702,-4.976541,2.729110,-1.768839,...,-3.815459,-0.938177,-2.450568,1.825331,-1.048261,0.155184,-1.156605,-1.567912,-6.782842,-1.216917
2023-02-19,-2.470368,4.430159,2.919608,-1.403015,-1.643840,2.140703,3.483018,-0.019804,3.407428,4.371728,...,-8.256410,4.653255,5.371212,1.050953,-1.817624,4.743034,-2.695272,0.985878,-1.784085,-1.157450
2023-02-20,6.961655,-3.642941,1.667009,-1.138455,4.779185,3.935247,0.352943,-2.174916,1.067405,-0.211664,...,4.557491,-0.767505,2.261990,2.114341,0.289891,1.097054,2.736770,-0.707855,3.154312,5.655886
2023-02-21,-2.858928,-0.830418,-0.142636,-1.777048,1.324440,1.686472,-0.338487,-3.831907,2.260828,2.555167,...,-2.152983,-4.020507,-2.817496,-4.022596,-6.613392,-3.185146,-9.938719,-0.371223,-0.228370,-3.389070
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-05-23,2.092578,-0.302538,-6.640490,2.448849,-1.391546,0.089770,3.456350,-1.138910,-6.701209,0.473133,...,-2.646103,-2.522469,-0.785482,-4.564520,-3.151260,1.175160,-4.170896,1.571896,0.805509,1.192405
2023-05-24,1.650129,2.895536,3.229539,-0.235904,-3.733318,-0.159830,4.566899,-4.702081,-0.586211,1.179106,...,1.421220,-0.541820,6.254790,3.543349,-0.850872,6.756017,3.483396,-1.795470,-0.718169,2.567537
2023-05-25,-0.933130,-1.070997,-4.074611,-5.155032,1.189500,-1.309289,2.838901,-0.520088,1.794699,-2.598997,...,0.394901,1.197371,-2.445775,-3.417380,-1.053386,-0.922363,-3.724285,1.353790,-0.948572,-0.384783
2023-05-26,9.004934,-3.584600,5.009879,-0.846092,2.961768,-5.854809,0.128959,-1.283003,3.123669,-0.955383,...,-3.427225,-4.491836,1.741200,4.212605,2.682556,-0.183259,-0.246186,3.488343,-5.142760,-0.674030


#### 保存文件
 DataFrame.to_csv(path_or_buf,sep = ',',na_rep = '')  
 path_or_buf:文件保存路径
 sep:文件分隔符  
 na_rep:缺失数据填充  
 float_format:小数保留位数
 mode：文件写入模式（如'w'覆盖、'a'追加），取值和open打开文件时的mode参数一致  
 encoding：文件编码格式

In [24]:
# Writing to a path returns None, so the cell shows no output on success.
df2.to_csv(
    'data/股票.csv',
    encoding='gbk',
)

#### 其他格式文件读取与保存

* csv：是一种纯文本文件，体积相对而言会更小，更适合存放结构化数据 
* excel：里面包含很多格式信息之类的，并不是纯文本文件，体积相对大

**h5文件**  
    h5文件是HDF的第5代版本，是用于存储科学数据的一种文件格式。在内存占用、压缩、访问速度等方面都具有非常优秀的特性，在科学领域和工业领域可能用的多一些

In [25]:
# Save df2 to an HDF5 file (needs the optional PyTables dependency).
# Mode 'w' creates the file, replacing any existing one.
with pd.HDFStore('data/stock.h5', 'w') as hd:
    # Store the frame under the key 'data' (a named entry in the file,
    # not a column).
    hd['data'] = df2
# Leaving the with-block closes the store, even if the write raised.

有可能会出现以下报错：ImportError（缺少可选依赖 pytables，即未安装该模块）
解决方法：
安装模块：pytables（pip install tables 或 conda install pytables）

In [26]:
# Write df2 to an Excel workbook; sheet_name sets the worksheet's name.
df2.to_excel('data/股票.xlsx', sheet_name='股份百分比')

In [27]:
# Serialize df2 to a JSON file.
df2.to_json(
    'data/股票.json',
)

**pkl文件**
一种Python数据存储文件，里面可以用来存储一些Python项目过程中用到的变量或者字符串等。如果直接打开有可能里面就是一些二进制数据。常用于保存神经网络训练出来的模型数据或者说各种需要存储的数据。

In [29]:
# Serialize df2 to a pickle file.
df2.to_pickle('data/股票.pkl')

#### 读取各种类型的文件

In [30]:
import time

In [31]:
# Time how long it takes to read the Excel workbook back.
time_excel_star = time.perf_counter()
# sheet_name selects which worksheet of the workbook is loaded.
exc = pd.read_excel('data/股票.xlsx', sheet_name='股份百分比')
time_excel_end = time.perf_counter()
print(f'execel文件读取时间:{time_excel_end-time_excel_star}')

execel文件读取时间:0.12021140000001651


In [32]:
# Time how long it takes to read the CSV file back (written with GBK encoding).
time_csv_star = time.perf_counter()
exc = pd.read_csv(
    'data/股票.csv',
    encoding='gbk',
)
time_csv_end = time.perf_counter()
print(f'文件读取时间:{time_csv_end-time_csv_star}')

文件读取时间:0.016228699999942364


In [33]:
# Time how long it takes to read the HDF5 file back.
time_h5_star = time.perf_counter()
# HDF5 is a binary format, so no text encoding applies; the frame is
# selected by the key it was stored under.
h5 = pd.read_hdf('data/stock.h5', key='data')
time_h5_end = time.perf_counter()
print(f'文件读取时间:{time_h5_end-time_h5_star}')

文件读取时间:0.011642599999959202


In [34]:
# Display the frame loaded from the HDF5 file.
h5

Unnamed: 0,股票0,股票1,股票2,股票3,股票4,股票5,股票6,股票7,股票8,股票9,...,股票90,股票91,股票92,股票93,股票94,股票95,股票96,股票97,股票98,股票99
2023-02-17,1.518998,0.365074,-4.611402,-0.696404,-2.219209,-6.663645,5.283073,3.430802,-1.350827,-3.192552,...,2.747531,-4.007901,4.440820,2.694782,-2.404305,-3.100652,-0.553856,1.652679,2.191796,0.433682
2023-02-18,-1.145414,-2.354493,0.374685,2.496891,-1.021139,-3.349666,0.490702,-4.976541,2.729110,-1.768839,...,-3.815459,-0.938177,-2.450568,1.825331,-1.048261,0.155184,-1.156605,-1.567912,-6.782842,-1.216917
2023-02-19,-2.470368,4.430159,2.919608,-1.403015,-1.643840,2.140703,3.483018,-0.019804,3.407428,4.371728,...,-8.256410,4.653255,5.371212,1.050953,-1.817624,4.743034,-2.695272,0.985878,-1.784085,-1.157450
2023-02-20,6.961655,-3.642941,1.667009,-1.138455,4.779185,3.935247,0.352943,-2.174916,1.067405,-0.211664,...,4.557491,-0.767505,2.261990,2.114341,0.289891,1.097054,2.736770,-0.707855,3.154312,5.655886
2023-02-21,-2.858928,-0.830418,-0.142636,-1.777048,1.324440,1.686472,-0.338487,-3.831907,2.260828,2.555167,...,-2.152983,-4.020507,-2.817496,-4.022596,-6.613392,-3.185146,-9.938719,-0.371223,-0.228370,-3.389070
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-05-23,2.092578,-0.302538,-6.640490,2.448849,-1.391546,0.089770,3.456350,-1.138910,-6.701209,0.473133,...,-2.646103,-2.522469,-0.785482,-4.564520,-3.151260,1.175160,-4.170896,1.571896,0.805509,1.192405
2023-05-24,1.650129,2.895536,3.229539,-0.235904,-3.733318,-0.159830,4.566899,-4.702081,-0.586211,1.179106,...,1.421220,-0.541820,6.254790,3.543349,-0.850872,6.756017,3.483396,-1.795470,-0.718169,2.567537
2023-05-25,-0.933130,-1.070997,-4.074611,-5.155032,1.189500,-1.309289,2.838901,-0.520088,1.794699,-2.598997,...,0.394901,1.197371,-2.445775,-3.417380,-1.053386,-0.922363,-3.724285,1.353790,-0.948572,-0.384783
2023-05-26,9.004934,-3.584600,5.009879,-0.846092,2.961768,-5.854809,0.128959,-1.283003,3.123669,-0.955383,...,-3.427225,-4.491836,1.741200,4.212605,2.682556,-0.183259,-0.246186,3.488343,-5.142760,-0.674030


In [36]:
# Time how long it takes to read the JSON file back.
time_json_star = time.perf_counter()
jason = pd.read_json(
    'data/股票.json',
)
time_json_end = time.perf_counter()
print(f'文件读取时间:{time_json_end-time_json_star}')

文件读取时间:0.059283399999912945


In [37]:
# Time how long it takes to read the pickle file back.
# NOTE: unpickling can execute arbitrary code -- only load trusted files.
time_pkl_star = time.perf_counter()
# The result reuses the name `jason` from the JSON cell, overwriting it.
jason = pd.read_pickle(
    'data/股票.pkl',
)
time_pkl_end = time.perf_counter()
print(f'文件读取时间:{time_pkl_end-time_pkl_star}')

文件读取时间:0.010579400000096939
