# pandas中DataFrame单个数据提取效率与修改效率

参考文献：
https://blog.csdn.net/wxxwxx11/article/details/105885562
    

In [1]:
import numpy as np
import pandas as pd

from copy import deepcopy
from time import time

# Seed NumPy's global RNG so the generated data is reproducible.
np.random.seed(20000)

# Side length of the square test frame.
I = 900

# I x I frame of standard-normal samples with string labels:
# columns 'c0'..'c<I-1>' and index 'i0'..'i<I-1>'.
df = pd.DataFrame(
    np.random.standard_normal((I, I)),
    index=[f'i{k}' for k in range(I)],
    columns=[f'c{k}' for k in range(I)],
)


In [2]:
# Draw I random integer positions in [0, I) for columns, then for rows.
# Columns are drawn first so the random stream is consumed in the same order.
columns_num = np.floor(I * np.random.uniform(0, 1, I)).astype(int)
index_num = np.floor(I * np.random.uniform(0, 1, I)).astype(int)

# The same positions expressed as 'c<k>' / 'i<k>' string labels.
columns_str = [f'c{pos}' for pos in columns_num]
index_str = [f'i{pos}' for pos in index_num]


## 读取比较省时的两种方法

1. 先按列名取出该列，再在其 `.values` 数组中按行位置取值
2. 先取出整个二维数组（`.values`），再按行、列位置定位




In [3]:
# Read benchmark: select the column by label, then index that column's
# NumPy values by integer row position.
start = time()
for col_label in columns_str:
    for row_pos in index_num:
        c = df[col_label].values[row_pos]
elapsed = time() - start
print(elapsed)



3.003784656524658


In [4]:
# Read benchmark: fetch the frame's 2-D NumPy array once, then index it
# by integer (row, column) position inside the loops.
start = time()
b = df.values
for col_pos in columns_num:
    for row_pos in index_num:
        c = b[row_pos][col_pos]
elapsed = time() - start
print(elapsed)




0.5721554756164551


In [5]:
# Conventional approach: chained label lookup, column label first and
# then row label on the resulting column.

start = time()
for col_label in columns_str:
    for row_label in index_str:
        c = df[col_label][row_label]
elapsed = time() - start
print(elapsed)




5.127807855606079


## 修改比较省时的两种方法

1. 先取出整个二维数组（`.values`），再按行、列位置定位并赋值
2. 先按列名取出该列，再在其 `.values` 数组中按行位置赋值

In [6]:
# Write benchmark: fetch the 2-D NumPy array once, then assign by integer
# (row, column) position. A deep copy is modified so `df` keeps its data.
df_backup = deepcopy(df)
t0 = time()
# NOTE(review): assumes `.values` hands back a writable array for this
# single-dtype frame; with pandas Copy-on-Write enabled the array is
# read-only and the assignment below raises -- confirm for the pandas in use.
b = df_backup.values
for m in columns_num:
    for n in index_num:
        # Assign (not read): this cell measures modification cost.
        b[n][m] = 0.0
print(time()-t0)




0.5822360515594482


In [7]:
# Write benchmark: select the column by label, then assign into that
# column's NumPy values by integer row position. The assignments go to a
# deep copy of `df`.
df_backup = deepcopy(df)
start = time()
for col_label in columns_str:
    for row_pos in index_num:
        df_backup[col_label].values[row_pos] = 0.0
elapsed = time() - start
print(elapsed)




2.8330254554748535


In [8]:
# Write benchmark: re-fetch `.values` from the frame on every assignment,
# then assign by integer (row, column) position.
df_backup = deepcopy(df)
t0 = time()
for m in columns_num:
    for n in index_num:
        # Target the deep copy, not `df`, so the source data stays intact.
        # NOTE(review): assumes `.values` is writable here (pandas
        # Copy-on-Write disabled) -- confirm for the pandas in use.
        df_backup.values[n][m] = 0.0
print(time()-t0)




4.724409341812134
