### P041 数据指标计算 - MAE（mean absolute error）

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Load the P041 predictions file and preview its first five rows.
df = pd.read_csv(filepath_or_buffer="p041-predictions.csv")
df.head(n=5)

Unnamed: 0,y_true,y_pred
0,109.934283,113.175123
1,97.234714,93.383891
2,112.953771,106.184551
3,130.460597,136.57736
4,95.316933,105.626928


In [3]:
def mean_absolute_error(y_true, y_pred):
    """Return the mean absolute error (MAE) between targets and predictions.

    MAE is the average of ``|y_true - y_pred|`` over every element.

    Parameters
    ----------
    y_true : array-like
        Ground-truth values (list, tuple, NumPy array or pandas Series).
    y_pred : array-like
        Predicted values; must broadcast against ``y_true``.

    Returns
    -------
    numpy.float64
        The mean absolute difference. NaN in either input propagates to
        the result instead of being silently skipped.

    Raises
    ------
    ValueError
        If ``y_true`` is empty.
    """
    # Convert up front so plain Python sequences are accepted and pandas
    # inputs are compared by position rather than re-aligned on index labels.
    true_values = np.asarray(y_true, dtype=float)
    predicted_values = np.asarray(y_pred, dtype=float)
    if true_values.size == 0:
        raise ValueError("mean_absolute_error requires at least one sample")
    # np.mean divides by the number of elements, which stays correct for
    # multi-dimensional input (len() would only count the first axis).
    return np.mean(np.abs(true_values - predicted_values))

In [4]:
# Score the loaded predictions with the hand-written MAE and display it.
mae = mean_absolute_error(y_true=df["y_true"], y_pred=df["y_pred"])
mae

6.791794588005249

### P042 数据指标计算 - MSE（mean squared error）

In [5]:
import numpy as np
import pandas as pd

In [6]:
# Load the P042 predictions file and preview its first ten rows.
df = pd.read_csv(filepath_or_buffer="p042-predictions.csv")
df.head(n=10)

Unnamed: 0,y_true,y_pred
0,109.934283,113.175123
1,97.234714,93.383891
2,112.953771,106.184551
3,130.460597,136.57736
4,95.316933,105.626928
5,95.317261,104.630062
6,131.584256,123.192081
7,115.348695,112.256571
8,90.610512,93.923147
9,110.851201,120.606652


In [7]:
def mean_squared_error(y_true, y_pred):
    """Return the mean squared error (MSE) between targets and predictions.

    MSE is the average of ``(y_true - y_pred) ** 2`` over every element.

    Parameters
    ----------
    y_true : array-like
        Ground-truth values (list, tuple, NumPy array or pandas Series).
    y_pred : array-like
        Predicted values; must broadcast against ``y_true``.

    Returns
    -------
    numpy.float64
        The mean squared difference. NaN in either input propagates to
        the result instead of being silently skipped.

    Raises
    ------
    ValueError
        If ``y_true`` is empty.
    """
    # Convert up front so plain Python sequences are accepted and pandas
    # inputs are compared by position rather than re-aligned on index labels.
    true_values = np.asarray(y_true, dtype=float)
    predicted_values = np.asarray(y_pred, dtype=float)
    if true_values.size == 0:
        raise ValueError("mean_squared_error requires at least one sample")
    # np.mean divides by the number of elements, which stays correct for
    # multi-dimensional input (len() would only count the first axis).
    return np.mean((true_values - predicted_values) ** 2)

In [8]:
# Score the loaded predictions with the hand-written MSE.
mse = mean_squared_error(y_true=df["y_true"], y_pred=df["y_pred"])

In [9]:
# Display the MSE value stored in `mse`.
mse

74.9471459408194

### P043 数据指标计算 - Sigmoid函数

In [10]:
import numpy as np
import pandas as pd

In [11]:
# Draw ten standard-normal samples from a seeded generator so the frame
# (and everything derived from it) is identical on every Restart & Run All.
df = pd.DataFrame(
    data = np.random.default_rng(42).standard_normal(10),
    columns = ["var1"]
)
df

Unnamed: 0,var1
0,0.147392
1,-1.589552
2,1.017404
3,-0.683986
4,-0.01637
5,-0.174412
6,3.400084
7,0.110171
8,-0.128879
9,-0.121582


In [12]:
def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` of ``x``.

    Works element-wise on scalars, NumPy arrays and pandas Series; the
    result has the same container type NumPy ufuncs produce for ``x``.

    Parameters
    ----------
    x : scalar, numpy.ndarray or pandas.Series
        Input value(s).

    Returns
    -------
    Same kind as ``x`` (NumPy scalar for scalar input)
        Values in the closed interval [0, 1].
    """
    # 1 / (1 + exp(-x)) == exp(-log(exp(0) + exp(-x))).  np.logaddexp
    # evaluates that logarithm without forming exp(-x) itself, so very
    # negative inputs no longer overflow and emit a RuntimeWarning.
    return np.exp(-np.logaddexp(0, -x))

In [13]:
# Quick check of sigmoid on the integers 1, 2 and 3.
sigmoid(np.arange(1, 4))

array([0.73105858, 0.88079708, 0.95257413])

In [14]:
# sigmoid is built from NumPy ufuncs, so it can take the whole column in
# one vectorised call instead of being mapped over it value by value.
df["var1_sigmoid"] = sigmoid(df["var1"])

In [15]:
# Display the frame, including the var1_sigmoid column.
df

Unnamed: 0,var1,var1_sigmoid
0,0.147392,0.536781
1,-1.589552,0.169447
2,1.017404,0.734467
3,-0.683986,0.335372
4,-0.01637,0.495908
5,-0.174412,0.456507
6,3.400084,0.967707
7,0.110171,0.527515
8,-0.128879,0.467825
9,-0.121582,0.469642


### P044 数据指标计算 - entropy函数

In [16]:
import numpy as np
import pandas as pd

In [17]:
# val_1 runs 0.01, 0.11, ..., 0.91 and val_2 is its complement, so each
# row is a two-outcome probability distribution.
df = pd.DataFrame({"val_1": np.arange(0.01, 1, 0.1)}).assign(
    val_2=lambda frame: 1 - frame["val_1"]
)
df

Unnamed: 0,val_1,val_2
0,0.01,0.99
1,0.11,0.89
2,0.21,0.79
3,0.31,0.69
4,0.41,0.59
5,0.51,0.49
6,0.61,0.39
7,0.71,0.29
8,0.81,0.19
9,0.91,0.09


In [18]:
def entropy(x):
    """Return the Shannon entropy, in bits, of the probabilities in ``x``.

    Computes ``-sum(p * log2(p))`` over every element of ``x``.  Entries
    equal to zero contribute nothing, following the convention
    ``0 * log2(0) = 0``.

    Parameters
    ----------
    x : array-like
        Probabilities (list, tuple, NumPy array or pandas Series).  The
        values are not checked or normalised.

    Returns
    -------
    numpy.float64
        The entropy in bits.
    """
    probabilities = np.asarray(x, dtype=float)
    # Drop exact zeros before taking the log: 0 * log2(0) evaluates to
    # 0 * -inf = nan, which would otherwise poison the whole sum.
    nonzero = probabilities[probabilities != 0]
    return -np.sum(nonzero * np.log2(nonzero))

In [19]:
# Row-wise entropy of the (val_1, val_2) pair; raw=True hands each row
# to entropy as a plain two-element array.
probability_columns = ["val_1", "val_2"]
df["entropy"] = df[probability_columns].apply(entropy, axis=1, raw=True)

In [20]:
# Display the frame, including the entropy column.
df

Unnamed: 0,val_1,val_2,entropy
0,0.01,0.99,0.080793
1,0.11,0.89,0.499916
2,0.21,0.79,0.741483
3,0.31,0.69,0.893173
4,0.41,0.59,0.9765
5,0.51,0.49,0.999711
6,0.61,0.39,0.9648
7,0.71,0.29,0.868721
8,0.81,0.19,0.701471
9,0.91,0.09,0.43647


### P045 数据指标计算 - accuracy_score准确率

In [21]:
import numpy as np
import pandas as pd

In [22]:
from sklearn.metrics import accuracy_score

In [23]:
# Load the P045 predictions file.
df = pd.read_csv(filepath_or_buffer="./p045-predictions.csv")

In [24]:
# Preview the first ten rows of the loaded frame.
df.head(n=10)

Unnamed: 0,y_true,y_pred
0,1,0
1,0,0
2,1,1
3,2,2
4,1,1
5,0,0
6,1,1
7,1,0
8,0,0
9,1,1


In [25]:
# Accuracy of the predictions, computed with scikit-learn.
accuracy = accuracy_score(y_true=df["y_true"], y_pred=df["y_pred"])

In [26]:
# Display the accuracy value stored in `accuracy`.
accuracy

0.7241379310344828

### P046 数据指标计算 - confusion_matrix混淆矩阵

In [27]:
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix

In [28]:
# Load the P046 predictions file; read_csv parses it with its default
# comma separator despite the .txt extension.
df = pd.read_csv(filepath_or_buffer="./p046-predictions.txt")

In [29]:
# Preview the first five rows (the default for head).
df.head(n=5)

Unnamed: 0,y_true,y_pred
0,1,0
1,0,0
2,1,1
3,2,2
4,1,1


In [30]:
# Confusion matrix of true versus predicted labels, computed with scikit-learn.
cm = confusion_matrix(y_true=df["y_true"], y_pred=df["y_pred"])

In [31]:
# Display the confusion matrix stored in `cm`.
cm

array([[ 6,  1,  0],
       [ 3, 10,  2],
       [ 0,  2,  5]], dtype=int64)