In [93]:
### 高铁乘客数量预测
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import RidgeCV

In [89]:
# 1.获取数据
data = pd.read_csv("./train.csv")
data.head()
# 2.数据处理
# 2.1选择时间特征
time = pd.to_datetime(data["datetime"])
time = pd.DatetimeIndex(time)
data["day"] = time.day
data["hour"] = time.hour
data["weekday"] = time.weekday
data.head()
# 2.2确定特征值和目标值
x = data[[ "hour"]]
y = data["cnt"]
# 2.3分割数据集
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=22)
# 3.特征工程--特征预处理(标准化)
# 3.1 实例化一个转换器
transfer = StandardScaler()
# 3.2 调用fit_transform
x_train = transfer.fit_transform(x_train)
x_test = transfer.fit_transform(x_test)

# estimator = SGDRegressor(max_iter=1000)
# estimator.fit(x_train, y_train)
# estimator = Ridge(alpha=1)
estimator = RidgeCV(alphas=(0.1, 1, 10))
estimator.fit(x_train, y_train)
# 4.机器学习--knn+cv
# 4.1 实例化一个估计器
estimator = KNeighborsClassifier()
# 4.2 调用gridsearchCV
param_grid = {"n_neighbors": [1, 3, 5, 7, 9]}
estimator = GridSearchCV(estimator, param_grid=param_grid, cv=5)
# 4.3 模型训练
estimator.fit(x_train, y_train)
# 5.模型评估
# 5.1 基本评估方式
score = estimator.score(x_test, y_test)
print("最后预测的准确率为:\n", score)

y_predict = estimator.predict(x_test)
print("最后的预测值为:\n", y_predict)
print("预测值和真实值的对比情况:\n", y_predict == y_test)

# 5.2 使用交叉验证后的评估方式
print("在交叉验证中验证的最好结果:\n", estimator.best_score_)
print("最好的参数模型:\n", estimator.best_estimator_)
print("每次交叉验证后的验证集准确率结果和训练集准确率结果:\n",estimator.cv_results_)

In [85]:
import wfdb
import pywt
import seaborn # seaborn与matlotlib同出一源，只是把matplotlib进行了封装，让许多方法调用时变得更加简便
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Flatten,Dropout,Activation
from tensorflow.keras.layers import Conv1D,MaxPooling1D
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split

In [87]:
### RNN实现心电图预测
# 小波去噪预处理
RATIO=0.3  # 验证集占比
def denoise(data):
    # 小波变换
    coeffs = pywt.wavedec(data=data, wavelet='db5', level=9)
    cA9, cD9, cD8, cD7, cD6, cD5, cD4, cD3, cD2, cD1 = coeffs
    # 阈值去噪
    threshold = (np.median(np.abs(cD1)) / 0.6745) * (np.sqrt(2 * np.log(len(cD1))))
    cD1.fill(0)
    cD2.fill(0)
    for i in range(1, len(coeffs) - 2):
        coeffs[i] = pywt.threshold(coeffs[i], threshold)
    # 小波反变换,获取去噪后的信号
    rdata = pywt.waverec(coeffs=coeffs, wavelet='db5')
    return rdata

# 读取心电数据和对应标签,并对数据进行小波去噪
def getDataSet(number, X_data, Y_data):
    ecgClassSet = ['N', 'A', 'V', 'L', 'R']
    
    # 读取心电数据记录
    # print("正在读取 " + number + " 号心电数据...")
    record = wfdb.rdrecord('./ecg_data/' + number, channel_names=['MLII'])
    data = record.p_signal.flatten()
    rdata = denoise(data=data)

    # 获取心电数据记录中R波的位置和对应的标签
    annotation = wfdb.rdann('ecg_data/' + number, 'atr')
    Rlocation = annotation.sample
    Rclass = annotation.symbol
    #  数据读取完成
    
    # 去掉前后的不稳定数据
    start = 10
    end = 5
    i = start
    j = len(annotation.symbol) - end

    # 因为只选择NAVLR五种心电类型,所以要选出该条记录中所需要的那些带有特定标签的数据,舍弃其余标签的点
    # X_data在R波前后截取长度为300的数据点
    # Y_data将NAVLR按顺序转换为01234
    while i < j:
        try:
            lable = ecgClassSet.index(Rclass[i])   
            x_train = rdata[Rlocation[i] - 99:Rlocation[i] + 201]
            X_data.append(x_train)
            Y_data.append(lable)
            i += 1
        except ValueError:
            i += 1
    return


# 加载数据集并进行预处理
def loadData():
    numberSet = ['100', '101', '103', '105', '106', '107', '108', '109', '111', '112', '113', '114', '115',
                 '116', '117', '119', '121', '122', '123', '124', '200', '201', '202', '203', '205', '208',
                 '210', '212', '213', '214', '215', '217', '219', '220', '221', '222', '223', '228', '230',
                 '231', '232', '233', '234']
    dataSet = []
    lableSet = []
    for n in numberSet:
        getDataSet(n, dataSet, lableSet)
    # 转numpy数组,打乱顺序
    dataSet = np.array(dataSet).reshape(-1, 300)   # 转换成numpy数组
    lableSet = np.array(lableSet).reshape(-1, 1)
    train_ds = np.hstack((dataSet, lableSet))
    np.random.shuffle(train_ds)  # 打乱顺序，通过换行使得形式

#     # 数据集及其标签集
    X = train_ds[:, :300].reshape(-1, 300, 1)
    Y = train_ds[:, 300]

#     # 测试集及其标签集
    shuffle_index = np.random.permutation(len(X))
    test_length = int(RATIO * len(shuffle_index))
    test_index = shuffle_index[:test_length]
    train_index = shuffle_index[test_length:]
    X_test, Y_test = X[test_index], Y[test_index]
    X_train, Y_train = X[train_index], Y[train_index]
    X_train,X_test,Y_train,Y_test = train_test_split(dataSet,lableSet,test_size=0.3,shuffle=False)   # shuffle=False乱序
    X_train = np.array( X_train).reshape(-1, 300,1)
    X_test = np.array( X_test).reshape(-1, 300,1)
    Y_train = np.array( Y_train).reshape(-1,1)
    Y_test = np.array(Y_test).reshape(-1,1)
    return X_train,X_test,Y_train,Y_test
# [X_train,X_test,Y_train,Y_test]=loadData()

In [76]:
# CNN模型构建
model = Sequential()
# 第一个卷积层, 4 个 21x1 卷积核
model.add(Conv1D(filters=4, kernel_size=21, strides=1, padding='SAME', activation='relu',input_shape=(300, 1)))
# 第一个池化层, 最大池化,4 个 3x1 卷积核, 步长为 2
model.add(MaxPooling1D(pool_size=3, strides=2, padding='SAME'))
# 第二个卷积层, 16 个 23x1 卷积核
model.add(Conv1D(filters=16, kernel_size=231, strides=1, padding='SAME', activation='relu'))
# 第二个池化层, 最大池化,4 个 3x1 卷积核, 步长为 2
model.add(MaxPooling1D(pool_size=3, strides=2, padding='SAME'))
 # 第三个卷积层, 32 个 25x1 卷积核
model.add(Conv1D(filters=32, kernel_size=25, strides=1, padding='SAME', activation='relu'))
# 第三个池化层, 平均池化,4 个 3x1 卷积核, 步长为 2
model.add(MaxPooling1D(pool_size=3, strides=2, padding='SAME'))
# 第四个卷积层, 64 个 27x1 卷积核
model.add(Conv1D(filters=64, kernel_size=21, strides=1, padding='SAME', activation='relu'))
# 打平层,方便全连接层处理
model.add(Flatten())
# 全连接层,128 个节点
model.add(Dense(128))
# Dropout层,dropout = 0.2  随机冻结20%的权重，防止过拟合
model.add(Dropout(0.2))
model.add(Activation('relu'))
# 全连接层,5 个节点  输出层
model.add(Dense(5))
model.add(Activation('softmax'))
# 损失函数，优化器，精度
model.compile(optimizer='adam',
                loss='sparse_categorical_crossentropy',
                metrics=['accuracy'])
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_8 (Conv1D)            (None, 300, 4)            88        
_________________________________________________________________
max_pooling1d_6 (MaxPooling1 (None, 150, 4)            0         
_________________________________________________________________
conv1d_9 (Conv1D)            (None, 150, 16)           14800     
_________________________________________________________________
max_pooling1d_7 (MaxPooling1 (None, 75, 16)            0         
_________________________________________________________________
conv1d_10 (Conv1D)           (None, 75, 32)            12832     
_________________________________________________________________
max_pooling1d_8 (MaxPooling1 (None, 38, 32)            0         
_________________________________________________________________
conv1d_11 (Conv1D)           (None, 38, 64)           

In [75]:
# 进行预测
history = model.fit(X_train, Y_train, epochs=2,batch_size=128)

In [73]:
acc = history.history['accuracy']
loss = history.history['loss']
epochs = range(1,len(acc) + 1)
plt.plot(epochs,acc,'bo',label='Training acc')
plt.title('Training  accuracy')
plt.legend()
plt.figure()
plt.plot(epochs,loss,'bo',label='Training loss')
plt.title('Training  loss')
plt.legend()
plt.show()

print(history.history.keys())
print(model.evaluate(x_test,one_hot_test_labels))

In [74]:
# Y_pred = model.predict(X_test)
Y_pred = model.predict_classes(X_test)
# plt.plot(Y_test[:100])
con_mat = confusion_matrix(Y_test, Y_pred)
    # 归一化
    # con_mat_norm = con_mat.astype('float') / con_mat.sum(axis=1)[:, np.newaxis]
    # con_mat_norm = np.around(con_mat_norm, decimals=2)
    # 绘图
plt.figure(figsize=(8, 8))
seaborn.heatmap(con_mat, annot=True, fmt='.20g', cmap='Blues')
plt.ylim(0, 5)
plt.xlabel('Predicted labels')
plt.ylabel('True labels')
plt.show()

In [40]:
# 判断任意3个数的大小，按从大到小排列
a=input("请输入第一个数：")
b=input("请输入第二个数：")
c=input("请输入第三个数：")
a=int(a)
b=int(b)
c=int(c)
if a>b>c:
    print(a,b,c)
elif a>c>b:
    d=c;c=b;b=d
    print(a,b,c)
elif b>a>c:
    d=b;b=a;a=d
    print(a,b,c)
elif b>c>a:
    d=b;e=c;b=e;c=a;a=d
    print(a,b,c)
elif c>a>b:
    d=c;e=a;f=b;c=f;b=e;a=d
    print(a,b,c)
elif c>b>a:
    d=c;c=a;a=d
    print(a,b,c)
else:
    print(a,b,c)

请输入第一个数：55
请输入第二个数：54
请输入第三个数：53
55 54 53


In [54]:
# 判断闰年
i=int(input("请输入年份："))
if ((i%4==0) and (i%100!=0)):
    print("%d年是闰年"%i)
else:
    print("%d年不是闰年"%i)

请输入年份：2030
2030年不是闰年


In [62]:
# 成绩按等级划分
score=int(input("请输入您的成绩："))
def func(score):
    if score >100 or score <=0:
        return"wrong score .must between 0 and 100."
    elif score >= 90:
        return"A"
    elif score >= 80:
        return "B"
    elif score >= 70:
        return "C"
    elif score >=60:
        return "D"
    else:
        return "E"
print(func(score))
# 或者用嵌套选择结构
score=int(input("请输入您的成绩："))
def func(score):
    degree = "DCBAAE"
    if score >100 or score <=0:
        return"wrong score .must between 0 and 100."
    else:
        index =(score-60)//10
        if index >= 0:
            return degree[index]
        else:
            return[-1]
print(func(score))

请输入您的成绩：555
wrong score .must between 0 and 100.
请输入您的成绩：99
A


In [72]:
s=0;n=1
while n<=100:
    s=s+n
    n=n+1 
print(s)
# 或者用for-else语句配合使用
s=0
for i in range(1,101):
    s=s+i
else:
    print(s)

5050
5050
