In [71]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score,KFold
from sklearn.linear_model import LinearRegression
from sklearn import metrics
from sklearn.metrics import precision_score, recall_score

# The red and white wine-quality datasets are semicolon-separated CSV files,
# read via paths relative to the current working directory.
data1 = pd.read_csv('winequality-red.csv', sep=';')
data2 = pd.read_csv('winequality-white.csv', sep=';')

def _cross_validated_precision_recall(data, n_splits=5, random_state=42,
                                      average='weighted', zero_division=1):
    """Return the mean (precision, recall) of a rounded linear regression.

    A ``LinearRegression`` is fitted on every training fold of a shuffled
    K-fold split, its continuous predictions are rounded to integer labels,
    and precision/recall are computed against the true 'quality' labels.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a 'quality' column (the target); every other column is
        used as a feature.
    n_splits : int
        Number of cross-validation folds.
    random_state : int
        Seed for the fold shuffling, so results are reproducible.
    average : str
        Averaging mode forwarded to ``precision_score`` / ``recall_score``
        (e.g. 'weighted', 'macro', 'micro').
    zero_division : int or float
        Value used when a per-class metric is undefined (0/0).  NOTE: with
        the default of 1, a class that is never predicted is scored with
        precision 1, which raises the weighted average; pass 0 for a more
        conservative estimate.

    Returns
    -------
    tuple of float
        ``(mean_precision, mean_recall)`` over all folds.

    Raises
    ------
    KeyError
        If ``data`` has no 'quality' column.
    """
    # 'quality' is the target; all remaining columns are features.
    X = data.drop('quality', axis=1)
    y = data['quality']

    kf = KFold(n_splits=n_splits, shuffle=True, random_state=random_state)

    precision_scores = []
    recall_scores = []

    for train_index, test_index in kf.split(X):
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]

        # A fresh estimator per fold keeps the folds independent by construction.
        model = LinearRegression()
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        # The target is treated as a multi-class label (the notebook describes
        # an eleven-class task), so the continuous regression output is mapped
        # to classes by rounding to the nearest integer.  Casting to int keeps
        # the predicted labels in the same dtype family as the true labels.
        predicted_labels = np.round(y_pred).astype(int)

        precision_scores.append(
            precision_score(y_test, predicted_labels,
                            average=average, zero_division=zero_division)
        )
        recall_scores.append(
            recall_score(y_test, predicted_labels,
                         average=average, zero_division=zero_division)
        )

    mean_precision = sum(precision_scores) / len(precision_scores)
    mean_recall = sum(recall_scores) / len(recall_scores)
    return mean_precision, mean_recall


def wine_quality_linear_regression(data, n_splits=5, random_state=42,
                                   average='weighted', zero_division=1):
    """Print cross-validated precision and recall of a rounded linear regression.

    Fits a linear regression to predict 'quality' from all other columns of
    ``data`` using shuffled K-fold cross-validation, rounds the predictions
    to integer class labels, and prints the fold-averaged precision and
    recall.  The defaults reproduce the original fixed setup (5 folds,
    seed 42, weighted averaging, ``zero_division=1``).

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a 'quality' column; every other column is a feature.
    n_splits : int
        Number of cross-validation folds.
    random_state : int
        Seed for the fold shuffling.
    average : str
        Averaging mode for the metrics ('weighted', 'macro', 'micro', ...).
    zero_division : int or float
        Value used when a per-class metric is undefined.  With 1 (default),
        never-predicted classes count as precision 1 and inflate the
        weighted precision; use 0 for a conservative estimate.

    Returns
    -------
    None
        Results are written to standard output only.
    """
    mean_precision, mean_recall = _cross_validated_precision_recall(
        data,
        n_splits=n_splits,
        random_state=random_state,
        average=average,
        zero_division=zero_division,
    )

    print(f'Precision: {mean_precision}')
    print(f'Recall: {mean_recall}')


In [72]:
# Evaluate each dataset in turn, printing a heading before its scores.
for heading, wine_frame in (("red wine:", data1), ("white wine:", data2)):
    print(heading)
    wine_quality_linear_regression(wine_frame)

red wine:
Precision: 0.597022287028502
Recall: 0.5922884012539186
white wine:
Precision: 0.5407387827233142
Recall: 0.5167426153300954
