In [1]:
import cv2
import numpy as np
def preprocess_image(image_path):
    # 读取图像
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)

    # 高斯滤波去噪
    denoised_image = cv2.GaussianBlur(image, (5, 5), 0)

    # Otsu二值化
    _, binary_image = cv2.threshold(denoised_image, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # 形态学开闭操作
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
    opened_image = cv2.morphologyEx(binary_image, cv2.MORPH_OPEN, kernel)
    closed_image = cv2.morphologyEx(opened_image, cv2.MORPH_CLOSE, kernel)

    return closed_image

# 步骤二：形态学特征提取
def extract_shape_features(binary_image):
    contours, _ = cv2.findContours(binary_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    features = []
    for contour in contours:
        # 基本几何属性
        area = cv2.contourArea(contour)
        perimeter = cv2.arcLength(contour, True)
        moments = cv2.moments(contour)
        centroid = (moments['m10'] / moments['m00'], moments['m01'] / moments['m00'])
        rect = cv2.minAreaRect(contour)
        (x, y), (width, height), angle = rect

        # 形状描述符
        hu_moments = cv2.HuMoments(moments).flatten()
        circularity = 4 * np.pi * area / (perimeter ** 2)

        # 将特征添加到列表
        features.append([area, perimeter, centroid[0], centroid[1], width, height, angle, *hu_moments, circularity])

    return np.array(features)

In [2]:
image_path = "C:\\Users\\MYZ\\PycharmProjects\\VIT_Pytorch\\BreaKHis_Datasets\\Original\\Benign_Sample\\Benign_Sample (2).png"
processed_image = preprocess_image(image_path)
shape_features = extract_shape_features(processed_image)
# 打印部分形状特征
'''
 print("Shape Features:")
print("Area: ", shape_features[:, 0])
print("Perimeter: ", shape_features[:, 1])
print("Centroid X: ", shape_features[:, 2])
print("Centroid Y: ", shape_features[:, 3])
print("Width: ", shape_features[:, 4])
print("Height: ", shape_features[:, 5])
print("Angle: ", shape_features[:, 6])
print("Hu Moments: ", shape_features[:, 7:14])
print("Circularity: ", shape_features[:, 14])
'''
# 定义列名
import pandas as pd
column_names = ['Area', 'Perimeter', 'Centroid_X', 'Centroid_Y', 'Width', 'Height', 'Angle']
column_names.extend(['Hu_Moment_' + str(i+1) for i in range(7)])
column_names.append('Circularity')
# 创建DataFrame
df_shape_features = pd.DataFrame(shape_features, columns=column_names)
df_shape_features

Unnamed: 0,Area,Perimeter,Centroid_X,Centroid_Y,Width,Height,Angle,Hu_Moment_1,Hu_Moment_2,Hu_Moment_3,Hu_Moment_4,Hu_Moment_5,Hu_Moment_6,Hu_Moment_7,Circularity
0,8.0,12.000000,443.000000,458.000000,2.000000,4.000000,90.000000,0.208333,0.015625,5.014237e-19,7.395160e-18,1.422366e-35,-6.183175e-21,-6.914540e-37,0.698132
1,10.0,14.000000,161.500000,458.000000,2.000000,5.000000,90.000000,0.241667,0.030625,1.964138e-18,2.421999e-18,5.278163e-36,-4.188705e-19,2.162505e-37,0.641141
2,14.5,15.414214,408.574713,457.540230,3.000000,5.000000,90.000000,0.186979,0.007707,7.008554e-05,3.112450e-06,-4.473307e-11,-2.123621e-07,1.058900e-11,0.766894
3,26.5,21.414214,668.327044,457.069182,4.000000,7.000000,90.000000,0.190870,0.008791,1.362671e-04,7.972787e-06,-8.393403e-11,-2.278039e-07,-2.490267e-10,0.726192
4,86.0,50.828427,208.122093,457.031008,4.000000,22.000000,90.000000,0.464729,0.188455,7.609093e-05,8.652603e-06,-1.560422e-10,-3.111373e-06,1.579318e-10,0.418307
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
218,8.0,12.000000,212.000000,1.000000,2.000000,4.000000,90.000000,0.208333,0.015625,2.710474e-20,2.710557e-20,7.347008e-40,3.388171e-21,-8.758166e-47,0.698132
219,182.5,84.727922,175.144292,2.750685,7.000000,36.000000,90.000000,0.556088,0.274503,1.710464e-02,1.368583e-02,2.093929e-04,7.113109e-03,-5.997657e-07,0.319462
220,24.0,20.000000,75.000000,2.000000,4.000000,6.000000,90.000000,0.180556,0.004823,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.753982
221,18382.0,1987.661029,69.122955,115.211792,186.851059,468.152802,27.864437,0.650964,0.266197,4.546549e-01,2.844319e-01,1.016683e-01,1.405803e-01,1.120755e-02,0.058468


In [3]:
data = df_shape_features.to_numpy()  # 使用to_numpy()方法确保数据结构正确
print(data.shape)  # 应输出：(223, 15)

(223, 15)


In [10]:
# 利用PCA进行降维
from sklearn.decomposition import PCA
#(223, M) 形状的二维数组，其中223为轮廓数，M为每个轮廓的特征数
shape_features = data 
# 创建PCA实例，指定保留一个主成分（即输出一个单一特征向量）
pca = PCA(n_components=1)
# 训练PCA模型并转换数据
pca.fit(shape_features)
image_pca_representation = pca.transform(shape_features)
# image_pca_representation 是一个形状为 (223, 1) 的数组，每行代表一个轮廓的PCA特征向量
# 若要进一步将整张图像表示为一个单一特征向量，可以取所有轮廓特征向量的均值
image_single_vector_representation = image_pca_representation.mean(axis=0)
# 现在，image_single_vector_representation 是一个长度为 1 的一维数组，表示整张图像的PCA特征向量
image_pca_representation

array([[-5.20382923e+02],
       [-5.17376473e+02],
       [-5.13358057e+02],
       [-5.01278486e+02],
       [-4.36834497e+02],
       [-5.08194549e+02],
       [-4.53205341e+02],
       [-5.03763003e+02],
       [-5.11058025e+02],
       [-4.39847265e+02],
       [-5.11348088e+02],
       [-4.01130043e+02],
       [-4.83586112e+02],
       [-2.70899355e+02],
       [-3.76497983e+02],
       [-4.78863780e+02],
       [-5.12524538e+02],
       [-4.74499033e+02],
       [-4.39035259e+02],
       [-4.73034598e+02],
       [-4.82841822e+02],
       [-4.92421622e+02],
       [-3.46853847e+02],
       [-4.93183333e+02],
       [-5.11967433e+02],
       [ 1.41396155e+03],
       [-4.84410965e+02],
       [-4.12086584e+02],
       [-5.07385933e+02],
       [-3.25515178e+02],
       [-1.82699810e+02],
       [-4.76035684e+02],
       [-4.37529785e+02],
       [-2.00178248e+01],
       [-3.25932784e+02],
       [-1.41765176e+02],
       [-3.90103579e+02],
       [-4.73702359e+02],
       [ 2.9

In [12]:
# 利用平均特征向量作为所有轮廓整体特性的单一数据
import numpy as np
contour_features = df_shape_features  # 实际数据在此处填充
# 计算所有轮廓在每个特征上的平均值
mean_feature_vector = np.mean(contour_features, axis=0)
mean_feature_vector_reshaped = mean_feature_vector.to_numpy().reshape(1, -1)
# mean_feature_vector 现在是一个长度为 15 的一维数组，表示所有轮廓在各个特征上的平均值
mean_feature_vector_reshaped

array([[5.20609865e+02, 1.02483300e+02, 3.60057220e+02, 2.19888301e+02,
        1.82355979e+01, 2.28163289e+01, 7.68661540e+01, 2.60395887e-01,
        4.28374220e-02, 9.77653617e-03, 2.91520914e-03, 5.03056502e-04,
        1.14920254e-03, 2.45146652e-05, 5.93050175e-01]])

In [6]:
import cv2
import numpy as np
from skimage import measure
from skimage.measure import label 
from scipy.ndimage import gaussian_filter
from scipy.spatial.distance import cdist
from skimage import morphology
from cv2 import ximgproc

def compute_skeleton_features(skeleton):
    # 计算骨架长度
    skeleton_length = cv2.countNonZero(skeleton)  
    # 计算骨架端点和分支数（基于连通分量分析）
    labeled_skeleton = label(skeleton, connectivity=1)
    num_endpoints = 0
    num_branches = 0
    for region in measure.regionprops(labeled_skeleton):
        # 检查当前连通分量是否只有一个像素（端点）
        if region.area == 1:
            num_endpoints += 1
        else:
            num_branches += 1
    # 计算骨架曲率
    # 预处理：对骨架进行平滑处理，以减小噪声影响
    skeleton_smooth = gaussian_filter(skeleton, sigma=1)
    # 提取骨架轮廓
    contours = measure.find_contours(skeleton_smooth, level=0, fully_connected='low', positive_orientation='low')
    # 计算每个轮廓点的曲率
    curvatures = []
    for contour in contours:
        # 计算轮廓点的坐标
        x, y = contour.T
        # 计算相邻点的距离矩阵
        dist_matrix = cdist(contour, contour, metric='euclidean')
        # 计算每个点的曲率
        for i in range(len(x)):
            # 选取当前点及其相邻点
            neighbors = np.delete(dist_matrix[i], i, axis=0)
            sorted_indices = np.argsort(neighbors)
            indices = sorted_indices[:3]  # 取前三个最近邻点
            # 计算曲率
            p1 = contour[indices[0]]
            p2 = contour[i]
            p3 = contour[indices[1]]
            print(f"Point {i}: ({x[i]}, {y[i]})") 
            v1 = p2 - p1
            v2 = p2 - p3
            print(f"v1: {v1}, v2: {v2}")
            # 避免向量夹角余弦值计算问题
            cos_theta = np.clip(np.dot(v1, v2) / (np.linalg.norm(v1) * np.linalg.norm(v2)), -1, 1)
            print(f"Cosine theta: {cos_theta}")
            # 避免向量长度为零
            if np.isclose(np.linalg.norm(v1), 0) or np.isclose(np.linalg.norm(v2), 0):
                curvature = np.nan  # 或者设置为一个合适的默认值，如 0 或者一个足够大的值表示无穷大曲率
            else:
                curvature_radius = np.linalg.norm(p2 - p1) / (2 * np.arccos(cos_theta))
                curvature = 1 / curvature_radius if curvature_radius > 0 else np.nan  # 避免除以零
            print(f"Curvature radius: {curvature_radius},Curvature:{curvature}")
            curvatures.append(curvature)
    # 计算平均曲率，过滤掉 NaN 值
    valid_curvatures = [curvature for curvature in curvatures if not np.isnan(curvature)]
    if valid_curvatures:
        avg_curvature = np.mean(valid_curvatures)
    else:
        avg_curvature = np.nan  # 或者设置为一个合适的默认值，如 0 或者一个足够大的值表示平均曲率无法计算
    return {
        "skeleton_length": skeleton_length,
        "num_endpoints": num_endpoints,
        "num_branches": num_branches,
        "avg_curvature": avg_curvature
    }
# 读取图像并转换为二值图像（假设已进行阈值处理）
binary_img = cv2.imread('C:\\Users\\MYZ\\PycharmProjects\\VIT_Pytorch\\Benign_Sample (1).png', cv2.IMREAD_GRAYSCALE)
_, binary_img = cv2.threshold(binary_img, 127, 255, cv2.THRESH_BINARY)
# 使用 scikit-image 的 skeletonize 函数提取骨架
skeleton = morphology.skeletonize(binary_img)
# 转换为 uint8 类型以便与后续操作兼容
skeleton = skeleton.astype(np.uint8)
# 计算骨架特征
features = compute_skeleton_features(skeleton)
features

{'skeleton_length': 16199,
 'num_endpoints': 6567,
 'num_branches': 948,
 'avg_curvature': nan}

In [14]:
values_list = list(features.values())
# 处理缺失值（这里假设用0替换nan）
values_list = [v if np.isfinite(v) else 0 for v in values_list]
# 转换为一维NumPy数组
one_dim_array = np.array(values_list)
one_dim_array.shape

(4,)

In [18]:
arr_transposed = one_dim_array.reshape(1, -1)
arr_transposed

array([[16199,  6567,   948,     0]])

In [ ]:
high_dim_features  lbp_hist_array  GLCM_features  filter_features  mean_feature_vector_reshaped    skeleton_features