In [2]:
import torch
import torchvision.transforms as transforms
from torchvision import models
from PIL import Image

# 加载预训练的 DeepLabv3 模型
model = models.segmentation.deeplabv3_resnet50(pretrained=True)
model.eval()

# 图像预处理
def preprocess_image(image_path):
    image = Image.open(image_path).convert("RGB")
    preprocess = transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    return preprocess(image).unsqueeze(0)

# 加载图像
image_path = "z.jpg"
input_image = preprocess_image(image_path)

# 推理
with torch.no_grad():
    output = model(input_image)['out'][0]
output_predictions = output.argmax(0)

# 将预测结果保存为图像
palette = torch.tensor([2 ** 25 - 1, 2 ** 15 - 1, 2 ** 21 - 1])
colors = torch.as_tensor([i for i in range(21)])[:, None] * palette
colors = (colors % 255).numpy().astype("uint8")
output_predictions_rgb = Image.fromarray(output_predictions.byte().cpu().numpy()).resize(input_image.size)
output_predictions_rgb.putpalette(colors)
output_predictions_rgb.save("output_image_z.png")

TypeError: 'builtin_function_or_method' object is not iterable

In [None]:
import cv2

def align_images(image1, image2):
    # 初始化 SIFT 特征检测器
    sift = cv2.SIFT_create()

    # 在两张图像上检测特征点和计算特征描述符
    keypoints1, descriptors1 = sift.detectAndCompute(image1, None)
    keypoints2, descriptors2 = sift.detectAndCompute(image2, None)

    # 使用 FLANN 匹配器进行特征点匹配
    FLANN_INDEX_KDTREE = 1
    index_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=5)
    search_params = dict(checks=50)
    flann = cv2.FlannBasedMatcher(index_params, search_params)
    matches = flann.knnMatch(descriptors1, descriptors2, k=2)

    # 选择最佳匹配
    good_matches = []
    for m, n in matches:
        if m.distance < 0.7 * n.distance:
            good_matches.append(m)

    # 收集匹配的特征点坐标
    src_pts = np.float32([keypoints1[m.queryIdx].pt for m in good_matches]).reshape(-1, 1, 2)
    dst_pts = np.float32([keypoints2[m.trainIdx].pt for m in good_matches]).reshape(-1, 1, 2)

    # 计算变换矩阵
    M, _ = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0)

    # 对第一张图像进行变换
    aligned_image1 = cv2.warpPerspective(image1, M, (image2.shape[1], image2.shape[0]))

    return aligned_image1

# 读取两张图像
image1 = cv2.imread("image1.jpg", cv2.IMREAD_COLOR)
image2 = cv2.imread("image2.jpg", cv2.IMREAD_COLOR)

# 将图像转换为灰度图像
gray_image1 = cv2.cvtColor(image1, cv2.COLOR_BGR2GRAY)
gray_image2 = cv2.cvtColor(image2, cv2.COLOR_BGR2GRAY)

# 对图像进行放缩以使其对齐
aligned_image1 = align_images(gray_image1, gray_image2)

# 显示对齐后的图像
cv2.imshow("Aligned Image 1", aligned_image1)
cv2.waitKey(0)
cv2.destroyAllWindows()

True

In [51]:
import torch
import torchvision.transforms as transforms
from torchvision import models
from PIL import Image
import numpy as np
import cv2

# 加载预训练的 DeepLabv3 模型
model = models.segmentation.deeplabv3_resnet50(pretrained=True)
model.eval()

# 图像预处理
def preprocess_image(image_path):
    image = Image.open(image_path).convert("RGB")
    preprocess = transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    return preprocess(image).unsqueeze(0)

# 加载图像
image_path = "tt2.jpg"
input_image = preprocess_image(image_path)

# 获取原始图像尺寸
original_image = Image.open(image_path)
original_width, original_height = original_image.size

# 推理
with torch.no_grad():
    output = model(input_image)['out'][0]
output_predictions = output.argmax(0)

# 将预测结果调整为与原始图像相同的大小
output_predictions_resized = transforms.Resize((original_height, original_width))(output_predictions.unsqueeze(0))
output_predictions_resized = output_predictions_resized.squeeze(0)

# 定义阈值，将人像区域与其他区域分隔开来
threshold = 0.5
face_mask = (output_predictions_resized == 15)  # 在 COCO 数据集上，15 是人的类别标签
face_mask = face_mask.float() * 255  # 将布尔值转换为 0 或 255

# 将二值图像转换为 OpenCV 格式
face_mask_cv = face_mask.byte().cpu().numpy()

# 寻找轮廓
contours, _ = cv2.findContours(face_mask_cv, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

# 创建一个与原图大小相同的黑色图像
black_image = np.zeros((original_height, original_width, 3), dtype=np.uint8)

# 在黑色图像上绘制轮廓
cv2.drawContours(black_image, contours, -1, (0, 255, 0), 2)

# 将结果保存为图像
output_image_path = "output_contour_on_black_image_tt2.jpg"
cv2.imwrite(output_image_path, black_image)


True

In [58]:
import cv2
import numpy as np

def extract_green_contours(image):
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    lower_green = np.array([40, 40, 40])
    upper_green = np.array([70, 255, 255])
    mask = cv2.inRange(hsv, lower_green, upper_green)
    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    return contours

def align_images(image1, image2):
    contours1 = extract_green_contours(image1)
    contours2 = extract_green_contours(image2)
    
    # 计算轮廓的中心点
    centers1 = [np.mean(contour, axis=0) for contour in contours1]
    centers2 = [np.mean(contour, axis=0) for contour in contours2]
    
    # 寻找最佳匹配
    best_matches = []
    for center1 in centers1:
        min_distance = float('inf')
        best_match = None
        for center2 in centers2:
            distance = np.linalg.norm(center1 - center2)
            if distance < min_distance:
                min_distance = distance
                best_match = center2
        best_matches.append(best_match)
    
    # 计算透视变换矩阵
    src_pts = np.float32(centers1).reshape(-1, 1, 2)
    dst_pts = np.float32(best_matches).reshape(-1, 1, 2)
    M, _ = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0)
    
    # 对其中一个图像进行透视变换
    aligned_image1 = cv2.warpPerspective(image1, M, (image2.shape[1], image2.shape[0]))
    
    return aligned_image1

# 读取两张黑底图像
image1_path = "output_contour_on_black_image_tt2.jpg"
image2_path = "output_contour_on_black_image_tt1.jpg"

image1 = cv2.imread(image1_path)
image2 = cv2.imread(image2_path)

# 对图像进行对齐
aligned_image1 = align_images(image1, image2)

# 保存对齐后的图像
output_path = "aligned_image1.png"
cv2.imwrite(output_path, aligned_image1)


error: OpenCV(4.8.0) /io/opencv/modules/calib3d/src/fundam.cpp:385: error: (-28:Unknown error code -28) The input arrays should have at least 4 corresponding point sets to calculate Homography in function 'findHomography'


In [3]:
import torchvision.transforms as transforms
from torchvision import models
from PIL import Image
import numpy as np
import cv2

# 加载预训练的 DeepLabv3 模型
model = models.segmentation.deeplabv3_resnet50(pretrained=True)
model.eval()

# 图像预处理
def preprocess_image(image_path):
    image = Image.open(image_path).convert("RGB")
    preprocess = transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    return preprocess(image).unsqueeze(0)

# 加载图像
image_path = "z.jpg"
input_image = preprocess_image(image_path)

# 获取原始图像尺寸
original_image = Image.open(image_path)
original_width, original_height = original_image.size

# 推理
with torch.no_grad():
    output = model(input_image)['out'][0]
output_predictions = output.argmax(0)

# 将预测结果调整为与原始图像相同的大小
output_predictions_resized = transforms.Resize((original_height, original_width))(output_predictions.unsqueeze(0))
output_predictions_resized = output_predictions_resized.squeeze(0)

# 定义阈值，将人像区域与其他区域分隔开来
threshold = 0.5
face_mask = (output_predictions_resized == 15)  # 在 COCO 数据集上，15 是人的类别标签
face_mask = face_mask.float() * 255  # 将布尔值转换为 0 或 255

# 将二值图像转换为 OpenCV 格式
face_mask_cv = face_mask.byte().cpu().numpy()

# 寻找轮廓
contours, _ = cv2.findContours(face_mask_cv, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

# 将轮廓绘制在原始图像上
output_image = cv2.cvtColor(np.array(original_image), cv2.COLOR_RGB2BGR)
cv2.drawContours(output_image, contours, -1, (0, 255, 0), 2)

# 将结果保存为图像
output_image_path = "output_contour_on_image_z.jpg"
cv2.imwrite(output_image_path, output_image)

True

In [4]:
import torch
import torchvision.transforms as transforms
from torchvision import models
from PIL import Image
import numpy as np
import cv2

# 加载预训练的 DeepLabv3 模型
model = models.segmentation.deeplabv3_resnet50(pretrained=True)
model.eval()

# 图像预处理
def preprocess_image(image_path):
    image = Image.open(image_path).convert("RGB")
    preprocess = transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    return preprocess(image).unsqueeze(0)

# 加载图像
image_path = "z.jpg"
input_image = preprocess_image(image_path)

# 获取原始图像尺寸
original_image = Image.open(image_path)
original_width, original_height = original_image.size

# 推理
with torch.no_grad():
    output = model(input_image)['out'][0]
output_predictions = output.argmax(0)

# 将预测结果调整为与原始图像相同的大小
output_predictions_resized = transforms.Resize((original_height, original_width))(output_predictions.unsqueeze(0))
output_predictions_resized = output_predictions_resized.squeeze(0)

# 定义阈值，将人像区域与其他区域分隔开来
threshold = 0.5
face_mask = (output_predictions_resized == 15)  # 在 COCO 数据集上，15 是人的类别标签
face_mask = face_mask.float() * 255  # 将布尔值转换为 0 或 255

# 将二值图像转换为 OpenCV 格式
face_mask_cv = face_mask.byte().cpu().numpy()

# 寻找轮廓
contours, _ = cv2.findContours(face_mask_cv, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

# 创建一个与原图大小相同的黑色图像
black_image = np.zeros((original_height, original_width, 3), dtype=np.uint8)

# 在黑色图像上绘制轮廓
cv2.drawContours(black_image, contours, -1, (0, 255, 0), 2)

# 将结果保存为图像
output_image_path = "output_contour_on_black_image.jpg"
cv2.imwrite(output_image_path, black_image)

True

In [5]:
import torchvision.transforms as transforms
from torchvision import models
from PIL import Image
import numpy as np
import cv2

# 加载预训练的 Mask R-CNN 模型
model = models.detection.maskrcnn_resnet50_fpn(pretrained=True)
model.eval()

# 图像预处理
def preprocess_image(image_path):
    image = Image.open(image_path).convert("RGB")
    preprocess = transforms.Compose([
        transforms.ToTensor(),
    ])
    return preprocess(image)

# 加载图像
image_path = "tt1.jpg"
input_image = preprocess_image(image_path)

# 推理
with torch.no_grad():
    prediction = model([input_image])

# 获取预测结果中的人脸掩码
masks = prediction[0]['masks']
masks = masks.detach().cpu().numpy()

# 将人脸掩码应用到原始图像上
original_image = cv2.imread(image_path)
for i in range(masks.shape[0]):
    mask = masks[i, 0]
    mask = cv2.resize(mask, (original_image.shape[1], original_image.shape[0]))
    original_image[mask > 0.5] = [0, 255, 0]  # 将人脸部分标记为绿色

# 将结果保存为图像
output_image_path = "output_image_maskrnn.png"
cv2.imwrite(output_image_path, original_image)

Downloading: "https://download.pytorch.org/models/maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth" to /home/mengqingyi/.cache/torch/hub/checkpoints/maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth


  0%|          | 0.00/170M [00:00<?, ?B/s]

True

In [5]:
import dlib
import cv2

# 加载 Dlib 的人脸检测器和关键点检测器
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat")

# 加载图像
image_path = "ttbb.jpg"
image = cv2.imread(image_path)
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# 人脸检测
faces = detector(gray)
print(faces)
# 遍历每张检测到的人脸
for face in faces:
    # 关键点检测
    landmarks = predictor(gray, face)
    
    # 提取每个关键点的坐标
    points = []
    for n in range(68):
        x = landmarks.part(n).x
        y = landmarks.part(n).y
        points.append((x, y))
        
    # 在图像上绘制关键点
    for point in points:
        cv2.circle(image, point, 1, (0, 255, 0), -1)
        
# 将结果保存为图像
output_image_path = "output_image_with_landmarks.jpg"
cv2.imwrite(output_image_path, image)

rectangles[[(1229, 527) (1601, 899)]]


True

In [None]:
import cv2
import numpy as np

# 读取人像分割后的图像，蓝色区域为人的上半身和头部
segmented_image = cv2.imread('output_image_tt1.png')

# 将图像转换为HSV颜色空间
hsv_image = cv2.cvtColor(segmented_image, cv2.COLOR_BGR2HSV)

# 定义蓝色区域的HSV阈值范围
lower_blue = np.array([100, 50, 50])
upper_blue = np.array([140, 255, 255])

# 根据阈值范围进行颜色分割
mask = cv2.inRange(hsv_image, lower_blue, upper_blue)

# 找到轮廓
contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

# 框出头部
for contour in contours:
    x, y, w, h = cv2.boundingRect(contour)
    cv2.rectangle(segmented_image, (x, y), (x+w, y+h), (0, 255, 0), 2)

# 显示结果图像
cv2.imshow('Head Segmentation', segmented_image)
cv2.waitKey(0)
cv2.destroyAllWindows()

In [None]:
import cv2
import numpy as np

# 读取人像分割后的图像，蓝色区域为人的上半身和头部
segmented_image = cv2.imread('output_image_tt1.png')

# 将图像转换为灰度图像
gray_image = cv2.cvtColor(segmented_image, cv2.COLOR_BGR2GRAY)

# 使用阈值分割找到蓝色区域
_, binary_image = cv2.threshold(gray_image, 100, 255, cv2.THRESH_BINARY)

# 找到蓝色区域的外接矩形
contours, _ = cv2.findContours(binary_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
x, y, w, h = cv2.boundingRect(contours[0])

# 在原图上用矩形框出头部部分
cv2.rectangle(segmented_image, (x, y), (x+w, y+h), (0, 255, 0), 2)

# 显示结果图像
cv2.imshow('Head Detection', segmented_image)
cv2.waitKey(0)
cv2.destroyAllWindows()

In [None]:
import cv2

# 加载图像并转换为灰度图
image_path = "output_contour_on_black_image_tt2.jpg"
image = cv2.imread(image_path)
gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# 使用阈值化进行分割
_, thresholded_image = cv2.threshold(gray_image, 128, 255, cv2.THRESH_BINARY)

# 寻找图像的轮廓
contours, _ = cv2.findContours(thresholded_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

# 寻找每个轮廓的凸包
for contour in contours:
    hull = cv2.convexHull(contour)
    
    # 在图像上绘制凸包的点
    for point in hull:
        x, y = point[0]
        cv2.circle(image, (x, y), 5, (0, 255, 0), -1)

# 显示标记后的图像
cv2.imshow("Image with Convex Hull Points", image)
cv2.waitKey(0)
cv2.destroyAllWindows()

In [1]:
import dlib
import cv2

# 加载dlib的面部关键点检测器
predictor_path = "shape_predictor_68_face_landmarks.dat"
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor(predictor_path)

# 加载图像
image_path = "tt2.jpg"
image = cv2.imread(image_path)
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# 检测人脸
faces = detector(gray)

# 遍历每张脸并标出面部关键点
for face in faces:
    landmarks = predictor(gray, face)
    for n in range(0, 68):
        x = landmarks.part(n).x
        y = landmarks.part(n).y
        # 放大关键点半径
        cv2.circle(image, (x, y), 5, (0, 255, 0), -1)

# 显示标出关键点的图像
output_image_path = "tt1_show_landmarks.jpg"
cv2.imwrite(output_image_path, image)

# 保存标出关键点的图像


True

In [7]:
import numpy as np

# 给定的列表
logits = np.array([0.05, 0.05, 0.9, 0.1])
# 温度参数
tau = 0.2

# 计算sharpen后的概率值
sharpened_logits = np.exp(logits / tau)
sharpened_probabilities = sharpened_logits / np.sum(sharpened_logits)

print("Sharpened probabilities:", sharpened_probabilities)

Sharpened probabilities: [0.01362594 0.01362594 0.95525207 0.01749605]
