In [None]:
import cv2
import numpy as np

# Eight 2-D training points: the first four are labelled 0, the last four 1.
trains = np.array(
    [
        [150, 200],
        [200, 250],
        [100, 250],
        [150, 300],
        [350, 100],
        [400, 200],
        [400, 300],
        [350, 400],
    ],
    dtype=np.float32,
)

# Explicit 32-bit labels so the dtype does not depend on the platform's
# default NumPy integer width.
labels = np.array([0, 0, 0, 0, 1, 1, 1, 1], dtype=np.int32)

svm = cv2.ml.SVM_create()
svm.setType(cv2.ml.SVM_C_SVC)
svm.setKernel(cv2.ml.SVM_RBF)  # cv2.ml.SVM_LINEAR is the linear alternative

# trainAuto searches the hyper-parameters itself; the chosen values are printed.
svm.trainAuto(trains, cv2.ml.ROW_SAMPLE, labels)

print("C:", svm.getC())
print("Gamma:", svm.getGamma())

w, h = 500, 500
# Image arrays are indexed [row, column], i.e. (height, width, channels).
img = np.zeros((h, w, 3), dtype=np.uint8)

# Classify every pixel coordinate with a single batched predict() call instead
# of one Python-level call per pixel. Samples are laid out row by row, so the
# flat result can be reshaped straight back to (h, w).
grid_y, grid_x = np.mgrid[0:h, 0:w]
samples = np.column_stack((grid_x.ravel(), grid_y.ravel())).astype(np.float32)
_, res = svm.predict(samples)
pred = res.reshape(h, w).astype(np.int32)

img[pred == 0] = (128, 128, 255)
img[pred != 0] = (128, 255, 128)

# Overlay the training points, coloured by their label.
color = [(0, 0, 128), (0, 128, 0)]
for (px, py), label in zip(trains, labels):
    cv2.circle(img, (int(px), int(py)), 5, color[label], -1, cv2.LINE_AA)

cv2.imshow("SVM Classification", img)
cv2.waitKey(0)
cv2.destroyAllWindows()

C: 2.5
Gamma: 1e-05


In [4]:
import cv2
import numpy as np

# Load the digit sample sheet as a single-channel image.
digits = cv2.imread("digits.png", cv2.IMREAD_GRAYSCALE)

# cv2.imread reports failure by returning None rather than raising, so fail
# here with a clear message instead of an AttributeError on the next line.
if digits is None:
    raise FileNotFoundError("digits.png could not be read")

h, w = digits.shape[:2]
oldx, oldy = -1, -1  # previous stroke point; (-1, -1) means no stroke in progress


def on_mouse(event, x, y, flags, _):
    """Mouse callback that paints thick white strokes onto the global ``img``.

    Args:
        event: OpenCV mouse event code (``cv2.EVENT_*``).
        x: Cursor x position reported with the event.
        y: Cursor y position reported with the event.
        flags: Bit mask of ``cv2.EVENT_FLAG_*`` values.
        _: User-data slot of the callback signature; unused.
    """
    global oldx, oldy

    if event == cv2.EVENT_LBUTTONDOWN:
        oldx, oldy = x, y

    elif event == cv2.EVENT_LBUTTONUP:
        oldx, oldy = -1, -1

    elif event == cv2.EVENT_MOUSEMOVE and flags & cv2.EVENT_FLAG_LBUTTON:
        # If the button-down event was never seen (e.g. the button was pressed
        # outside the window), there is no previous point yet: start the stroke
        # here instead of drawing a line from the (-1, -1) sentinel.
        if (oldx, oldy) != (-1, -1):
            cv2.line(img, (oldx, oldy), (x, y), (255, 255, 255), 40, cv2.LINE_AA)
            cv2.imshow("img", img)
        oldx, oldy = x, y


# HOG descriptor configured for 20x20 windows.
hog = cv2.HOGDescriptor((20, 20), (10, 10), (5, 5), (5, 5), 9)

# Cut the sheet into 20x20 tiles and stack them into one (N, 20, 20) array.
cells = np.array(
    [np.hsplit(row, w // 20) for row in np.vsplit(digits, h // 20)]
).reshape(-1, 20, 20)

# One HOG feature vector per tile.
desc = [hog.compute(cell) for cell in cells]

train_desc = np.array(desc).squeeze().astype(np.float32)
# Labels 0-9, each repeated for one tenth of the samples.
# assumes the sheet stores the samples grouped by digit in order 0-9 - TODO confirm
train_labels = np.repeat(np.arange(10), len(train_desc) // 10)

# Blank canvas the user draws on; on_mouse paints into this array.
img = np.zeros((400, 400), np.uint8)

svm = cv2.ml.SVM_create()
svm.setType(cv2.ml.SVM_C_SVC)
svm.setKernel(cv2.ml.SVM_RBF)
svm.setC(2.5)
svm.setGamma(0.50625)
svm.train(train_desc, cv2.ml.ROW_SAMPLE, train_labels)

cv2.imshow("img", img)
cv2.setMouseCallback("img", on_mouse)

# ESC (27) quits; space classifies the current drawing and clears the canvas.
while True:
    key = cv2.waitKey()

    if key == 27:
        break
    if key != ord(" "):
        continue

    test_image = cv2.resize(img, (20, 20), interpolation=cv2.INTER_AREA)
    # predict() takes one sample per row, so present the descriptor as one row.
    test_desc = hog.compute(test_image).reshape(1, -1)

    _, res = svm.predict(test_desc)
    print(int(res[0, 0]))

    img.fill(0)
    cv2.imshow("img", img)

print("train_desc.shape:", train_desc.shape)
print("train_labels.shape:", train_labels.shape)

cv2.destroyAllWindows()

7
5
3
8
4
1
train_desc.shape: (5000, 324)
train_labels.shape: (5000,)


In [None]:
import sys
import numpy as np
import cv2


oldx, oldy = -1, -1  # previous stroke point; (-1, -1) means no stroke in progress


def on_mouse(event, x, y, flags, _):
    """Mouse callback that paints thick white strokes onto the global ``img``.

    Args:
        event: OpenCV mouse event code (``cv2.EVENT_*``).
        x: Cursor x position reported with the event.
        y: Cursor y position reported with the event.
        flags: Bit mask of ``cv2.EVENT_FLAG_*`` values.
        _: User-data slot of the callback signature; unused.
    """
    global oldx, oldy

    if event == cv2.EVENT_LBUTTONDOWN:
        oldx, oldy = x, y

    elif event == cv2.EVENT_LBUTTONUP:
        oldx, oldy = -1, -1

    elif event == cv2.EVENT_MOUSEMOVE and flags & cv2.EVENT_FLAG_LBUTTON:
        # If the button-down event was never seen (e.g. the button was pressed
        # outside the window), there is no previous point yet: start the stroke
        # here instead of drawing a line from the (-1, -1) sentinel.
        if (oldx, oldy) != (-1, -1):
            cv2.line(img, (oldx, oldy), (x, y), (255, 255, 255), 40, cv2.LINE_AA)
            cv2.imshow("img", img)
        oldx, oldy = x, y


def norm_digit(img):
    """Shift an image so that its intensity centroid lands on the image centre.

    Args:
        img: Image array; its first two dimensions are read as (height, width).

    Returns:
        A new image of the same size, translated by a pure-translation affine
        warp. If the image has zero total intensity (no centroid exists), an
        unshifted copy is returned.
    """
    m = cv2.moments(img)
    if m["m00"] == 0:
        # Blank input: dividing by m00 would raise ZeroDivisionError.
        return img.copy()

    cx = m["m10"] / m["m00"]
    cy = m["m01"] / m["m00"]
    h, w = img.shape[:2]
    # 2x3 affine matrix holding only the translation (centre - centroid).
    aff = np.array([[1, 0, w / 2 - cx], [0, 1, h / 2 - cy]], dtype=np.float32)
    # dsize (0, 0) is passed through unchanged from the original call.
    dst = cv2.warpAffine(img, aff, (0, 0))
    return dst


# Build the training descriptor matrix and the label vector

digits = cv2.imread("digits.png", cv2.IMREAD_GRAYSCALE)

if digits is None:
    print("Image load failed!")
    sys.exit()

h, w = digits.shape[:2]
hog = cv2.HOGDescriptor((20, 20), (10, 10), (5, 5), (5, 5), 9)
print("Descriptor Size:", hog.getDescriptorSize())

# Cut the sheet into 20x20 tiles, one per sample.
cells = [np.hsplit(row, w // 20) for row in np.vsplit(digits, h // 20)]
cells = np.array(cells)
cells = cells.reshape(-1, 20, 20)  # shape=(5000, 20, 20)

# Centre each tile on its centroid before describing it. A dedicated loop
# name keeps the global `img` (the drawing canvas used by on_mouse) from being
# shadowed by training tiles.
desc = [hog.compute(norm_digit(cell)) for cell in cells]

train_desc = np.array(desc)
train_desc = train_desc.squeeze().astype(np.float32)
# Integer division: np.repeat needs an integer repeat count, and a float
# (len / 10) is rejected by NumPy's safe casting.
# assumes the sheet stores the samples grouped by digit in order 0-9 - TODO confirm
train_labels = np.repeat(np.arange(10), len(train_desc) // 10)

# Train the SVM

svm = cv2.ml.SVM_create()
svm.setType(cv2.ml.SVM_C_SVC)
svm.setKernel(cv2.ml.SVM_RBF)
svm.setC(2.5)
svm.setGamma(0.50625)

svm.train(train_desc, cv2.ml.ROW_SAMPLE, train_labels)
# svm.save('svmdigits.yml')

# Predict on images drawn by the user

img = np.zeros((400, 400), np.uint8)

cv2.imshow("img", img)
cv2.setMouseCallback("img", on_mouse)

# ESC (27) quits; space classifies the current drawing and clears the canvas.
while True:
    key = cv2.waitKey()

    if key == 27:
        break
    elif key == ord(" "):
        test_image = cv2.resize(img, (20, 20), interpolation=cv2.INTER_AREA)

        # Nothing drawn (or it vanished in the down-scale): there is no
        # centroid to normalise around, so skip instead of dividing by zero.
        if cv2.countNonZero(test_image) == 0:
            continue

        test_image = norm_digit(test_image)
        # predict() takes one sample per row.
        test_desc = hog.compute(test_image).reshape(-1, 1).T

        _, res = svm.predict(test_desc)
        print(int(res[0, 0]))

        img.fill(0)
        cv2.imshow("img", img)

cv2.destroyAllWindows()


Descriptor Size: 324
7


In [None]:
import cv2
import numpy as np

src = cv2.imread("flowers.jpg")

# cv2.imread reports failure by returning None rather than raising, so fail
# here with a clear message instead of an AttributeError on reshape().
if src is None:
    raise FileNotFoundError("flowers.jpg could not be read")

# One row per pixel, three float32 colour components per row.
data = src.reshape(-1, 3).astype(np.float32)

# Stop after 10 iterations or when the epsilon of 1.0 is reached.
criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0)

cv2.imshow("src", src)

# Show the image quantised to K colours for K = 2..9, one window at a time.
for K in range(2, 10):
    ret, label, center = cv2.kmeans(
        data, K, None, criteria, 10, cv2.KMEANS_RANDOM_CENTERS
    )

    # Replace every pixel by the colour of the cluster it was assigned to.
    center = np.uint8(center)
    dst = center[label.flatten()].reshape(src.shape)

    title = f"K={K}"
    cv2.imshow(title, dst)
    cv2.waitKey()
    cv2.destroyWindow(title)

cv2.destroyAllWindows()

In [3]:
import cv2
import numpy as np

oldx, oldy = -1, -1  # previous stroke point; (-1, -1) means no stroke in progress


def on_mouse(event, x, y, flags, _):
    """Mouse callback that paints thick white strokes onto the global ``img``.

    Args:
        event: OpenCV mouse event code (``cv2.EVENT_*``).
        x: Cursor x position reported with the event.
        y: Cursor y position reported with the event.
        flags: Bit mask of ``cv2.EVENT_FLAG_*`` values.
        _: User-data slot of the callback signature; unused.
    """
    global oldx, oldy

    if event == cv2.EVENT_LBUTTONDOWN:
        oldx, oldy = x, y

    elif event == cv2.EVENT_LBUTTONUP:
        oldx, oldy = -1, -1

    elif event == cv2.EVENT_MOUSEMOVE and flags & cv2.EVENT_FLAG_LBUTTON:
        # If the button-down event was never seen (e.g. the button was pressed
        # outside the window), there is no previous point yet: start the stroke
        # here instead of drawing a line from the (-1, -1) sentinel.
        if (oldx, oldy) != (-1, -1):
            cv2.line(img, (oldx, oldy), (x, y), (255, 255, 255), 40, cv2.LINE_AA)
            cv2.imshow("img", img)
        oldx, oldy = x, y


# Network loaded from the "mnist_cnn.pb" file.
net = cv2.dnn.readNet("mnist_cnn.pb")

# Blank canvas the user draws on; on_mouse paints into this array.
img = np.zeros((400, 400), np.uint8)

cv2.imshow("img", img)
cv2.setMouseCallback("img", on_mouse)

# ESC (27) quits; space classifies the current drawing and clears the canvas.
while True:
    c = cv2.waitKey()

    if c == 27:
        break
    if c != ord(" "):
        continue

    # Scale pixel values by 1/255 and resize to 28x28 for the network input.
    blob = cv2.dnn.blobFromImage(img, 1 / 255.0, (28, 28))
    net.setInput(blob)
    prob = net.forward()

    # The x coordinate of the maximum in the output is reported as the digit.
    _, maxVal, _, maxLoc = cv2.minMaxLoc(prob)
    digit = maxLoc[0]

    print(f"{digit} ({maxVal * 100:4.2f})")

    img.fill(0)
    cv2.imshow("img", img)

cv2.destroyAllWindows()

1 (11.94)
3 (85.28)
6 (98.98)
8 (91.88)
7 (100.00)
4 (100.00)


In [19]:
import cv2
import numpy as np

filename = "beagle.jpg"

img = cv2.imread(filename)

# cv2.imread reports failure by returning None rather than raising; stop here
# with a clear message instead of failing later inside blobFromImage.
if img is None:
    raise FileNotFoundError(f"{filename} could not be read")

# All GoogLeNet assets live in one directory; keep that location in a single
# place so relocating the files is a one-line change.
# NOTE(review): this is an absolute, machine-specific path - consider making it
# relative to the notebook.
model_dir = r"C:\Users\UserK\projects\Python\20250425\googlenet"
model = model_dir + r"\bvlc_googlenet.caffemodel"
config = model_dir + r"\deploy.prototxt"
class_file = model_dir + r"\classification_classes_ILSVRC2012.txt"

net = cv2.dnn.readNet(model, config)

# One class name per line; list index == class id used below.
with open(class_file, "rt") as f:
    classNames = f.read().rstrip("\n").split("\n")

# 224x224 input with per-channel mean subtraction.
blob = cv2.dnn.blobFromImage(img, 1, (224, 224), (104, 117, 123))
net.setInput(blob, "data")
prob = net.forward()

# Highest-scoring class and its score.
out = prob.flatten()
classId = np.argmax(out)
confidence = out[classId]

text = f"{classNames[classId]} ({confidence * 100:4.2f}%)"
cv2.putText(
    img, text, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 1, cv2.LINE_AA
)

cv2.imshow("img", img)
cv2.waitKey()
cv2.destroyAllWindows()

In [None]:
import sys
import numpy as np
import cv2


oldx, oldy = -1, -1  # previous stroke point; (-1, -1) means no stroke in progress


def on_mouse(event, x, y, flags, _):
    """Mouse callback that paints white strokes (24 px thick) onto the global ``img``.

    Args:
        event: OpenCV mouse event code (``cv2.EVENT_*``).
        x: Cursor x position reported with the event.
        y: Cursor y position reported with the event.
        flags: Bit mask of ``cv2.EVENT_FLAG_*`` values.
        _: User-data slot of the callback signature; unused.
    """
    global oldx, oldy

    if event == cv2.EVENT_LBUTTONDOWN:
        oldx, oldy = x, y

    elif event == cv2.EVENT_LBUTTONUP:
        oldx, oldy = -1, -1

    elif event == cv2.EVENT_MOUSEMOVE and flags & cv2.EVENT_FLAG_LBUTTON:
        # If the button-down event was never seen (e.g. the button was pressed
        # outside the window), there is no previous point yet: start the stroke
        # here instead of drawing a line from the (-1, -1) sentinel.
        if (oldx, oldy) != (-1, -1):
            cv2.line(img, (oldx, oldy), (x, y), (255, 255, 255), 24, cv2.LINE_AA)
            cv2.imshow("img", img)
        oldx, oldy = x, y


def norm_hangul(img):
    """Shift an image so that its intensity centroid lands on the image centre.

    Args:
        img: Image array; its first two dimensions are read as (height, width).

    Returns:
        A new image of the same size, translated by a pure-translation affine
        warp. If the image has zero total intensity (no centroid exists), an
        unshifted copy is returned.
    """
    m = cv2.moments(img)
    if m["m00"] == 0:
        # Blank input: dividing by m00 would raise ZeroDivisionError.
        return img.copy()

    cx = m["m10"] / m["m00"]
    cy = m["m01"] / m["m00"]
    h, w = img.shape[:2]
    # 2x3 affine matrix holding only the translation (centre - centroid).
    aff = np.array([[1, 0, w / 2 - cx], [0, 1, h / 2 - cy]], dtype=np.float32)
    # dsize (0, 0) is passed through unchanged from the original call.
    dst = cv2.warpAffine(img, aff, (0, 0))
    return dst


# Load the network
net = cv2.dnn.readNet("tensorflow-hangul-recognition-master/korean_recognition.pb")

if net.empty():
    print("Network load failed!")
    sys.exit()

# Load the Hangul label file (one label per line; list index == class id)
classNames = None
with open(
    "tensorflow-hangul-recognition-master/labels/256-common-hangul.txt",
    "rt",
    encoding="utf-8",
) as f:
    classNames = f.read().rstrip("\n").split("\n")

# Blank canvas on which the user draws a Hangul character with the mouse
img = np.zeros((400, 400), np.uint8)

cv2.imshow("img", img)
cv2.setMouseCallback("img", on_mouse)

# ESC (27) quits; space classifies the current drawing and clears the canvas.
while True:
    c = cv2.waitKey()

    if c == 27:
        break
    if c != ord(" "):
        continue

    # Centre the drawing on its centroid; `img` is rebound to the new array,
    # which on_mouse then draws into (it looks the global up on every call).
    img = norm_hangul(img)
    blob = cv2.dnn.blobFromImage(img, 1, (64, 64))
    net.setInput(blob)
    out = net.forward().flatten()  # forward() output shape is (1, 256)

    classId = np.argmax(out)
    confidence = out[classId]

    print(f"{classNames[classId]} ({confidence * 100:4.2f}%)")

    img.fill(0)
    cv2.imshow("img", img)

cv2.destroyAllWindows()


In [17]:
import sys
import numpy as np
import cv2


model = "face_detect/res10_300x300_ssd_iter_140000_fp16.caffemodel"
config = "face_detect/deploy.prototxt"
# model = 'opencv_face_detector/opencv_face_detector_uint8.pb'
# config = 'opencv_face_detector/opencv_face_detector.pbtxt'

cap = cv2.VideoCapture(0)

if not cap.isOpened():
    print("Camera open failed!")
    sys.exit()

# Everything after the camera is opened runs under try/finally so the device
# is released on every exit path (ESC, read failure, sys.exit, exceptions).
try:
    net = cv2.dnn.readNet(model, config)

    if net.empty():
        print("Net open failed!")
        sys.exit()

    while True:
        ret, frame = cap.read()

        if not ret:
            break

        # 300x300 input with per-channel mean subtraction.
        blob = cv2.dnn.blobFromImage(frame, 1, (300, 300), (104, 177, 123))
        net.setInput(blob)
        out = net.forward()

        # One row per detection: column 2 is the confidence and columns 3-6
        # are box corners as fractions of the frame size (scaled below).
        detect = out[0, 0, :, :]
        (h, w) = frame.shape[:2]

        for i in range(detect.shape[0]):
            confidence = detect[i, 2]
            # Skip weak detections; `continue` rather than `break` so the
            # result does not rely on the rows being sorted by confidence.
            if confidence < 0.5:
                continue

            x1 = int(detect[i, 3] * w)
            y1 = int(detect[i, 4] * h)
            x2 = int(detect[i, 5] * w)
            y2 = int(detect[i, 6] * h)

            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0))

            label = f"Face: {confidence:4.2f}"
            cv2.putText(
                frame,
                label,
                (x1, y1 - 1),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.8,
                (0, 255, 0),
                1,
                cv2.LINE_AA,
            )

        cv2.imshow("frame", frame)

        if cv2.waitKey(1) == 27:  # ESC
            break
finally:
    cap.release()
    cv2.destroyAllWindows()


In [14]:
import sys
import numpy as np
import cv2


# Model and configuration files
model = "yolo_v3/yolov3.weights"
config = "yolo_v3/yolov3.cfg"
class_labels = "yolo_v3/coco.names"
confThreshold = 0.5
nmsThreshold = 0.4

# Test image files
img_files = [
    r"C:\Users\UserK\projects\Python\20250425\yolo_v3\dog.jpg",
    r"C:\Users\UserK\projects\Python\20250425\yolo_v3\person.jpg",
    r"C:\Users\UserK\projects\Python\20250425\yolo_v3\sheep.jpg",
    r"C:\Users\UserK\projects\Python\20250425\yolo_v3\beagle.jpg",
]

# Create the network
net = cv2.dnn.readNet(model, config)

if net.empty():
    print("Net open failed!")
    sys.exit()

# Load the class names (one per line; list index == class id)
with open(class_labels, "rt") as f:
    classes = f.read().rstrip("\n").split("\n")

# One random drawing colour per class.
colors = np.random.uniform(0, 255, size=(len(classes), 3))

# Get the output layer names. Asking the network for the names directly avoids
# indexing getLayerNames() with getUnconnectedOutLayers(), whose return shape
# differs between OpenCV versions.
output_layers = net.getUnconnectedOutLayersNames()
# output_layers = ['yolo_82', 'yolo_94', 'yolo_106']

# Run

for img_file in img_files:
    img = cv2.imread(img_file)

    if img is None:
        continue

    # Create the blob and run inference
    blob = cv2.dnn.blobFromImage(img, 1 / 255.0, (320, 320), swapRB=True)
    net.setInput(blob)
    outs = net.forward(output_layers)

    # outs is a list of three ndarrays, one per output layer, each with 85
    # columns per row. The row counts depend on the blob size; the figures
    # 507 / 2028 / 8112 (13*13*3, 26*26*3, 52*52*3) correspond to a 416x416
    # input, not the 320x320 blob used here.

    h, w = img.shape[:2]

    class_ids = []
    confidences = []
    boxes = []

    for out in outs:
        for detection in out:
            # detection: 4 (bounding box) + 1 (objectness score) + 80 (class confidences)
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if confidence > confThreshold:
                # Box centre and size, scaled from fractions to pixels
                cx = int(detection[0] * w)
                cy = int(detection[1] * h)
                bw = int(detection[2] * w)
                bh = int(detection[3] * h)

                # Top-left corner of the box
                sx = int(cx - bw / 2)
                sy = int(cy - bh / 2)

                boxes.append([sx, sy, bw, bh])
                confidences.append(float(confidence))
                class_ids.append(int(class_id))

    # Non-maximum suppression
    indices = cv2.dnn.NMSBoxes(boxes, confidences, confThreshold, nmsThreshold)

    # Flatten so the loop works whether NMSBoxes returned a flat array, an
    # (N, 1) array, or an empty tuple when nothing survived.
    for i in np.array(indices).flatten():
        sx, sy, bw, bh = boxes[i]
        label = f"{classes[class_ids[i]]}: {confidences[i]:.2}"
        color = colors[class_ids[i]]
        cv2.rectangle(img, (sx, sy, bw, bh), color, 2)
        cv2.putText(
            img,
            label,
            (sx, sy - 10),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.7,
            color,
            2,
            cv2.LINE_AA,
        )

    # Overlay the inference time (ticks converted to milliseconds)
    t, _ = net.getPerfProfile()
    label = "Inference time: %.2f ms" % (t * 1000.0 / cv2.getTickFrequency())
    cv2.putText(
        img, label, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 1, cv2.LINE_AA
    )

    cv2.imshow("img", img)
    cv2.waitKey()

cv2.destroyAllWindows()


In [27]:
import sys
import numpy as np
import cv2


# Model and configuration files
model = 'openpose/pose_iter_440000.caffemodel'
config = 'openpose/pose_deploy_linevec.prototxt'

# Number of pose points, number of connections, and the connected point-index pairs
nparts = 18
npairs = 17
pose_pairs = [
    (1, 2), (2, 3), (3, 4),  # left arm
    (1, 5), (5, 6), (6, 7),  # right arm
    (1, 8), (8, 9), (9, 10),  # left leg
    (1, 11), (11, 12), (12, 13),  # right leg
    (1, 0), (0, 14), (14, 16), (0, 15), (15, 17),  # face
]

# Test image files
img_files = ['pose1.jpg', 'pose2.jpg', 'pose3.jpg']

# Create the network
net = cv2.dnn.readNet(model, config)

if net.empty():
    print('Net open failed!')
    sys.exit()

for f in img_files:
    img = cv2.imread(f)

    if img is None:
        continue

    # Create the blob and run inference
    blob = cv2.dnn.blobFromImage(img, 1 / 255., (368, 368))
    net.setInput(blob)
    out = net.forward()  # out.shape=(1, 57, 46, 46)

    h, w = img.shape[:2]
    map_h, map_w = out.shape[2], out.shape[3]

    # Extract the detected points: one heat map per body part, peak = location
    points = []
    for i in range(nparts):
        heatMap = out[0, i, :, :]

        # Optional debug view (disabled): normalise heatMap to 8-bit, resize it
        # to (w, h), convert to BGR, blend 50/50 with img and show it:
        #   heatImg = cv2.normalize(heatMap, None, 0, 255, cv2.NORM_MINMAX, cv2.CV_8U)
        #   heatImg = cv2.resize(heatImg, (w, h))
        #   heatImg = cv2.cvtColor(heatImg, cv2.COLOR_GRAY2BGR)
        #   heatImg = cv2.addWeighted(img, 0.5, heatImg, 0.5, 0)
        #   cv2.imshow('heatImg', heatImg)
        #   cv2.waitKey()

        _, conf, _, point = cv2.minMaxLoc(heatMap)
        # Map the peak from heat-map coordinates to image coordinates
        x = int(w * point[0] / map_w)
        y = int(h * point[1] / map_h)

        # heat map threshold=0.1; weaker peaks count as "not detected"
        if conf > 0.1:
            points.append((x, y))
        else:
            points.append(None)

    # Build the result image: draw each connection whose two ends were found
    for first, second in pose_pairs:
        p1, p2 = points[first], points[second]

        if p1 is not None and p2 is not None:
            cv2.line(img, p1, p2, (0, 255, 0), 3, cv2.LINE_AA)
            cv2.circle(img, p1, 4, (0, 0, 255), -1, cv2.LINE_AA)
            cv2.circle(img, p2, 4, (0, 0, 255), -1, cv2.LINE_AA)

    # Print the inference time (ticks converted to milliseconds)
    t, _ = net.getPerfProfile()
    label = 'Inference time: %.2f ms' % (t * 1000.0 / cv2.getTickFrequency())
    cv2.putText(img, label, (10, 30), cv2.FONT_HERSHEY_SIMPLEX,
                0.7, (0, 0, 255), 1, cv2.LINE_AA)

    cv2.imshow('img', img)
    cv2.waitKey()

cv2.destroyAllWindows()


In [29]:
import sys
import math
import numpy as np
import cv2


def decode(scores, geometry, scoreThreshold):
    """Turn score/geometry maps into rotated-rectangle candidates.

    Args:
        scores: 4-D array indexed ``[0][0][y][x]`` holding one score per cell.
        geometry: 4-D array indexed ``[0][c][y][x]``; channels 0-3 hold four
            distances and channel 4 an angle in radians (it is passed to
            ``math.cos``/``math.sin`` and converted to degrees for the output).
        scoreThreshold: Cells scoring below this value are skipped.

    Returns:
        ``[detections, confidences]``: each detection is
        ``((cx, cy), (w, h), angle_in_degrees)`` and ``confidences`` holds the
        matching scores as Python floats, in the same order.
    """
    rects = []
    rect_scores = []

    # geometry.shape=(1, 5, 80, 80)
    # scores.shape=(1, 1, 80, 80)
    n_rows, n_cols = scores.shape[2], scores.shape[3]

    for row in range(n_rows):
        # Pull out this row of the score map and of each geometry channel.
        score_row = scores[0][0][row]
        d0_row, d1_row, d2_row, d3_row, angle_row = (
            geometry[0][channel][row] for channel in range(5)
        )

        for col in range(n_cols):
            score = score_row[col]
            if score < scoreThreshold:
                continue

            d0, d1, d2, d3 = d0_row[col], d1_row[col], d2_row[col], d3_row[col]
            angle = angle_row[col]
            cosA, sinA = math.cos(angle), math.sin(angle)

            # The feature map is 1/4 the size of the 320x320 blob, so scale
            # the cell indices back up by 4.
            offsetX, offsetY = col * 4.0, row * 4.0

            # Size of the rotated rectangle at (offsetX, offsetY).
            h = d0 + d2
            w = d1 + d3

            # One corner of the rotated rectangle.
            corner_x = offsetX + cosA * d1 + sinA * d2
            corner_y = offsetY - sinA * d1 + cosA * d2

            # Two diagonally opposite corners; their midpoint is the centre.
            p1 = (-sinA * h + corner_x, -cosA * h + corner_y)
            p3 = (-cosA * w + corner_x, sinA * w + corner_y)
            center = ((p1[0] + p3[0]) / 2, (p1[1] + p3[1]) / 2)

            rects.append((center, (w, h), -1 * angle * 180.0 / math.pi))
            rect_scores.append(float(score))

    return [rects, rect_scores]


# Model file and thresholds
model = 'EAST/frozen_east_text_detection.pb'
confThreshold = 0.5
nmsThreshold = 0.4

# Test image files
img_files = ['road_closed.jpg', 'patient.jpg', 'copy_center.jpg']

# Create the network
net = cv2.dnn.readNet(model)

if net.empty():
    print('Net open failed!')
    sys.exit()

# Getting the output layer names (disabled snippet kept for reference):
#   layer_names = net.getLayerNames()
#   output_layers = [layer_names[i[0] - 1] for i in net.getUnconnectedOutLayers()]
#   print(output_layers)
east_outputs = ['feature_fusion/Conv_7/Sigmoid', 'feature_fusion/concat_3']

# Run

for f in img_files:
    img = cv2.imread(f)

    if img is None:
        continue

    # Create the blob and run inference
    blob = cv2.dnn.blobFromImage(img, 1, (320, 320), (123.68, 116.78, 103.94), True)
    net.setInput(blob)
    scores, geometry = net.forward(east_outputs)

    # scores.shape=(1, 1, 80, 80)
    # geometry.shape=(1, 5, 80, 80)

    # Convert RBOX entries scoring at least confThreshold into rotated rectangles
    boxes, confidences = decode(scores, geometry, confThreshold)

    # Non-maximum suppression for rotated rectangles
    indices = cv2.dnn.NMSBoxesRotated(boxes, confidences, confThreshold, nmsThreshold)

    # Boxes are in 320x320 blob coordinates; these factors map them to the image.
    rw = img.shape[1] / 320
    rh = img.shape[0] / 320

    for i in indices:
        # Compute the four corners of the rotated rectangle and draw them
        vertices = cv2.boxPoints(boxes[i])

        for corner in vertices:
            corner[0] *= rw
            corner[1] *= rh

        # Integer pixel coordinates, then connect each corner to the next one
        # (the last wraps around to the first).
        corners = [(int(vx), int(vy)) for vx, vy in vertices]
        for j, p1 in enumerate(corners):
            p2 = corners[(j + 1) % 4]
            cv2.line(img, p1, p2, (0, 0, 255), 2, cv2.LINE_AA)

    cv2.imshow('img', img)
    cv2.waitKey()

cv2.destroyAllWindows()