In [4]:
from transformers import ViTImageProcessor, ViTForImageClassification
from PIL import Image
import requests
import torch

url = 'https://images.squarespace-cdn.com/content/v1/5704c8071d07c094a3a1a6c7/1517310672216-MXQVELTMZUICMD0KO5HW/Shanghai+Cyclists?format=2500w'
# Bound the wait and surface HTTP errors here, rather than letting PIL fail
# later on an error page or a stalled connection.
response = requests.get(url, stream=True, timeout=30)
response.raise_for_status()
# Normalise to 3-channel RGB so the processor receives the same layout
# whatever mode the downloaded file happens to use.
image = Image.open(response.raw).convert("RGB")

processor = ViTImageProcessor.from_pretrained('google/vit-base-patch16-224')
model = ViTForImageClassification.from_pretrained('google/vit-base-patch16-224')

inputs = processor(images=image, return_tensors="pt")
# Inference only: disable gradient tracking for the forward pass.
with torch.no_grad():
    outputs = model(**inputs)
logits = outputs.logits
# model predicts one of the 1000 ImageNet classes
predicted_class_idx = logits.argmax(-1).item()
print("Predicted class:", model.config.id2label[predicted_class_idx])

Predicted class: moped


In [5]:
from transformers import AutoImageProcessor, ResNetForImageClassification
import torch
from datasets import load_dataset

dataset = load_dataset("huggingface/cats-image")
# Select the row first, then the column: indexing the "image" column first
# can decode every image in the split just to pick one.
image = dataset["test"][0]["image"]

processor = AutoImageProcessor.from_pretrained("microsoft/resnet-50")
model = ResNetForImageClassification.from_pretrained("microsoft/resnet-50")

inputs = processor(image, return_tensors="pt")

# Inference only: disable gradient tracking for the forward pass.
with torch.no_grad():
    logits = model(**inputs).logits

# model predicts one of the 1000 ImageNet classes
predicted_label = logits.argmax(-1).item()
print(model.config.id2label[predicted_label])

You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this dataset from the next major release of `datasets`.
Downloading builder script: 100%|██████████████████████████████████████| 2.56k/2.56k [00:00<00:00, 6.28MB/s]
Downloading data: 100%|███████████████████████████████████████████████████| 173k/173k [00:00<00:00, 381kB/s]
Generating test split: 1 examples [00:00, 39.77 examples/s]


tiger cat


In [2]:
pip install opencv-python


Defaulting to user installation because normal site-packages is not writeable
Collecting opencv-python
  Downloading opencv_python-4.10.0.84-cp37-abi3-macosx_11_0_arm64.whl.metadata (20 kB)
Downloading opencv_python-4.10.0.84-cp37-abi3-macosx_11_0_arm64.whl (54.8 MB)
[2K   [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m54.8/54.8 MB[0m [31m8.9 MB/s[0m eta [36m0:00:00[0m0m eta [36m0:00:01[0m[36m0:00:01[0m
[?25hInstalling collected packages: opencv-python
Successfully installed opencv-python-4.10.0.84

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.0[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49m/Library/Developer/CommandLineTools/usr/bin/python3 -m pip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [3]:
import cv2


In [5]:
# Dump every frame of drum.mp4 to frame_<n>.jpg (numbered from 1) in the
# current working directory.
cap = cv2.VideoCapture('drum.mp4')
if not cap.isOpened():
    cap.release()
    # Raise instead of calling exit(): inside a Jupyter kernel exit() requests
    # a kernel shutdown, which throws away all notebook state.
    raise IOError("Error: Could not open video.")
frame_count = 0
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # No frame returned; stop reading.
            break
        frame_count += 1
        # Save the frame to a file; imwrite reports failure through its
        # return value, so check it rather than silently skipping frames.
        if not cv2.imwrite(f'frame_{frame_count}.jpg', frame):
            raise IOError(f"Error: Could not write frame_{frame_count}.jpg")
finally:
    # Release the capture handle even if reading or writing fails.
    cap.release()


In [11]:
import cv2

# Load the image (replace with the path of the image you want to process).
image = cv2.imread('frame_1.jpg')

# cv2.imread returns None when the file could not be read.
if image is not None:
    # Convert to grayscale before edge detection.
    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Canny edge detection with low/high thresholds; tune both as needed.
    edges = cv2.Canny(gray_image, 100, 200)

    # Show the original image first, then the edge map.
    for window_title, picture in (('Original Image', image), ('Edges', edges)):
        cv2.imshow(window_title, picture)

    # Block until a key is pressed, then close the windows.
    cv2.waitKey(0)
    cv2.destroyAllWindows()
else:
    print("无法读取图像，请检查文件是否存在或文件路径是否正确。")



In [12]:
from transformers import YolosImageProcessor, YolosForObjectDetection
from PIL import Image
import torch
import requests

url = "http://images.cocodataset.org/val2017/000000039769.jpg"
# Bound the wait and surface HTTP errors here, rather than letting PIL fail
# later on an error page or a stalled connection.
response = requests.get(url, stream=True, timeout=30)
response.raise_for_status()
image = Image.open(response.raw)

model = YolosForObjectDetection.from_pretrained('hustvl/yolos-tiny')
image_processor = YolosImageProcessor.from_pretrained("hustvl/yolos-tiny")

inputs = image_processor(images=image, return_tensors="pt")
# Inference only: disable gradient tracking for the forward pass.
with torch.no_grad():
    outputs = model(**inputs)

# model predicts bounding boxes and corresponding COCO classes
logits = outputs.logits
bboxes = outputs.pred_boxes


# print results
# PIL reports size as (width, height); reversing it gives (height, width).
target_sizes = torch.tensor([image.size[::-1]])
results = image_processor.post_process_object_detection(outputs, threshold=0.9, target_sizes=target_sizes)[0]
for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
    box = [round(i, 2) for i in box.tolist()]
    print(
        f"Detected {model.config.id2label[label.item()]} with confidence "
        f"{round(score.item(), 3)} at location {box}"
    )

  from .autonotebook import tqdm as notebook_tqdm


Detected remote with confidence 0.991 at location [46.48, 72.78, 178.98, 119.3]
Detected remote with confidence 0.908 at location [336.48, 79.27, 368.23, 192.36]
Detected cat with confidence 0.934 at location [337.19, 18.06, 638.14, 373.09]
Detected cat with confidence 0.979 at location [10.93, 53.74, 313.41, 470.67]
Detected remote with confidence 0.974 at location [41.63, 72.23, 178.09, 119.99]
