AnomalyCLIP
======

**AnomalyCLIP: Object-agnostic Prompt Learning for Zero-shot Anomaly Detection**

* Paper: https://arxiv.org/pdf/2310.18961

![AnomalyCLIP overview](../assets/anomalyclip_overview.png)

```bash
git clone https://github.com/zqhang/AnomalyCLIP.git AnomalyCLIP_repo

conda create --name anomalyclip python=3.10 -y
conda activate anomalyclip

pip install -r AnomalyCLIP_repo/requirements.txt 
pip install thop ftfy regex tabulate opencv-python

pip install "numpy<2"
```

In [None]:
import os
import sys
import random
import argparse

import torch
import torch.nn.functional as F
from tqdm import tqdm
import numpy as np
from tabulate import tabulate
from scipy.ndimage import gaussian_filter

sys.path.append("AnomalyCLIP_repo")
import AnomalyCLIP_lib
from prompt_ensemble import AnomalyCLIP_PromptLearner
from loss import FocalLoss, BinaryDiceLoss
from utils import normalize
from dataset import Dataset
from logger import get_logger
from utils import get_transform

def setup_seed(seed):
    """Seed the random number generators and configure cuDNN for repeatable runs.

    Args:
        seed: Value handed unchanged to ``torch.manual_seed``,
            ``torch.cuda.manual_seed_all``, ``np.random.seed`` and
            ``random.seed``.
    """
    # Same seed for every generator, applied in a fixed order.
    seeders = (
        torch.manual_seed,
        torch.cuda.manual_seed_all,
        np.random.seed,
        random.seed,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    # Request deterministic cuDNN kernels and switch off its benchmark mode.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

from visualization import visualizer
from metrics import image_level_metrics, pixel_level_metrics




In [None]:
# Prompt-learner settings forwarded to AnomalyCLIP_lib.load via `design_details`.
n_ctx, depth, t_n_ctx = 12, 9, 4

# Side length (pixels) passed to get_transform further down.
image_size = 518

# Use CUDA when PyTorch reports it as available, otherwise run on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# NOTE(review): the "learnabel" spelling is presumably the exact key the
# library looks up -- confirm against AnomalyCLIP_lib before "fixing" it.
AnomalyCLIP_parameters = {
    "Prompt_length": n_ctx,
    "learnabel_text_embedding_depth": depth,
    "learnabel_text_embedding_length": t_n_ctx,
}

# Only the first element of the returned pair is used here.
model, _ = AnomalyCLIP_lib.load(
    "ViT-L/14@336px", device=device, design_details=AnomalyCLIP_parameters
)

# Switch to evaluation mode; the trailing ';' keeps the notebook from echoing
# the returned module.
model.eval();


name ViT-L/14@336px
text_layer False
text_layer True


In [None]:
import torchvision.transforms as transforms
from AnomalyCLIP_lib.transform import image_transform
from AnomalyCLIP_lib.constants import (
    OPENAI_DATASET_MEAN, OPENAI_DATASET_STD
)

def get_transform(image_size):
    """Build the image preprocessing pipeline and a companion target transform.

    Args:
        image_size: Side length, in pixels, of the square output.

    Returns:
        A ``(preprocess, target_transform)`` tuple. ``preprocess`` is the
        object returned by ``image_transform`` (evaluation mode, OpenAI
        mean/std) with its first two entries replaced by a bicubic resize to
        ``image_size x image_size`` and a center crop of that same size.
        ``target_transform`` resizes to the same square, center-crops and
        converts to a tensor, without any normalization.
    """
    square = (image_size, image_size)

    preprocess = image_transform(
        image_size,
        is_train=False,
        mean=OPENAI_DATASET_MEAN,
        std=OPENAI_DATASET_STD,
    )

    target_steps = [
        transforms.Resize(square),
        transforms.CenterCrop(image_size),
        transforms.ToTensor(),
    ]
    target_transform = transforms.Compose(target_steps)

    # Overwrite the first two stages of the stock pipeline so the image is
    # resized to an exact square instead of whatever image_transform set up.
    # NOTE(review): assumes entries 0 and 1 of preprocess.transforms are the
    # resize and crop stages -- confirm against AnomalyCLIP_lib.transform.
    forced_resize = transforms.Resize(
        size=square,
        interpolation=transforms.InterpolationMode.BICUBIC,
        max_size=None,
        antialias=None,
    )
    preprocess.transforms[0] = forced_resize
    forced_crop = transforms.CenterCrop(size=square)
    preprocess.transforms[1] = forced_crop

    return preprocess, target_transform


# Build both transform pipelines once for the configured `image_size`.
preprocess, target_transform = get_transform(image_size)