CLIPSeg
===

### Image Segmentation Using Text and Image Prompts
 * Paper: https://arxiv.org/abs/2112.10003

![CLIPseg](../assets/clipseg-architecture.png)

```bash
pip install torch torchvision
pip install transformers
pip install matplotlib
```

In [2]:
import os
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import torch
import torchvision
from transformers import CLIPSegProcessor, CLIPSegForImageSegmentation

# Run on the GPU when PyTorch reports one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# The processor and the model are both loaded from the same pretrained
# checkpoint.
processor = CLIPSegProcessor.from_pretrained("CIDAS/clipseg-rd64-refined")
model = CLIPSegForImageSegmentation.from_pretrained(
    "CIDAS/clipseg-rd64-refined"
)
model = model.to(device)

# Switch to evaluation mode. The trailing semicolon is kept from the notebook
# cell (presumably to stop the cell from echoing the returned module).
model.eval();

Output (truncated warning, presumably from tqdm, printed by the cell above): `from .autonotebook import tqdm as notebook_tqdm`


In [9]:
def inference(image, prompts, device=None):
    """Run the module-level CLIPSeg ``model`` on one image for several prompts.

    Args:
        image: The image to segment. It is handed to the processor once per
            prompt (presumably a PIL image -- confirm against the caller).
        prompts: A text prompt or a sequence of text prompts. A bare string
            is treated as a single prompt.
        device: Device the processed inputs are moved to before the forward
            pass. ``None`` (the default) uses the device that the
            module-level ``model``'s parameters live on, so the inputs and
            the model always agree.

    Returns:
        The model's logits with a singleton dimension inserted at index 1,
        with one entry along dimension 0 per prompt.

    Raises:
        ValueError: If ``prompts`` is empty.
    """
    # A bare string would otherwise be measured character by character by
    # ``len(prompts)`` below, replicating the image once per character.
    if isinstance(prompts, str):
        prompts = [prompts]
    else:
        prompts = list(prompts)
    if not prompts:
        raise ValueError("prompts must contain at least one text prompt")

    # Follow the model's own placement instead of assuming the CPU; the
    # model may have been moved to the GPU at load time.
    if device is None:
        device = next(model.parameters()).device

    inputs = processor(
        text=prompts, images=[image] * len(prompts),
        padding="max_length", return_tensors="pt"
    )
    inputs = {k: v.to(device) for k, v in inputs.items()}

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        logits = model(**inputs).logits

    # NOTE(review): some transformers releases appear to squeeze the batch
    # dimension away when there is a single prompt, returning 2-D logits.
    # Restore it so the unsqueeze below always lands on the intended axis
    # -- confirm against the installed version.
    if logits.dim() == 2:
        logits = logits.unsqueeze(0)

    return logits.unsqueeze(1)