CLIP Model
Creating a custom inference script to run CLIP on SageMaker.
# clip/code/inference.py
import base64
from io import BytesIO

import torch
from PIL import Image
from transformers import CLIPProcessor, CLIPModel


def model_fn(model_dir):
    # load the model and processor from model_dir
    model = CLIPModel.from_pretrained(model_dir)
    processor = CLIPProcessor.from_pretrained(model_dir)
    return model, processor


def predict_fn(data, model_and_processor):
    # unpack the model and processor
    model, processor = model_and_processor
    b64, candidates = data['inputs']['image'], data['inputs']['candidates']
    image = Image.open(BytesIO(base64.b64decode(b64)))
    inputs = processor(text=candidates,
                       images=image,
                       return_tensors="pt",
                       padding=True)
    with torch.no_grad():
        outputs = model(**inputs)
    # this is the image-text similarity score
    logits_per_image = outputs.logits_per_image
    # we can take the softmax to get the label probabilities
    probs = logits_per_image.softmax(dim=1)
    # return a dictionary, which will be JSON serializable
    # (tensors are not, so convert to a nested list first)
    return {"probs": probs.tolist()}
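
Before packaging anything for SageMaker, the two handlers can be exercised locally. Here is a minimal sketch, assuming the CLIP weights have already been saved to a local clip/model/ directory and that cat.jpg is any test image (both paths are hypothetical, not values from this post):

# local smoke test for inference.py (hypothetical paths)
import base64
from inference import model_fn, predict_fn

# encode a test image the same way a client would before calling the endpoint
with open("cat.jpg", "rb") as f:
    b64 = base64.b64encode(f.read()).decode("utf-8")

payload = {
    "inputs": {
        "image": b64,
        "candidates": ["a photo of a cat", "a photo of a dog"],
    }
}

model_and_processor = model_fn("clip/model/")
print(predict_fn(payload, model_and_processor))
# prints {'probs': [[p0, p1]]} with the probabilities summing to 1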
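
To actually run this on SageMaker, the weights and the code/inference.py script are typically packaged together into a model.tar.gz on S3 and deployed through the SageMaker Python SDK. A minimal deployment sketch; the bucket, IAM role, and framework versions below are placeholders you would replace with a supported combination:

# deploy the script above as a real-time endpoint (placeholder values)
from sagemaker.huggingface import HuggingFaceModel

huggingface_model = HuggingFaceModel(
    model_data="s3://my-bucket/clip/model.tar.gz",  # archive containing the weights and code/inference.py
    role="my-sagemaker-execution-role",             # IAM role with SageMaker permissions
    transformers_version="4.26",                    # placeholder versions
    pytorch_version="1.13",
    py_version="py39",
)

predictor = huggingface_model.deploy(
    initial_instance_count=1,
    instance_type="ml.g4dn.xlarge",
)

# the endpoint expects the same payload shape predict_fn parses above,
# reusing the b64 string from the local test
result = predictor.predict({
    "inputs": {"image": b64, "candidates": ["a photo of a cat", "a photo of a dog"]}
})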