```python
! pip install ftfy regex tqdm
! pip install git+https://github.com/openai/CLIP.git
```

```python
import numpy as np
import torch
from pkg_resources import packaging

print("Torch version:", torch.__version__)
```
Loading the model
```python
import clip

clip.available_models()  # lists the names of the available CLIP models

model, preprocess = clip.load("ViT-B/32")
model.cuda().eval()

input_resolution = model.visual.input_resolution
context_length = model.context_length
vocab_size = model.vocab_size

print("Model parameters:", f"{np.sum([int(np.prod(p.shape)) for p in model.parameters()]):,}")
print("Input resolution:", input_resolution)
print("Context length:", context_length)
print("Vocab size:", vocab_size)
```
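If no GPU is available, `model.cuda()` will fail. Below is a minimal device-aware sketch of the same load; the `device` argument is part of the standard `clip.load` API, but the exact usage here is illustrative rather than the notebook's own code:

```python
# Sketch: pick the device explicitly instead of assuming CUDA is present.
device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load("ViT-B/32", device=device)
model.eval()
```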
Image preprocessing
We will feed the model eight example images together with their textual descriptions, and compare the similarity between the corresponding features.

The tokenizer is case-insensitive, so we are free to use any suitable textual description.
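As a quick sanity check of the case-insensitivity claim, the sketch below (assuming `clip.tokenize` from the CLIP repository, which lowercases its input) tokenizes two casings of the same sentence and compares the resulting token IDs:

```python
# Sketch: both casings should produce identical token IDs if the tokenizer lowercases text.
tokens_lower = clip.tokenize("a facial photo of a tabby cat")
tokens_mixed = clip.tokenize("A Facial Photo of a Tabby Cat")
print(torch.equal(tokens_lower, tokens_mixed))  # expected: True
```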
```python
import os

import skimage
import IPython.display
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
```
```python
# images in skimage to use and their textual descriptions
descriptions = {
    "page": "a page of text about segmentation",
    "chelsea": "a facial photo of a tabby cat",
    "astronaut": "a portrait of an astronaut with the American flag",
    "rocket": "a rocket standing on a launchpad",
    "motorcycle_right": "a red motorcycle standing in a garage",
    "camera": "a person looking at a camera on a tripod",
    "horse": "a black-and-white silhouette of a horse",
    "coffee": "a cup of coffee on a saucer"
}

original_images = []
images = []
texts = []
plt.figure(figsize=(16, 5))
```
```python
for filename in [filename for filename in os.listdir(skimage.data_dir) if filename.endswith(".png") or filename.endswith(".jpg")]:
    name = os.path.splitext(filename)[0]
    if name not in descriptions:
        continue

    # Keep the original image for display, the preprocessed tensor for the model,
    # and the matching description for the text side.
    image = Image.open(os.path.join(skimage.data_dir, filename)).convert("RGB")
    original_images.append(image)
    images.append(preprocess(image))
    texts.append(descriptions[name])
```
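Between loading the images and drawing the heat map, the features still have to be computed. The sketch below, which assumes the standard CLIP API (`clip.tokenize`, `model.encode_image`, `model.encode_text`) and a "This is ..." prompt prefix as an illustrative convention, builds normalized image and text features and defines the `similarity` matrix and `count` used by the plotting code that follows:

```python
# Sketch (assumed step): encode images and texts, normalize, and take the cosine similarity.
image_input = torch.stack(images).cuda()
text_tokens = clip.tokenize(["This is " + desc for desc in texts]).cuda()

with torch.no_grad():
    image_features = model.encode_image(image_input).float()
    text_features = model.encode_text(text_tokens).float()

# L2-normalize so the dot product below is the cosine similarity.
image_features /= image_features.norm(dim=-1, keepdim=True)
text_features /= text_features.norm(dim=-1, keepdim=True)
similarity = text_features.cpu().numpy() @ image_features.cpu().numpy().T

count = len(descriptions)
```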
```python
plt.figure(figsize=(20, 14))
plt.imshow(similarity, vmin=0.1, vmax=0.3)
# plt.colorbar()
plt.yticks(range(count), texts, fontsize=18)
plt.xticks([])
for i, image in enumerate(original_images):
    plt.imshow(image, extent=(i - 0.5, i + 0.5, -1.6, -0.6), origin="lower")
for x in range(similarity.shape[1]):
    for y in range(similarity.shape[0]):
        plt.text(x, y, f"{similarity[y, x]:.2f}", ha="center", va="center", size=12)

for side in ["left", "top", "right", "bottom"]:
    plt.gca().spines[side].set_visible(False)
```
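To finish the figure, the axis limits need to leave room for the image thumbnails drawn below the matrix, and a title helps. The exact values below are an assumption, a minimal sketch of that finishing step:

```python
# Sketch (assumed values): frame the heatmap so the thumbnail row stays visible, then add a title.
plt.xlim([-0.5, count - 0.5])
plt.ylim([count + 0.5, -2])
plt.title("Cosine similarity between text and image features", size=20)
plt.show()
```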