import torch
from transformers import AutoTokenizer, AutoModel
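# Load the CodeFuse-CGE-Large checkpoint and its tokenizer; trust_remote_code=True is needed
# to pull in the model's custom code (including the encode() helper used below).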
model_name_or_path = "codefuse-ai/CodeFuse-CGE-Large"
model = AutoModel.from_pretrained(model_name_or_path, trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, trust_remote_code=True, truncation_side='right', padding_side='right')
# Run on GPU when available, otherwise fall back to CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
model.to(device)
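# Language-specific instruction prefixes used to format queries and code passages before encoding.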
prefix_dict = {'python':{'query':'Retrieve the Python code that solves the following query:', 'passage':'Python code:'},
'java':{'query':'Retrieve the Java code that solves the following query:', 'passage':'Java code:'},
'go':{'query':'Retrieve the Go code that solves the following query:', 'passage':'Go code:'},
'c++':{'query':'Retrieve the C++ code that solves the following query:', 'passage':'C++ code:'},
'javascript':{'query':'Retrieve the Javascript code that solves the following query:', 'passage':'Javascript code:'},
'php':{'query':'Retrieve the PHP code that solves the following query:', 'passage':'PHP code:'},
'ruby':{'query':'Retrieve the Ruby code that solves the following query:', 'passage':'Ruby code:'},
'default':{'query':'Retrieve the code that solves the following query:', 'passage':'Code:'}
}
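# Example query / code-passage pair (the passage is a single-line string of Python code);
# the matching instruction prefixes are attached to each entry below.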
text = ["Writes a Boolean to the stream.",
"def writeBoolean(self, n): t = TYPE_BOOL_TRUE if n is False: t = TYPE_BOOL_FALSE self.stream.write(t)"]
text[0] += prefix_dict['python']['query']
text[1] += prefix_dict['python']['passage']
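# encode() is provided by the model's remote code and returns one embedding per input string;
# the dot product of the query and passage embeddings is the similarity score.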
embed = model.encode(tokenizer, text)
score = embed[0] @ embed[1].T
print("score", score)