Below are two short examples showing how to use QwQ-32B: the first loads the model locally with Hugging Face Transformers, and the second calls the hosted model through Alibaba Cloud's DashScope API (OpenAI-compatible mode).
```python
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "Qwen/QwQ-32B"

# Load the model and tokenizer; device_map="auto" spreads the weights across the
# available devices and torch_dtype="auto" uses the checkpoint's native dtype.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype="auto",
    device_map="auto"
)
tokenizer = AutoTokenizer.from_pretrained(model_name)

prompt = "How many r's are in the word \"strawberry\""
messages = [
    {"role": "user", "content": prompt}
]

# Build the chat-formatted prompt and append the generation prompt.
text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True
)
model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

# Generate; the large max_new_tokens budget leaves room for the long reasoning trace.
generated_ids = model.generate(
    **model_inputs,
    max_new_tokens=32768
)

# Strip the prompt tokens so only the newly generated tokens are decoded.
generated_ids = [
    output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]

response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
print(response)
```
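For interactive use you may prefer to see tokens as they are produced rather than waiting for the full generation to finish. Below is a minimal streaming sketch, assuming the `model`, `tokenizer`, and `model_inputs` defined above, using Transformers' `TextStreamer`:

```python
from transformers import TextStreamer

# Stream decoded tokens to stdout as they are generated; skip_prompt hides the
# echoed input prompt and skip_special_tokens drops chat-template markers.
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

_ = model.generate(
    **model_inputs,
    max_new_tokens=32768,
    streamer=streamer
)
```

The next example calls the hosted model through DashScope's OpenAI-compatible endpoint and streams the reasoning content and the final answer separately: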
```python
from openai import OpenAI
import os

# Initialize OpenAI client
client = OpenAI(
    # If the environment variable is not configured, replace with your API Key: api_key="sk-xxx"
    # How to get an API Key: https://help.aliyun.com/zh/model-studio/developer-reference/get-api-key
    api_key=os.getenv("DASHSCOPE_API_KEY"),
    base_url="https://dashscope.aliyuncs.com/compatible-mode/v1"
)

reasoning_content = ""
content = ""
is_answering = False

completion = client.chat.completions.create(
    model="qwq-32b",
    messages=[
        {"role": "user", "content": "Which is larger, 9.9 or 9.11?"}
    ],
    stream=True,
    # Uncomment the following line to return token usage in the last chunk
    # stream_options={
    #     "include_usage": True
    # }
)

print("\n" + "=" * 20 + "reasoning content" + "=" * 20 + "\n")

for chunk in completion:
    # If chunk.choices is empty, print usage
    if not chunk.choices:
        print("\nUsage:")
        print(chunk.usage)
    else:
        delta = chunk.choices[0].delta
        # Print reasoning content
        if hasattr(delta, 'reasoning_content') and delta.reasoning_content is not None:
            print(delta.reasoning_content, end='', flush=True)
            reasoning_content += delta.reasoning_content
        else:
            if delta.content != "" and is_answering is False:
                print("\n" + "=" * 20 + "content" + "=" * 20 + "\n")
                is_answering = True
            # Print content
            print(delta.content, end='', flush=True)
            content += delta.content
```
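For a multi-turn conversation, append only the model's final answer (`content`) to the history, not the `reasoning_content`. The sketch below is illustrative, reusing the `client` and `content` variables from the example above; the follow-up question is a made-up placeholder:

```python
# Rebuild the history with only the final answer appended, then ask a follow-up.
messages = [
    {"role": "user", "content": "Which is larger, 9.9 or 9.11?"},
    {"role": "assistant", "content": content},                   # final answer only, no reasoning
    {"role": "user", "content": "Now compare 0.9 and 0.11."},    # hypothetical follow-up
]

follow_up = client.chat.completions.create(
    model="qwq-32b",
    messages=messages,
    stream=True
)

for chunk in follow_up:
    if chunk.choices:
        delta = chunk.choices[0].delta
        # Print only the answer portion of the new turn; handle
        # delta.reasoning_content as in the loop above if you need the trace.
        if getattr(delta, "reasoning_content", None) is None and delta.content:
            print(delta.content, end="", flush=True)
```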