update README.md
README.md CHANGED
```diff
@@ -151,9 +151,9 @@ model_inputs = tokenizer([prompt_text], return_tensors="pt").to(device)
 
 model_outputs = model.generate(
     **model_inputs,
-    max_new_tokens=
-    top_p=0.
-    temperature=0.
+    max_new_tokens=32768,
+    top_p=0.95,
+    temperature=0.6
 )
 output_token_ids = [
     model_outputs[i][len(model_inputs[i]):] for i in range(len(model_inputs['input_ids']))
```
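For context, a minimal sketch of how the updated `generate` arguments sit in a complete Transformers snippet. Only the three sampling values come from this diff; the model loading, prompt construction, and decoding lines are assumptions modeled on the context line in the hunk header.

```python
# Sketch, not part of the diff: the new sampling settings in a full transformers run.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

device = "cuda" if torch.cuda.is_available() else "cpu"
model_name = "openbmb/MiniCPM4.1-8B"

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name, torch_dtype=torch.bfloat16, trust_remote_code=True
).to(device)

# Build and tokenize a chat prompt, mirroring the context line above.
messages = [{"role": "user", "content": "Write an article about Artificial Intelligence."}]
prompt_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([prompt_text], return_tensors="pt").to(device)

# The values introduced by this commit. top_p/temperature take effect when sampling
# is enabled (do_sample), either here or via the model's generation_config.
model_outputs = model.generate(
    **model_inputs,
    max_new_tokens=32768,
    top_p=0.95,
    temperature=0.6,
)

# Strip the prompt tokens from each sequence before decoding.
output_token_ids = [
    model_outputs[i][len(model_inputs["input_ids"][i]):]
    for i in range(len(model_inputs["input_ids"]))
]
print(tokenizer.batch_decode(output_token_ids, skip_special_tokens=True)[0])
```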
```diff
@@ -242,8 +242,8 @@ response = client.chat.completions.create(
     messages=[
         {"role": "user", "content": "Write an article about Artificial Intelligence."},
     ],
-    temperature=0.
-    max_tokens=
+    temperature=0.6,
+    max_tokens=32768,
 )
 
 print(response.choices[0].message.content)
```
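Likewise, a minimal sketch of the full chat-completions request with the new `temperature` and `max_tokens` values. The `base_url`, `api_key`, and served model name are assumptions for a locally hosted OpenAI-compatible endpoint, not taken from this diff.

```python
# Sketch, not part of the diff: the new request parameters in a complete client call.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")  # assumed endpoint

response = client.chat.completions.create(
    model="openbmb/MiniCPM4.1-8B",
    messages=[
        {"role": "user", "content": "Write an article about Artificial Intelligence."},
    ],
    temperature=0.6,   # value introduced by this commit
    max_tokens=32768,  # value introduced by this commit
)

print(response.choices[0].message.content)
```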
```diff
@@ -263,7 +263,7 @@ from transformers import AutoTokenizer
 from vllm import LLM, SamplingParams
 
 model_name = "openbmb/MiniCPM4.1-8B"
-prompt = [{"role": "user", "content": "
+prompt = [{"role": "user", "content": "Write an article about Artificial Intelligence."}]
 
 tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
 input_text = tokenizer.apply_chat_template(prompt, tokenize=False, add_generation_prompt=True)
```
```diff
@@ -275,7 +275,7 @@ llm = LLM(
     dtype="bfloat16",
     gpu_memory_utilization=0.8,
 )
-sampling_params = SamplingParams(top_p=0.
+sampling_params = SamplingParams(top_p=0.95, temperature=0.6, max_tokens=32768)
 
 outputs = llm.generate(prompts=input_text, sampling_params=sampling_params)
 
```
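Combining the two vLLM hunks above, a minimal offline-inference sketch with the new prompt and `SamplingParams`. The `LLM(...)` arguments not visible in the diff context (such as `model=` and `trust_remote_code=`) are assumptions.

```python
# Sketch, not part of the diff: the updated prompt and sampling values in a full vLLM offline run.
from transformers import AutoTokenizer
from vllm import LLM, SamplingParams

model_name = "openbmb/MiniCPM4.1-8B"
prompt = [{"role": "user", "content": "Write an article about Artificial Intelligence."}]

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
input_text = tokenizer.apply_chat_template(prompt, tokenize=False, add_generation_prompt=True)

llm = LLM(
    model=model_name,            # assumed; not shown in the diff context
    trust_remote_code=True,      # assumed; not shown in the diff context
    dtype="bfloat16",
    gpu_memory_utilization=0.8,
)
# The sampling values introduced by this commit.
sampling_params = SamplingParams(top_p=0.95, temperature=0.6, max_tokens=32768)

outputs = llm.generate(prompts=input_text, sampling_params=sampling_params)
print(outputs[0].outputs[0].text)
```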
```diff
@@ -301,8 +301,8 @@ response = client.chat.completions.create(
     messages=[
         {"role": "user", "content": "Write an article about Artificial Intelligence."},
     ],
-    temperature=0.
-    max_tokens=
+    temperature=0.6,
+    max_tokens=32768,
     extra_body=dict(add_special_tokens=True), # Ensures special tokens are added for chat template
 
 )
```
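For the server-side snippet above, `extra_body` in the OpenAI Python SDK merges extra fields into the request JSON, which is how `add_special_tokens` reaches the server alongside the standard parameters. A hypothetical raw-HTTP equivalent, with the endpoint URL and model name assumed, would look roughly like this.

```python
# Sketch, not part of the diff: the same request sent as plain JSON to an
# OpenAI-compatible /v1/chat/completions endpoint (URL and model name assumed).
import requests

payload = {
    "model": "openbmb/MiniCPM4.1-8B",
    "messages": [
        {"role": "user", "content": "Write an article about Artificial Intelligence."},
    ],
    "temperature": 0.6,          # value introduced by this commit
    "max_tokens": 32768,         # value introduced by this commit
    "add_special_tokens": True,  # the field carried by extra_body in the README snippet
}
resp = requests.post("http://localhost:8000/v1/chat/completions", json=payload)
print(resp.json()["choices"][0]["message"]["content"])
```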