BigDong committed
Commit 10cd12a · 1 Parent(s): 5914bfe

update README.md

Files changed (1): README.md +9 -9
README.md CHANGED
@@ -151,9 +151,9 @@ model_inputs = tokenizer([prompt_text], return_tensors="pt").to(device)
 
 model_outputs = model.generate(
     **model_inputs,
-    max_new_tokens=8192,
-    top_p=0.7,
-    temperature=0.7
+    max_new_tokens=32768,
+    top_p=0.95,
+    temperature=0.6
 )
 output_token_ids = [
     model_outputs[i][len(model_inputs[i]):] for i in range(len(model_inputs['input_ids']))
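For context, a minimal runnable sketch of the Transformers example as it reads after this hunk. The model and tokenizer setup and the chat-template call are assumed from the surrounding README section rather than shown in the diff:

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Assumed setup from the surrounding README example (not part of this hunk).
device = "cuda" if torch.cuda.is_available() else "cpu"
model_name = "openbmb/MiniCPM4.1-8B"
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name, torch_dtype=torch.bfloat16, trust_remote_code=True
).to(device)

# Build a chat prompt and tokenize it.
messages = [{"role": "user", "content": "Write an article about Artificial Intelligence."}]
prompt_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([prompt_text], return_tensors="pt").to(device)

# Sampling settings as updated by this commit.
model_outputs = model.generate(
    **model_inputs,
    max_new_tokens=32768,
    top_p=0.95,
    temperature=0.6,
)

# Keep only the newly generated tokens, then decode.
output_token_ids = [
    model_outputs[i][len(model_inputs[i]):] for i in range(len(model_inputs["input_ids"]))
]
print(tokenizer.batch_decode(output_token_ids, skip_special_tokens=True)[0])
```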
@@ -242,8 +242,8 @@ response = client.chat.completions.create(
     messages=[
         {"role": "user", "content": "Write an article about Artificial Intelligence."},
     ],
-    temperature=0.7,
-    max_tokens=8192,
+    temperature=0.6,
+    max_tokens=32768,
 )
 
 print(response.choices[0].message.content)
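A minimal sketch of the OpenAI-compatible request after this hunk. The client setup (base URL, API key, model name) is a hypothetical local-server configuration, since the hunk shows only the request body:

```python
from openai import OpenAI

# Hypothetical local endpoint; not shown in the diff.
client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

response = client.chat.completions.create(
    model="openbmb/MiniCPM4.1-8B",
    messages=[
        {"role": "user", "content": "Write an article about Artificial Intelligence."},
    ],
    temperature=0.6,   # updated from 0.7 in this commit
    max_tokens=32768,  # updated from 8192
)
print(response.choices[0].message.content)
```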
@@ -263,7 +263,7 @@ from transformers import AutoTokenizer
 from vllm import LLM, SamplingParams
 
 model_name = "openbmb/MiniCPM4.1-8B"
-prompt = [{"role": "user", "content": "Please recommend 5 tourist attractions in Beijing."}]
+prompt = [{"role": "user", "content": "Write an article about Artificial Intelligence."}]
 
 tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
 input_text = tokenizer.apply_chat_template(prompt, tokenize=False, add_generation_prompt=True)
@@ -275,7 +275,7 @@ llm = LLM(
     dtype="bfloat16",
     gpu_memory_utilization=0.8,
 )
-sampling_params = SamplingParams(top_p=0.7, temperature=0.7, max_tokens=8192, repetition_penalty=1.02)
+sampling_params = SamplingParams(top_p=0.95, temperature=0.6, max_tokens=32768)
 
 outputs = llm.generate(prompts=input_text, sampling_params=sampling_params)
 
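Together with the previous hunk, this settles the vLLM offline example on top_p=0.95, temperature=0.6, max_tokens=32768, dropping repetition_penalty=1.02. A minimal sketch of the full example; the LLM(...) arguments not visible in the hunk (model, trust_remote_code) are assumptions:

```python
from transformers import AutoTokenizer
from vllm import LLM, SamplingParams

model_name = "openbmb/MiniCPM4.1-8B"
prompt = [{"role": "user", "content": "Write an article about Artificial Intelligence."}]

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
input_text = tokenizer.apply_chat_template(prompt, tokenize=False, add_generation_prompt=True)

llm = LLM(
    model=model_name,          # assumed; only dtype and gpu_memory_utilization appear in the hunk
    trust_remote_code=True,    # assumed
    dtype="bfloat16",
    gpu_memory_utilization=0.8,
)
# Updated sampling settings; repetition_penalty=1.02 was removed in this commit.
sampling_params = SamplingParams(top_p=0.95, temperature=0.6, max_tokens=32768)

outputs = llm.generate(prompts=input_text, sampling_params=sampling_params)
print(outputs[0].outputs[0].text)
```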
@@ -301,8 +301,8 @@ response = client.chat.completions.create(
     messages=[
         {"role": "user", "content": "Write an article about Artificial Intelligence."},
     ],
-    temperature=0.7,
-    max_tokens=1024,
+    temperature=0.6,
+    max_tokens=32768,
     extra_body=dict(add_special_tokens=True),  # Ensures special tokens are added for chat template
 
 )
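The same parameter update applied to the served-model request, where max_tokens rises from 1024 to 32768. A sketch under the same assumed local endpoint; extra_body forwards the non-standard add_special_tokens field on to the serving backend:

```python
from openai import OpenAI

# Hypothetical local endpoint; not shown in the diff.
client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

response = client.chat.completions.create(
    model="openbmb/MiniCPM4.1-8B",
    messages=[
        {"role": "user", "content": "Write an article about Artificial Intelligence."},
    ],
    temperature=0.6,   # updated from 0.7
    max_tokens=32768,  # updated from 1024
    extra_body=dict(add_special_tokens=True),  # ensures special tokens are added for chat template
)
print(response.choices[0].message.content)
```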
 