ibm-granite
/

granite-3b-code-instruct-2k

Text Generation

text-generation-inference

Model card Files Files and versions

mayank-mishra committed on May 6, 2024

Commit

237b6a5

·

verified ·

1 Parent(s): 2edb2bd

update example

Files changed (1) hide show

README.md +8 -5

README.md CHANGED Viewed

@@ -226,25 +226,28 @@ This is a simple example of how to use **Granite-3B-Code-Instruct** model.
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
 device = "cuda" # or "cpu"
-model_path = "ibm-granite/granite-3B-code-instruct"
 tokenizer = AutoTokenizer.from_pretrained(model_path)
 # drop device_map if running on CPU
 model = AutoModelForCausalLM.from_pretrained(model_path, device_map=device)
 model.eval()
 # change input text as desired
-input_text = "Write a code to find the maximum value in a list of numbers. The list can contain both positive and negative numbers, and the maximum value can be either a positive or negative number."
 # tokenize the text
-input_tokens = tokenizer(input_text, return_tensors="pt")
 # transfer tokenized inputs to the device
 for i in input_tokens:
     input_tokens[i] = input_tokens[i].to(device)
 # generate output tokens
-output = model.generate(**input_tokens)
 # decode output tokens into text
 output = tokenizer.batch_decode(output)
 # loop over the batch to print, in this example the batch size is 1
 for i in output:
-    print(output)
 ```
 <!-- TO DO: Check this part -->

 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
 device = "cuda" # or "cpu"
+model_path = "ibm-granite/granite-3b-code-instruct-2k"
 tokenizer = AutoTokenizer.from_pretrained(model_path)
 # drop device_map if running on CPU
 model = AutoModelForCausalLM.from_pretrained(model_path, device_map=device)
 model.eval()
 # change input text as desired
+chat = [
+    { "role": "user", "content": "Write a code to find the maximum value in a list of numbers." },
+]
+chat = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
 # tokenize the text
+input_tokens = tokenizer(chat, return_tensors="pt")
 # transfer tokenized inputs to the device
 for i in input_tokens:
     input_tokens[i] = input_tokens[i].to(device)
 # generate output tokens
+output = model.generate(**input_tokens, max_new_tokens=100)
 # decode output tokens into text
 output = tokenizer.batch_decode(output)
 # loop over the batch to print, in this example the batch size is 1
 for i in output:
+    print(i)
 ```
 <!-- TO DO: Check this part -->