Update README.md
README.md (CHANGED)
```diff
@@ -5,8 +5,9 @@ license: mit
 # 🧠 AlphaMed
 
 This is the official model checkpoint for the paper:
-**[AlphaMed: Incentivizing Medical Reasoning with …
-AlphaMed is a medical large language model trained **without supervised fine-tuning …
+**[AlphaMed: Incentivizing Medical Reasoning with Minimalist Rule-Based RL](https://www.arxiv.org/abs/2505.17952)**
+AlphaMed is a medical large language model trained **without supervised fine-tuning on chain-of-thought (CoT) data**,
+relying solely on reinforcement learning to elicit step-by-step reasoning in complex medical tasks.
 
 ## 🚀 Usage
 
@@ -21,7 +22,7 @@ To use the model, format your input prompt as:
 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 
 # Load model and tokenizer
-model_id = "…
+model_id = "che111/AlphaMed-3B-instruct-rl"  # Replace with actual repo path
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 model = AutoModelForCausalLM.from_pretrained(model_id)
 
@@ -35,5 +36,6 @@ prompt = (
 )
 
 # Generate output
-…
+max_new_tokens = 8196
+output = pipe(prompt, max_new_tokens=max_new_tokens, do_sample=False)[0]["generated_text"]
 print(output)
```
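The hunks above only show fragments of the README's usage example: the pipeline construction and the prompt template (README lines 13–20 and 28–35) are not part of the diff. The following is a minimal end-to-end sketch of how the shown pieces fit together; the `pipe = pipeline(...)` call and the example question are assumptions for illustration only, while the model id and the generation call follow the added lines.

```python
# Minimal usage sketch assembled from the snippets in the diff above.
# NOTE: the pipeline construction and the example prompt are assumptions;
# the README's exact prompt template is elided from the shown hunks.
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

# Load model and tokenizer (model id taken from the added line 25)
model_id = "che111/AlphaMed-3B-instruct-rl"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

# Build a text-generation pipeline around the loaded model
# (assumed; the diff only shows `pipe` being called for generation)
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

# Example prompt (placeholder question; the card's actual prompt format
# is not visible in the shown hunks)
prompt = (
    "Answer the following medical question step by step.\n"
    "Question: What is the first-line treatment for uncomplicated hypertension?\n"
    "Answer:"
)

# Generate output greedily, as in the diff (do_sample=False)
max_new_tokens = 8196
output = pipe(prompt, max_new_tokens=max_new_tokens, do_sample=False)[0]["generated_text"]
print(output)
```

With `do_sample=False` the pipeline decodes greedily, so the same prompt always yields the same completion; `max_new_tokens` bounds the length of the generated reasoning chain.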