cpt core 4
scripts/cpt_core_model_4.py  +16 -6

@@ -33,14 +33,16 @@ model, tokenizer = FastLanguageModel.from_pretrained(
 
 model = FastLanguageModel.get_peft_model(
     model,
-    r=256, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
+    # r=256, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
+    r=16, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
     target_modules=[
         "q_proj", "k_proj", "v_proj", "o_proj",
         "gate_proj",
         "up_proj", "down_proj",
         "embed_tokens", "lm_head",
     ],
-    lora_alpha=32,
+    # lora_alpha=32,
+    lora_alpha=16,
     lora_dropout=0, # Supports any, but = 0 is optimized
     bias="none", # Supports any, but = "none" is optimized
     # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
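
The first hunk cuts the LoRA rank from 256 down to 16 and lora_alpha from 32 down to 16. A rough back-of-the-envelope sketch of what that means for the adapters, assuming standard LoRA scaling (lora_alpha / r) and a hypothetical 4096-wide projection; the lora_stats helper and the dimensions are illustrative, not part of the script:

# Illustrative only: standard LoRA adds (alpha / r) * B @ A on top of each target
# weight, with A of shape (r, d_in) and B of shape (d_out, r).
def lora_stats(r, alpha, d_in=4096, d_out=4096):
    scale = alpha / r             # multiplier applied to the low-rank update
    params = r * (d_in + d_out)   # adapter parameters added per target matrix
    return scale, params

old_scale, old_params = lora_stats(r=256, alpha=32)   # values before this commit
new_scale, new_params = lora_stats(r=16, alpha=16)    # values after this commit
print(f"before: scale={old_scale:.3f}, params/matrix={old_params:,}")  # 0.125, 2,097,152
print(f"after:  scale={new_scale:.3f}, params/matrix={new_params:,}")  # 1.000, 131,072

Under those assumptions each adapted matrix shrinks 16x, while the effective update scale rises because lora_alpha now equals r.
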
@@ -73,6 +75,7 @@ def unlsoth_generator():
 
 
 train_dataset = Dataset.from_generator(unlsoth_generator)
+dataset = train_dataset.train_test_split(test_size=0.01)
 
 #
 # trainer
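
The second hunk holds out 1% of the generated dataset for evaluation. A self-contained sketch of the same train_test_split call, with a toy generator standing in for the script's unlsoth_generator (not shown here); requires the Hugging Face datasets package:

from datasets import Dataset

def toy_generator():                      # stand-in for unlsoth_generator
    for i in range(1000):
        yield {"text": f"sample {i}"}

train_dataset = Dataset.from_generator(toy_generator)
dataset = train_dataset.train_test_split(test_size=0.01)  # hold out 1% for eval
print(len(dataset["train"]), len(dataset["test"]))        # 990 10

train_test_split returns a DatasetDict, which is why the trainer below indexes dataset['train'] and dataset['test'].
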
@@ -86,9 +89,11 @@ from unsloth import UnslothTrainer, UnslothTrainingArguments
 trainer = UnslothTrainer(
     model=model,
     tokenizer=tokenizer,
-    train_dataset=train_dataset,
-
+    # train_dataset=train_dataset,
+    train_dataset=dataset['train'],
+    eval_dataset=dataset['test'],
     dataset_num_proc=32,
+    max_seq_length=max_seq_length,
     max_steps=len(litgpt_streaming_dataset),
     packing=False, # Can make training 5x faster for short sequences.
 
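
The third hunk wires the new split into the trainer and passes max_seq_length explicitly. A condensed sketch of that wiring, wrapped in a helper so the pieces defined elsewhere in the script (model, tokenizer, the training arguments, max_seq_length, litgpt_streaming_dataset) can be passed in; build_trainer itself is illustrative, not part of the commit:

from unsloth import UnslothTrainer

def build_trainer(model, tokenizer, dataset, training_args,
                  max_seq_length, litgpt_streaming_dataset):
    # Mirrors the arguments shown in the hunk above.
    return UnslothTrainer(
        model=model,
        tokenizer=tokenizer,
        train_dataset=dataset["train"],   # 99% of the generated rows
        eval_dataset=dataset["test"],     # new: the 1% held-out split
        dataset_num_proc=32,
        max_seq_length=max_seq_length,    # new: passed to the trainer explicitly
        max_steps=len(litgpt_streaming_dataset),
        packing=False,
        args=training_args,               # the UnslothTrainingArguments below
    )
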
@@ -112,11 +117,16 @@ trainer = UnslothTrainer(
         seed=23,
         output_dir=output_dir,
         report_to='wandb',
+
+        save_steps=100,
         run_name=run_name,
 
         do_eval=True,
-
-
+        fp16_full_eval=True,
+        per_device_eval_batch_size=2,
+        eval_accumulation_steps=4,
+        eval_strategy='steps',
+        eval_steps=10,
     ),
 )
 
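
The last hunk turns on periodic checkpointing and evaluation inside UnslothTrainingArguments: save every 100 steps, evaluate every 10 steps with full-fp16 eval, small eval batches, and eval output accumulation. A small sketch of what that cadence implies, using placeholder totals rather than the script's real values:

total_steps = 1000                  # placeholder for len(litgpt_streaming_dataset)
eval_steps, save_steps = 10, 100    # values set in this commit
eval_rows = 10                      # placeholder size of the 1% test split
per_device_eval_batch_size = 2
eval_accumulation_steps = 4         # flush accumulated eval outputs to CPU every 4 batches

evals = total_steps // eval_steps
saves = total_steps // save_steps
batches_per_eval = -(-eval_rows // per_device_eval_batch_size)   # ceil division
print(f"{evals} evals and {saves} checkpoints over {total_steps} steps; "
      f"each eval runs {batches_per_eval} batches of {per_device_eval_batch_size}")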