Model save

Files changed (7) hide show

README.md CHANGED Viewed

@@ -19,8 +19,6 @@ should probably proofread and complete it, then remove this comment. -->
 # gemma7b-kasa-coding-11-v1
 This model is a fine-tuned version of [google/gemma-7b](https://huggingface.co/google/gemma-7b) on the generator dataset.
-It achieves the following results on the evaluation set:
-- Loss: 9.9363
 ## Model description
@@ -55,9 +53,6 @@ The following hyperparameters were used during training:
 ### Training results
-| Training Loss | Epoch | Step | Validation Loss |
-|:-------------:|:-----:|:----:|:---------------:|
-| 10.3696       | 1.0   | 140  | 9.9363          |
 ### Framework versions

 # gemma7b-kasa-coding-11-v1
 This model is a fine-tuned version of [google/gemma-7b](https://huggingface.co/google/gemma-7b) on the generator dataset.
 ## Model description
 ### Training results
 ### Framework versions

adapter_config.json CHANGED Viewed

@@ -5,7 +5,7 @@
   "bias": "none",
   "fan_in_fan_out": false,
   "inference_mode": true,
-  "init_lora_weights": null,
   "layer_replication": null,
   "layers_pattern": null,
   "layers_to_transform": null,

   "bias": "none",
   "fan_in_fan_out": false,
   "inference_mode": true,
+  "init_lora_weights": true,
   "layer_replication": null,
   "layers_pattern": null,
   "layers_to_transform": null,

all_results.json CHANGED Viewed

@@ -6,9 +6,9 @@
     "eval_samples_per_second": 122.269,
     "eval_steps_per_second": 2.061,
     "total_flos": 4.268850850782249e+17,
-    "train_loss": 14.163321549551828,
-    "train_runtime": 384.2161,
     "train_samples": 51241,
-    "train_samples_per_second": 46.599,
-    "train_steps_per_second": 0.364
 }

     "eval_samples_per_second": 122.269,
     "eval_steps_per_second": 2.061,
     "total_flos": 4.268850850782249e+17,
+    "train_loss": 0.0,
+    "train_runtime": 0.0104,
     "train_samples": 51241,
+    "train_samples_per_second": 1725286.468,
+    "train_steps_per_second": 13490.846
 }

runs/Nov18_17-29-16_bold-food-flourishes-fin-02/events.out.tfevents.1731951860.bold-food-flourishes-fin-02.93842.0 ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:67123b9f4ad63877e399824e3133b426e5f1d5d6aa0f13a073f5c411afe3fec4
+size 6076

train_results.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
     "epoch": 1.0,
     "total_flos": 4.268850850782249e+17,
-    "train_loss": 14.163321549551828,
-    "train_runtime": 384.2161,
     "train_samples": 51241,
-    "train_samples_per_second": 46.599,
-    "train_steps_per_second": 0.364
 }

 {
     "epoch": 1.0,
     "total_flos": 4.268850850782249e+17,
+    "train_loss": 0.0,
+    "train_runtime": 0.0104,
     "train_samples": 51241,
+    "train_samples_per_second": 1725286.468,
+    "train_steps_per_second": 13490.846
 }

trainer_state.json CHANGED Viewed

@@ -211,22 +211,14 @@
       "loss": 10.3696,
       "step": 140
     },
-    {
-      "epoch": 1.0,
-      "eval_loss": 9.936275482177734,
-      "eval_runtime": 1.4505,
-      "eval_samples_per_second": 122.712,
-      "eval_steps_per_second": 2.068,
-      "step": 140
-    },
     {
       "epoch": 1.0,
       "step": 140,
       "total_flos": 4.268850850782249e+17,
-      "train_loss": 14.163321549551828,
-      "train_runtime": 384.2161,
-      "train_samples_per_second": 46.599,
-      "train_steps_per_second": 0.364
     }
   ],
   "logging_steps": 5,

       "loss": 10.3696,
       "step": 140
     },
     {
       "epoch": 1.0,
       "step": 140,
       "total_flos": 4.268850850782249e+17,
+      "train_loss": 0.0,
+      "train_runtime": 0.0104,
+      "train_samples_per_second": 1725286.468,
+      "train_steps_per_second": 13490.846
     }
   ],
   "logging_steps": 5,

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8ccd51d1b8585c643058bf237174a980d5e3868354205d145e13c0c7e0c9f15a
 size 5816

 version https://git-lfs.github.com/spec/v1
+oid sha256:50695091913ac2106f6db57ae23750875e32e773c2c58db582498b3010055545
 size 5816