deb101
/

mistral-7b-instruct-v0.3-mimic4-adapt-l2r

@@ -1,7 +1,7 @@
 ---
 library_name: transformers
-license: mit
-base_model: microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract
 tags:
 - generated_from_trainer
 model-index:
@@ -14,10 +14,10 @@ should probably proofread and complete it, then remove this comment. -->
 # mistral-7b-instruct-v0.3-mimic4-adapt-l2r
-This model is a fine-tuned version of [microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract](https://huggingface.co/microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract) on the None dataset.
 It achieves the following results on the evaluation set:
-- Ndcg: 0.9475
-- Loss: 2109.9384
 ## Model description
@@ -49,11 +49,11 @@ The following hyperparameters were used during training:
 ### Training results
-| Training Loss | Epoch | Step | Ndcg   | Validation Loss |
-|:-------------:|:-----:|:----:|:------:|:---------------:|
-| 8815.8352     | 1.0   | 10   | 0.9466 | 2171.9216       |
-| 8678.457      | 2.0   | 20   | 0.9472 | 2132.7099       |
-| 8569.9031     | 3.0   | 30   | 0.9475 | 2109.9384       |
 ### Framework versions

 ---
 library_name: transformers
+license: apache-2.0
+base_model: mistralai/Mistral-7B-Instruct-v0.3
 tags:
 - generated_from_trainer
 model-index:
 # mistral-7b-instruct-v0.3-mimic4-adapt-l2r
+This model is a fine-tuned version of [mistralai/Mistral-7B-Instruct-v0.3](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3) on an unspecified dataset (the dataset name was not recorded in the training metadata).
 It achieves the following results on the evaluation set:
+- Ndcg: 0.9589
+- Loss: 2196.0439
 ## Model description
 ### Training results
+| Training Loss | Epoch  | Step | Ndcg   | Validation Loss |
+|:-------------:|:------:|:----:|:------:|:---------------:|
+| No log        | 1.0    | 4    | 0.9586 | 2231.4878       |
+| No log        | 2.0    | 8    | 0.9589 | 2196.0439       |
+| No log        | 2.3077 | 9    | 0.9589 | 2196.0439       |
 ### Framework versions

config.json CHANGED Viewed

@@ -1,24 +1,23 @@
 {
   "_attn_implementation_autoset": true,
-  "_name_or_path": "microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract",
   "architectures": [
-    "BertForMaskedLM"
   ],
-  "attention_probs_dropout_prob": 0.1,
-  "classifier_dropout": null,
-  "hidden_act": "gelu",
-  "hidden_dropout_prob": 0.1,
-  "hidden_size": 768,
   "initializer_range": 0.02,
-  "intermediate_size": 3072,
-  "layer_norm_eps": 1e-12,
-  "max_position_embeddings": 512,
-  "model_type": "bert",
-  "num_attention_heads": 12,
-  "num_hidden_layers": 12,
   "num_labels": 7942,
-  "pad_token_id": 0,
-  "position_embedding_type": "absolute",
   "quantization_config": {
     "_load_in_4bit": true,
     "_load_in_8bit": false,
@@ -34,8 +33,12 @@
     "load_in_8bit": false,
     "quant_method": "bitsandbytes"
   },
   "transformers_version": "4.49.0",
-  "type_vocab_size": 2,
   "use_cache": true,
-  "vocab_size": 30522
 }

 {
   "_attn_implementation_autoset": true,
+  "_name_or_path": "mistralai/Mistral-7B-Instruct-v0.3",
   "architectures": [
+    "MistralForCausalLM"
   ],
+  "attention_dropout": 0.0,
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
   "initializer_range": 0.02,
+  "intermediate_size": 14336,
+  "max_position_embeddings": 32768,
+  "model_type": "mistral",
+  "num_attention_heads": 32,
+  "num_hidden_layers": 32,
+  "num_key_value_heads": 8,
   "num_labels": 7942,
   "quantization_config": {
     "_load_in_4bit": true,
     "_load_in_8bit": false,
     "load_in_8bit": false,
     "quant_method": "bitsandbytes"
   },
+  "rms_norm_eps": 1e-05,
+  "rope_theta": 1000000.0,
+  "sliding_window": null,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
   "transformers_version": "4.49.0",
   "use_cache": true,
+  "vocab_size": 32768
 }

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:942e579bc3b5110b83d56abe7d4321f5c7ea5e8750e302683aab366f40475ff5
-size 125319068

 version https://git-lfs.github.com/spec/v1
+oid sha256:8c54c8c4c988abed597571362cd60ba8c227dcb2731c01f2f778b29409e5bc2b
+size 4323007331

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:284b8ae85e58704ceaa48598daabc81df9d387320a8582fd19e360e6578f2f8a
 size 5432

 version https://git-lfs.github.com/spec/v1
+oid sha256:bf7c1660c8340ddd8a5681cb8db680eae54e303e4654882f288171462b81b370
 size 5432