deb101 committed on
Commit
246ab83
·
verified ·
1 Parent(s): a91b333

Model save

Browse files
Files changed (4) hide show
  1. README.md +10 -10
  2. config.json +20 -17
  3. model.safetensors +2 -2
  4. training_args.bin +1 -1
README.md CHANGED
@@ -1,7 +1,7 @@
1
  ---
2
  library_name: transformers
3
- license: mit
4
- base_model: microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract
5
  tags:
6
  - generated_from_trainer
7
  model-index:
@@ -14,10 +14,10 @@ should probably proofread and complete it, then remove this comment. -->
14
 
15
  # mistral-7b-instruct-v0.3-mimic4-adapt-l2r
16
 
17
- This model is a fine-tuned version of [microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract](https://huggingface.co/microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract) on the None dataset.
18
  It achieves the following results on the evaluation set:
19
- - Ndcg: 0.9475
20
- - Loss: 2109.9384
21
 
22
  ## Model description
23
 
@@ -49,11 +49,11 @@ The following hyperparameters were used during training:
49
 
50
  ### Training results
51
 
52
- | Training Loss | Epoch | Step | Ndcg | Validation Loss |
53
- |:-------------:|:-----:|:----:|:------:|:---------------:|
54
- | 8815.8352 | 1.0 | 10 | 0.9466 | 2171.9216 |
55
- | 8678.457 | 2.0 | 20 | 0.9472 | 2132.7099 |
56
- | 8569.9031 | 3.0 | 30 | 0.9475 | 2109.9384 |
57
 
58
 
59
  ### Framework versions
 
1
  ---
2
  library_name: transformers
3
+ license: apache-2.0
4
+ base_model: mistralai/Mistral-7B-Instruct-v0.3
5
  tags:
6
  - generated_from_trainer
7
  model-index:
 
14
 
15
  # mistral-7b-instruct-v0.3-mimic4-adapt-l2r
16
 
17
+ This model is a fine-tuned version of [mistralai/Mistral-7B-Instruct-v0.3](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3) on the None dataset.
18
  It achieves the following results on the evaluation set:
19
+ - Ndcg: 0.9589
20
+ - Loss: 2196.0439
21
 
22
  ## Model description
23
 
 
49
 
50
  ### Training results
51
 
52
+ | Training Loss | Epoch | Step | Ndcg | Validation Loss |
53
+ |:-------------:|:------:|:----:|:------:|:---------------:|
54
+ | No log | 1.0 | 4 | 0.9586 | 2231.4878 |
55
+ | No log | 2.0 | 8 | 0.9589 | 2196.0439 |
56
+ | No log | 2.3077 | 9 | 0.9589 | 2196.0439 |
57
 
58
 
59
  ### Framework versions
config.json CHANGED
@@ -1,24 +1,23 @@
1
  {
2
  "_attn_implementation_autoset": true,
3
- "_name_or_path": "microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract",
4
  "architectures": [
5
- "BertForMaskedLM"
6
  ],
7
- "attention_probs_dropout_prob": 0.1,
8
- "classifier_dropout": null,
9
- "hidden_act": "gelu",
10
- "hidden_dropout_prob": 0.1,
11
- "hidden_size": 768,
 
12
  "initializer_range": 0.02,
13
- "intermediate_size": 3072,
14
- "layer_norm_eps": 1e-12,
15
- "max_position_embeddings": 512,
16
- "model_type": "bert",
17
- "num_attention_heads": 12,
18
- "num_hidden_layers": 12,
19
  "num_labels": 7942,
20
- "pad_token_id": 0,
21
- "position_embedding_type": "absolute",
22
  "quantization_config": {
23
  "_load_in_4bit": true,
24
  "_load_in_8bit": false,
@@ -34,8 +33,12 @@
34
  "load_in_8bit": false,
35
  "quant_method": "bitsandbytes"
36
  },
 
 
 
 
 
37
  "transformers_version": "4.49.0",
38
- "type_vocab_size": 2,
39
  "use_cache": true,
40
- "vocab_size": 30522
41
  }
 
1
  {
2
  "_attn_implementation_autoset": true,
3
+ "_name_or_path": "mistralai/Mistral-7B-Instruct-v0.3",
4
  "architectures": [
5
+ "MistralForCausalLM"
6
  ],
7
+ "attention_dropout": 0.0,
8
+ "bos_token_id": 1,
9
+ "eos_token_id": 2,
10
+ "head_dim": 128,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 4096,
13
  "initializer_range": 0.02,
14
+ "intermediate_size": 14336,
15
+ "max_position_embeddings": 32768,
16
+ "model_type": "mistral",
17
+ "num_attention_heads": 32,
18
+ "num_hidden_layers": 32,
19
+ "num_key_value_heads": 8,
20
  "num_labels": 7942,
 
 
21
  "quantization_config": {
22
  "_load_in_4bit": true,
23
  "_load_in_8bit": false,
 
33
  "load_in_8bit": false,
34
  "quant_method": "bitsandbytes"
35
  },
36
+ "rms_norm_eps": 1e-05,
37
+ "rope_theta": 1000000.0,
38
+ "sliding_window": null,
39
+ "tie_word_embeddings": false,
40
+ "torch_dtype": "bfloat16",
41
  "transformers_version": "4.49.0",
 
42
  "use_cache": true,
43
+ "vocab_size": 32768
44
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:942e579bc3b5110b83d56abe7d4321f5c7ea5e8750e302683aab366f40475ff5
3
- size 125319068
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c54c8c4c988abed597571362cd60ba8c227dcb2731c01f2f778b29409e5bc2b
3
+ size 4323007331
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:284b8ae85e58704ceaa48598daabc81df9d387320a8582fd19e360e6578f2f8a
3
  size 5432
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf7c1660c8340ddd8a5681cb8db680eae54e303e4654882f288171462b81b370
3
  size 5432