End of training

Browse files

Files changed (13)

README.md +24 -17
config.json +16 -15
model.safetensors +2 -2
runs/Jul14_00-45-54_ea68994bd6cb/events.out.tfevents.1752453954.ea68994bd6cb.9476.14 +3 -0
runs/Jul14_00-48-05_ea68994bd6cb/events.out.tfevents.1752454085.ea68994bd6cb.9476.15 +3 -0
runs/Jul14_00-55-40_ea68994bd6cb/events.out.tfevents.1752454541.ea68994bd6cb.34315.0 +3 -0
runs/Jul14_00-56-20_ea68994bd6cb/events.out.tfevents.1752454581.ea68994bd6cb.34315.1 +3 -0
runs/Jul14_00-56-37_ea68994bd6cb/events.out.tfevents.1752454599.ea68994bd6cb.34315.2 +3 -0
runs/Jul14_00-57-49_ea68994bd6cb/events.out.tfevents.1752454675.ea68994bd6cb.34315.3 +3 -0
runs/Jul14_01-01-07_ea68994bd6cb/events.out.tfevents.1752454868.ea68994bd6cb.34315.4 +3 -0
special_tokens_map.json +35 -5
tokenizer_config.json +10 -1
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 library_name: transformers
 license: apache-2.0
-base_model: distilbert/distilbert-base-uncased
 tags:
 - generated_from_trainer
 metrics:
@@ -19,13 +19,13 @@ should probably proofread and complete it, then remove this comment. -->
 # my_awesome_wnut_model
-This model is a fine-tuned version of [distilbert/distilbert-base-uncased](https://huggingface.co/distilbert/distilbert-base-uncased) on the None dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.1176
-- Precision: 0.3223
-- Recall: 0.2889
-- F1: 0.3047
-- Accuracy: 0.9671
 ## Model description
@@ -44,26 +44,33 @@ More information needed
 ### Training hyperparameters
 The following hyperparameters were used during training:
-- learning_rate: 2e-05
-- train_batch_size: 16
-- eval_batch_size: 16
 - seed: 42
 - optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
 - lr_scheduler_type: linear
-- num_epochs: 3
 ### Training results
-| Training Loss | Epoch | Step | Validation Loss | Precision | Recall | F1     | Accuracy |
-|:-------------:|:-----:|:----:|:---------------:|:---------:|:------:|:------:|:--------:|
-| No log        | 1.0   | 37   | 0.1255          | 0.2105    | 0.1778 | 0.1928 | 0.9676   |
-| No log        | 2.0   | 74   | 0.1204          | 0.2881    | 0.2519 | 0.2688 | 0.9669   |
-| No log        | 3.0   | 111  | 0.1176          | 0.3223    | 0.2889 | 0.3047 | 0.9671   |
 ### Framework versions
-- Transformers 4.48.0
 - Pytorch 2.6.0+cu124
 - Datasets 2.14.4
 - Tokenizers 0.21.2

 ---
 library_name: transformers
 license: apache-2.0
+base_model: sentence-transformers/all-MiniLM-L6-v2
 tags:
 - generated_from_trainer
 metrics:
 # my_awesome_wnut_model
+This model is a fine-tuned version of [sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) on an unspecified dataset.
 It achieves the following results on the evaluation set:
+- Loss: 0.8907
+- Precision: 0.0
+- Recall: 0.0
+- F1: 0.0
+- Accuracy: 0.8995
 ## Model description
 ### Training hyperparameters
 The following hyperparameters were used during training:
+- learning_rate: 2e-06
+- train_batch_size: 128
+- eval_batch_size: 128
 - seed: 42
 - optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
 - lr_scheduler_type: linear
+- num_epochs: 10
 ### Training results
+| Training Loss | Epoch | Step | Validation Loss | Precision | Recall | F1  | Accuracy |
+|:-------------:|:-----:|:----:|:---------------:|:---------:|:------:|:---:|:--------:|
+| No log        | 1.0   | 5    | 1.0442          | 0.0       | 0.0    | 0.0 | 0.7738   |
+| No log        | 2.0   | 10   | 1.0181          | 0.0       | 0.0    | 0.0 | 0.8208   |
+| No log        | 3.0   | 15   | 0.9938          | 0.0       | 0.0    | 0.0 | 0.8437   |
+| No log        | 4.0   | 20   | 0.9709          | 0.0       | 0.0    | 0.0 | 0.8601   |
+| No log        | 5.0   | 25   | 0.9496          | 0.0       | 0.0    | 0.0 | 0.8719   |
+| No log        | 6.0   | 30   | 0.9306          | 0.0       | 0.0    | 0.0 | 0.8836   |
+| No log        | 7.0   | 35   | 0.9143          | 0.0       | 0.0    | 0.0 | 0.8915   |
+| No log        | 8.0   | 40   | 0.9018          | 0.0       | 0.0    | 0.0 | 0.8958   |
+| No log        | 9.0   | 45   | 0.8938          | 0.0       | 0.0    | 0.0 | 0.8986   |
+| No log        | 10.0  | 50   | 0.8907          | 0.0       | 0.0    | 0.0 | 0.8995   |
 ### Framework versions
+- Transformers 4.53.2
 - Pytorch 2.6.0+cu124
 - Datasets 2.14.4
 - Tokenizers 0.21.2

config.json CHANGED Viewed

@@ -1,34 +1,35 @@
 {
-  "_name_or_path": "distilbert/distilbert-base-uncased",
-  "activation": "gelu",
   "architectures": [
-    "DistilBertForTokenClassification"
   ],
-  "attention_dropout": 0.1,
-  "dim": 768,
-  "dropout": 0.1,
-  "hidden_dim": 3072,
   "id2label": {
     "0": "O",
     "1": "B-ENTITY",
     "2": "I-ENTITY"
   },
   "initializer_range": 0.02,
   "label2id": {
     "B-ENTITY": 1,
     "I-ENTITY": 2,
     "O": 0
   },
   "max_position_embeddings": 512,
-  "model_type": "distilbert",
-  "n_heads": 12,
-  "n_layers": 6,
   "pad_token_id": 0,
-  "qa_dropout": 0.1,
-  "seq_classif_dropout": 0.2,
-  "sinusoidal_pos_embds": false,
-  "tie_weights_": true,
   "torch_dtype": "float32",
-  "transformers_version": "4.48.0",
   "vocab_size": 30522
 }

 {
   "architectures": [
+    "BertForTokenClassification"
   ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 384,
   "id2label": {
     "0": "O",
     "1": "B-ENTITY",
     "2": "I-ENTITY"
   },
   "initializer_range": 0.02,
+  "intermediate_size": 1536,
   "label2id": {
     "B-ENTITY": 1,
     "I-ENTITY": 2,
     "O": 0
   },
+  "layer_norm_eps": 1e-12,
   "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 6,
   "pad_token_id": 0,
+  "position_embedding_type": "absolute",
   "torch_dtype": "float32",
+  "transformers_version": "4.53.2",
+  "type_vocab_size": 2,
+  "use_cache": true,
   "vocab_size": 30522
 }

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:14367c497e71388cfebfda2c06e1c5979e87da8d76c449b6d465a3a56ee3df68
-size 265473092

 version https://git-lfs.github.com/spec/v1
+oid sha256:8b1a3091481cf165e1d866d1dcf30e1436da76a2db2c49e272f2e46eb71fc795
+size 90277948

runs/Jul14_00-45-54_ea68994bd6cb/events.out.tfevents.1752453954.ea68994bd6cb.9476.14 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5d41b2463975ab8130804a2a0bfe10e869916722d1acad5b8ca1ae5f7d19b4c3
+size 10143

runs/Jul14_00-48-05_ea68994bd6cb/events.out.tfevents.1752454085.ea68994bd6cb.9476.15 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:47c20f370d6e62936cebead8a5966f61c791de9ae385d318a934656b9920d440
+size 6485

runs/Jul14_00-55-40_ea68994bd6cb/events.out.tfevents.1752454541.ea68994bd6cb.34315.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f52e7b01085c88669c5dff4b049df204fceb6b2bcb58f0ed2161a570a3ae9f84
+size 5123

runs/Jul14_00-56-20_ea68994bd6cb/events.out.tfevents.1752454581.ea68994bd6cb.34315.1 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0c6909059bffc6397a204e458bb3dd9ccd80d357c969fb122d9d17b198d0da17
+size 6499

runs/Jul14_00-56-37_ea68994bd6cb/events.out.tfevents.1752454599.ea68994bd6cb.34315.2 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:12ad725266dcee3deae09f3989f7d44cef3eb0719e5df845bb3e382866f4e180
+size 11681

runs/Jul14_00-57-49_ea68994bd6cb/events.out.tfevents.1752454675.ea68994bd6cb.34315.3 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:deabf3935b11890a3b14027e1ef8474ec99f4d19593ffaca1536cfa1c95d1b46
+size 10107

runs/Jul14_01-01-07_ea68994bd6cb/events.out.tfevents.1752454868.ea68994bd6cb.34315.4 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ae246d8b13dbeaf822b33b86a9365cb9960d15419b9e19231e80defcf3298d31
+size 10090

special_tokens_map.json CHANGED Viewed

@@ -1,7 +1,37 @@
 {
-  "cls_token": "[CLS]",
-  "mask_token": "[MASK]",
-  "pad_token": "[PAD]",
-  "sep_token": "[SEP]",
-  "unk_token": "[UNK]"
 }

 {
+  "cls_token": {
+    "content": "[CLS]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "[MASK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "[PAD]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "[SEP]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "[UNK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
 }

tokenizer_config.json CHANGED Viewed

@@ -43,14 +43,23 @@
   },
   "clean_up_tokenization_spaces": false,
   "cls_token": "[CLS]",
   "do_lower_case": true,
   "extra_special_tokens": {},
   "mask_token": "[MASK]",
   "model_max_length": 512,
   "pad_token": "[PAD]",
   "sep_token": "[SEP]",
   "strip_accents": null,
   "tokenize_chinese_chars": true,
-  "tokenizer_class": "DistilBertTokenizer",
   "unk_token": "[UNK]"
 }

   },
   "clean_up_tokenization_spaces": false,
   "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
   "do_lower_case": true,
   "extra_special_tokens": {},
   "mask_token": "[MASK]",
+  "max_length": 128,
   "model_max_length": 512,
+  "never_split": null,
+  "pad_to_multiple_of": null,
   "pad_token": "[PAD]",
+  "pad_token_type_id": 0,
+  "padding_side": "right",
   "sep_token": "[SEP]",
+  "stride": 0,
   "strip_accents": null,
   "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "truncation_side": "right",
+  "truncation_strategy": "longest_first",
   "unk_token": "[UNK]"
 }

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f00798244c45c344061dad147cef044f59f7ffa8b0c7c653f2cda5268d1c6844
 size 5368

 version https://git-lfs.github.com/spec/v1
+oid sha256:dea1ef3657ad8b2713f144d2e7399de9c0a02c57643b05cb494c1d51431e60e8
 size 5368