DayCardoso
/

modernbert-base-multi-head-values-context

Text Classification

Generated from Trainer

Model card · Files and versions

DayCardoso committed on Jul 17

Commit

97ada1b

·

verified ·

1 Parent(s): 6f8fa23

Training in progress, step 200

Files changed (3) hide show

config.json +5 -7
model.safetensors +2 -2
training_args.bin +1 -1

config.json CHANGED Viewed

@@ -1,10 +1,9 @@
 {
   "architectures": [
-    "ModernBERTForMultiLabelMultiHead"
   ],
   "attention_bias": false,
   "attention_dropout": 0.0,
-  "attention_probs_dropout_prob": 0.1,
   "bos_token_id": 50281,
   "classifier_activation": "gelu",
   "classifier_bias": false,
@@ -18,9 +17,7 @@
   "global_attn_every_n_layers": 3,
   "global_rope_theta": 160000.0,
   "gradient_checkpointing": false,
-  "hidden_act": "gelu",
   "hidden_activation": "gelu",
-  "hidden_dropout_prob": 0.1,
   "hidden_size": 768,
   "id2label": {
     "0": "Self-direction: thought",
@@ -73,7 +70,7 @@
   "max_position_embeddings": 8192,
   "mlp_bias": false,
   "mlp_dropout": 0.0,
-  "model_type": "bert",
   "norm_bias": false,
   "norm_eps": 1e-05,
   "num_attention_heads": 12,
@@ -81,10 +78,11 @@
   "pad_token_id": 50283,
   "position_embedding_type": "absolute",
   "problem_type": "multi_label_classification",
   "sep_token_id": 50282,
   "torch_dtype": "float32",
   "transformers_version": "4.53.2",
-  "type_vocab_size": 2,
-  "use_cache": true,
   "vocab_size": 50389
 }

 {
   "architectures": [
+    "ModernBertForSequenceClassification"
   ],
   "attention_bias": false,
   "attention_dropout": 0.0,
   "bos_token_id": 50281,
   "classifier_activation": "gelu",
   "classifier_bias": false,
   "global_attn_every_n_layers": 3,
   "global_rope_theta": 160000.0,
   "gradient_checkpointing": false,
   "hidden_activation": "gelu",
   "hidden_size": 768,
   "id2label": {
     "0": "Self-direction: thought",
   "max_position_embeddings": 8192,
   "mlp_bias": false,
   "mlp_dropout": 0.0,
+  "model_type": "modernbert",
   "norm_bias": false,
   "norm_eps": 1e-05,
   "num_attention_heads": 12,
   "pad_token_id": 50283,
   "position_embedding_type": "absolute",
   "problem_type": "multi_label_classification",
+  "repad_logits_with_grad": false,
   "sep_token_id": 50282,
+  "sparse_pred_ignore_index": -100,
+  "sparse_prediction": false,
   "torch_dtype": "float32",
   "transformers_version": "4.53.2",
   "vocab_size": 50389
 }

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:31304f355e603b21ab83ffccf8d25dfc058655682caedabfef632f011fb9e986
-size 546480996

 version https://git-lfs.github.com/spec/v1
+oid sha256:a8f1e7e81224bc4057d62ac9f808f5e526fa0ef26cb3f164f346f3c89d77bcff
+size 598556596

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6aa546690a1bbc7de455f8fb8c5e8ba1db1ba904581f5c854d3beab2e8ef3db1
 size 5432

 version https://git-lfs.github.com/spec/v1
+oid sha256:fa8452647771e4a35c013f6a3592a9449a2aa3b5d25cb5a8cad9ed98c6611c04
 size 5432