DayCardoso committed on
Commit
97ada1b
·
verified ·
1 Parent(s): 6f8fa23

Training in progress, step 200

Browse files
Files changed (3) hide show
  1. config.json +5 -7
  2. model.safetensors +2 -2
  3. training_args.bin +1 -1
config.json CHANGED
@@ -1,10 +1,9 @@
1
  {
2
  "architectures": [
3
- "ModernBERTForMultiLabelMultiHead"
4
  ],
5
  "attention_bias": false,
6
  "attention_dropout": 0.0,
7
- "attention_probs_dropout_prob": 0.1,
8
  "bos_token_id": 50281,
9
  "classifier_activation": "gelu",
10
  "classifier_bias": false,
@@ -18,9 +17,7 @@
18
  "global_attn_every_n_layers": 3,
19
  "global_rope_theta": 160000.0,
20
  "gradient_checkpointing": false,
21
- "hidden_act": "gelu",
22
  "hidden_activation": "gelu",
23
- "hidden_dropout_prob": 0.1,
24
  "hidden_size": 768,
25
  "id2label": {
26
  "0": "Self-direction: thought",
@@ -73,7 +70,7 @@
73
  "max_position_embeddings": 8192,
74
  "mlp_bias": false,
75
  "mlp_dropout": 0.0,
76
- "model_type": "bert",
77
  "norm_bias": false,
78
  "norm_eps": 1e-05,
79
  "num_attention_heads": 12,
@@ -81,10 +78,11 @@
81
  "pad_token_id": 50283,
82
  "position_embedding_type": "absolute",
83
  "problem_type": "multi_label_classification",
 
84
  "sep_token_id": 50282,
 
 
85
  "torch_dtype": "float32",
86
  "transformers_version": "4.53.2",
87
- "type_vocab_size": 2,
88
- "use_cache": true,
89
  "vocab_size": 50389
90
  }
 
1
  {
2
  "architectures": [
3
+ "ModernBertForSequenceClassification"
4
  ],
5
  "attention_bias": false,
6
  "attention_dropout": 0.0,
 
7
  "bos_token_id": 50281,
8
  "classifier_activation": "gelu",
9
  "classifier_bias": false,
 
17
  "global_attn_every_n_layers": 3,
18
  "global_rope_theta": 160000.0,
19
  "gradient_checkpointing": false,
 
20
  "hidden_activation": "gelu",
 
21
  "hidden_size": 768,
22
  "id2label": {
23
  "0": "Self-direction: thought",
 
70
  "max_position_embeddings": 8192,
71
  "mlp_bias": false,
72
  "mlp_dropout": 0.0,
73
+ "model_type": "modernbert",
74
  "norm_bias": false,
75
  "norm_eps": 1e-05,
76
  "num_attention_heads": 12,
 
78
  "pad_token_id": 50283,
79
  "position_embedding_type": "absolute",
80
  "problem_type": "multi_label_classification",
81
+ "repad_logits_with_grad": false,
82
  "sep_token_id": 50282,
83
+ "sparse_pred_ignore_index": -100,
84
+ "sparse_prediction": false,
85
  "torch_dtype": "float32",
86
  "transformers_version": "4.53.2",
 
 
87
  "vocab_size": 50389
88
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:31304f355e603b21ab83ffccf8d25dfc058655682caedabfef632f011fb9e986
3
- size 546480996
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8f1e7e81224bc4057d62ac9f808f5e526fa0ef26cb3f164f346f3c89d77bcff
3
+ size 598556596
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6aa546690a1bbc7de455f8fb8c5e8ba1db1ba904581f5c854d3beab2e8ef3db1
3
  size 5432
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa8452647771e4a35c013f6a3592a9449a2aa3b5d25cb5a8cad9ed98c6611c04
3
  size 5432