albertvillanova HF Staff committed
Commit 2379302 · verified · 1 Parent(s): 82845b2

Upload Gemma3ForConditionalGeneration (#3)

- Upload Gemma3ForConditionalGeneration (5b3dad501ce477b2820f9508a750b990af5df4e7)

Files changed (3):
  1. config.json +2 -34
  2. generation_config.json +1 -1
  3. model.safetensors +1 -1

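The commit uploads a checkpoint for the Gemma3ForConditionalGeneration class. A minimal loading sketch, assuming a transformers release that includes this class (4.56.x per the diffs below); the repo id is a placeholder, since this page does not show which repository the commit belongs to:

import torch
from transformers import Gemma3ForConditionalGeneration

# "some-namespace/tiny-gemma3" is a placeholder; substitute the repository this commit was pushed to.
model = Gemma3ForConditionalGeneration.from_pretrained(
    "some-namespace/tiny-gemma3",
    torch_dtype=torch.bfloat16,  # matches the dtype declared in config.json below
)
print(model.num_parameters())
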
config.json CHANGED
@@ -3,6 +3,7 @@
     "Gemma3ForConditionalGeneration"
   ],
   "boi_token_index": 255999,
+  "dtype": "bfloat16",
   "eoi_token_index": 256000,
   "eos_token_id": [
     1,
@@ -24,38 +25,6 @@
     "initializer_range": 0.02,
     "intermediate_size": 10240,
     "layer_types": [
-      "sliding_attention",
-      "sliding_attention",
-      "sliding_attention",
-      "sliding_attention",
-      "sliding_attention",
-      "full_attention",
-      "sliding_attention",
-      "sliding_attention",
-      "sliding_attention",
-      "sliding_attention",
-      "sliding_attention",
-      "full_attention",
-      "sliding_attention",
-      "sliding_attention",
-      "sliding_attention",
-      "sliding_attention",
-      "sliding_attention",
-      "full_attention",
-      "sliding_attention",
-      "sliding_attention",
-      "sliding_attention",
-      "sliding_attention",
-      "sliding_attention",
-      "full_attention",
-      "sliding_attention",
-      "sliding_attention",
-      "sliding_attention",
-      "sliding_attention",
-      "sliding_attention",
-      "full_attention",
-      "sliding_attention",
-      "sliding_attention",
       "sliding_attention",
       "sliding_attention"
     ],
@@ -76,8 +45,7 @@
     "use_cache": true,
     "vocab_size": 262208
   },
-  "torch_dtype": "bfloat16",
-  "transformers_version": "4.56.0.dev0",
+  "transformers_version": "4.56.1",
   "vision_config": {
     "attention_dropout": 0.0,
     "hidden_act": "gelu_pytorch_tanh",
generation_config.json CHANGED
@@ -6,5 +6,5 @@
     106
   ],
   "pad_token_id": 0,
-  "transformers_version": "4.56.0.dev0"
+  "transformers_version": "4.56.1"
 }

model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cf90459fa823642f18860212325d52bc76a2e0e520e70ab88d56d132396d673f
+oid sha256:56dc37066c675420391d220a2ed601068b2eb092ed96d8e8d2ba4cd1f9de8e97
 size 11287536
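
The model.safetensors entry is a Git LFS pointer, so the diff only touches the object hash; the payload size is unchanged. A short sketch for verifying that a downloaded weight file matches the updated pointer; the local path is a placeholder:

import hashlib
import os

WEIGHTS_PATH = "path/to/local/model.safetensors"  # placeholder; the real file is fetched via Git LFS

# Values copied from the updated LFS pointer above.
EXPECTED_OID = "56dc37066c675420391d220a2ed601068b2eb092ed96d8e8d2ba4cd1f9de8e97"
EXPECTED_SIZE = 11287536

print("size matches pointer:", os.path.getsize(WEIGHTS_PATH) == EXPECTED_SIZE)

sha256 = hashlib.sha256()
with open(WEIGHTS_PATH, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        sha256.update(chunk)
print("sha256 matches pointer:", sha256.hexdigest() == EXPECTED_OID)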