Upload folder using huggingface_hub

- README.md +8 -0
- config.json +1 -0
- pytorch_model.bin +3 -0
README.md ADDED
@@ -0,0 +1,8 @@
+# My Gemma3 270M Model
+
+A custom PyTorch implementation of the Gemma-3 270M model. For full details, see the original notebook.
+
+## Usage
+
+Download the model, config, and tokenizer. Instantiate Gemma3Model, then load the weights and config as shown in this repo (a sketch of this workflow follows the diff below).
+
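A minimal sketch of that usage workflow. The repo id `user/my-gemma3-270m` and the module `gemma3_model` are placeholders, and the `Gemma3Model` constructor is assumed to accept the config dict directly, as in the original notebook; none of these are defined by this commit itself:

```python
import json

import torch
from huggingface_hub import hf_hub_download

from gemma3_model import Gemma3Model  # assumed: class from the original notebook

REPO_ID = "user/my-gemma3-270m"  # placeholder repo id

# Fetch the config and the LFS-backed weight file from the Hub.
config_path = hf_hub_download(REPO_ID, "config.json")
weights_path = hf_hub_download(REPO_ID, "pytorch_model.bin")

with open(config_path) as f:
    cfg = json.load(f)

# Assumed constructor signature: Gemma3Model(cfg) taking the raw config dict.
model = Gemma3Model(cfg)
model.load_state_dict(torch.load(weights_path, map_location="cpu"))
model.eval()
```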
config.json ADDED
@@ -0,0 +1 @@
+{"vocab_size": 262144, "context_length": 32768, "emb_dim": 640, "n_heads": 4, "n_layers": 18, "hidden_dim": 2048, "head_dim": 256, "qk_norm": true, "n_kv_groups": 1, "rope_local_base": 10000.0, "rope_base": 1000000.0, "sliding_window": 512, "layer_types": ["sliding_attention", "sliding_attention", "sliding_attention", "sliding_attention", "sliding_attention", "full_attention", "sliding_attention", "sliding_attention", "sliding_attention", "sliding_attention", "sliding_attention", "full_attention", "sliding_attention", "sliding_attention", "sliding_attention", "sliding_attention", "sliding_attention", "full_attention"], "dtype": "torch.bfloat16", "query_pre_attn_scalar": 256}
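Two details worth noting when consuming this config: `layer_types` describes an 18-layer stack in which every sixth layer uses full attention and the rest use 512-token sliding-window attention, and the `dtype` field is serialized as the string `"torch.bfloat16"`. A minimal sketch of mapping that string back to a real torch dtype, assuming a local copy of `config.json`:

```python
import json

import torch

with open("config.json") as f:
    cfg = json.load(f)

# Convert the serialized dtype string into a torch.dtype,
# e.g. "torch.bfloat16" -> torch.bfloat16.
cfg["dtype"] = getattr(torch, cfg["dtype"].removeprefix("torch."))
assert cfg["dtype"] is torch.bfloat16
```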
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7302890fb452e29c803a07d4b87ab6f033e16a3e8a1ad090fadcd9ce1ee74ed1
+size 871827902
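The three lines above are a Git LFS pointer, not the weights themselves: the `oid` is the SHA-256 of the real ~872 MB weight file, and `hf_hub_download` (or `git lfs pull`) resolves the pointer to the actual binary. A minimal sketch that verifies a downloaded `pytorch_model.bin` against the pointer, assuming the file is in the working directory:

```python
import hashlib
import os

# Values copied from the LFS pointer above.
EXPECTED_OID = "7302890fb452e29c803a07d4b87ab6f033e16a3e8a1ad090fadcd9ce1ee74ed1"
EXPECTED_SIZE = 871827902  # bytes (~872 MB)

# Hash the file in 1 MiB chunks to avoid loading 872 MB into memory at once.
h = hashlib.sha256()
with open("pytorch_model.bin", "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        h.update(chunk)

assert os.path.getsize("pytorch_model.bin") == EXPECTED_SIZE
assert h.hexdigest() == EXPECTED_OID
```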