myselfsaurabh committed on
Commit
829a5ca
·
verified ·
1 Parent(s): 3541479

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. README.md +8 -0
  2. config.json +1 -0
  3. pytorch_model.bin +3 -0
README.md ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ # My Gemma3 270M Model
2
+
3
+ A custom PyTorch implementation of the Gemma-3 270M model. For full details see the original notebook.
4
+
5
+ ## Usage
6
+
7
+ Download the model weights (`pytorch_model.bin`), the configuration (`config.json`), and the tokenizer. Instantiate `Gemma3Model`, then load the weights and configuration as shown in this repository.
8
+
config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"vocab_size": 262144, "context_length": 32768, "emb_dim": 640, "n_heads": 4, "n_layers": 18, "hidden_dim": 2048, "head_dim": 256, "qk_norm": true, "n_kv_groups": 1, "rope_local_base": 10000.0, "rope_base": 1000000.0, "sliding_window": 512, "layer_types": ["sliding_attention", "sliding_attention", "sliding_attention", "sliding_attention", "sliding_attention", "full_attention", "sliding_attention", "sliding_attention", "sliding_attention", "sliding_attention", "sliding_attention", "full_attention", "sliding_attention", "sliding_attention", "sliding_attention", "sliding_attention", "sliding_attention", "full_attention"], "dtype": "torch.bfloat16", "query_pre_attn_scalar": 256}
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7302890fb452e29c803a07d4b87ab6f033e16a3e8a1ad090fadcd9ce1ee74ed1
3
+ size 871827902