Upload folder using huggingface_hub

- README.md +8 -0
- config.json +1 -0
- pytorch_model.bin +3 -0
README.md ADDED
@@ -0,0 +1,8 @@
+# My Gemma3 270M Model
+
+A custom PyTorch implementation of the Gemma-3 270M model. For full details, see the original notebook.
+
+## Usage
+
+Download the model, config, and tokenizer. Instantiate Gemma3Model, then load the weights and config as shown in this repo (a sketch of this workflow follows the diff below).
+
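A minimal sketch of that usage workflow. The repo id `user/my-gemma3-270m` and the module `gemma3_model` are placeholders, and the `Gemma3Model` constructor is assumed to accept the config dict directly, as in the original notebook; none of these are defined by this commit itself:

```python
import json

import torch
from huggingface_hub import hf_hub_download

from gemma3_model import Gemma3Model  # assumed: class from the original notebook

REPO_ID = "user/my-gemma3-270m"  # placeholder repo id

# Fetch the config and the LFS-backed weight file from the Hub.
config_path = hf_hub_download(REPO_ID, "config.json")
weights_path = hf_hub_download(REPO_ID, "pytorch_model.bin")

with open(config_path) as f:
    cfg = json.load(f)

# Assumed constructor signature: Gemma3Model(cfg) taking the raw config dict.
model = Gemma3Model(cfg)
model.load_state_dict(torch.load(weights_path, map_location="cpu"))
model.eval()
```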
config.json ADDED
@@ -0,0 +1 @@
+{"vocab_size": 262144, "context_length": 32768, "emb_dim": 640, "n_heads": 4, "n_layers": 18, "hidden_dim": 2048, "head_dim": 256, "qk_norm": true, "n_kv_groups": 1, "rope_local_base": 10000.0, "rope_base": 1000000.0, "sliding_window": 512, "layer_types": ["sliding_attention", "sliding_attention", "sliding_attention", "sliding_attention", "sliding_attention", "full_attention", "sliding_attention", "sliding_attention", "sliding_attention", "sliding_attention", "sliding_attention", "full_attention", "sliding_attention", "sliding_attention", "sliding_attention", "sliding_attention", "sliding_attention", "full_attention"], "dtype": "torch.bfloat16", "query_pre_attn_scalar": 256}
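Two details worth noting when consuming this config: `layer_types` describes an 18-layer stack in which every sixth layer uses full attention and the rest use 512-token sliding-window attention, and the `dtype` field is serialized as the string `"torch.bfloat16"`. A minimal sketch of mapping that string back to a real torch dtype, assuming a local copy of `config.json`:

```python
import json

import torch

with open("config.json") as f:
    cfg = json.load(f)

# Convert the serialized dtype string into a torch.dtype,
# e.g. "torch.bfloat16" -> torch.bfloat16.
cfg["dtype"] = getattr(torch, cfg["dtype"].removeprefix("torch."))
assert cfg["dtype"] is torch.bfloat16
```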
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7302890fb452e29c803a07d4b87ab6f033e16a3e8a1ad090fadcd9ce1ee74ed1
+size 871827902
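The three lines above are a Git LFS pointer, not the weights themselves: the `oid` is the SHA-256 of the real ~872 MB weight file, and `hf_hub_download` (or `git lfs pull`) resolves the pointer to the actual binary. A minimal sketch that verifies a downloaded `pytorch_model.bin` against the pointer, assuming the file is in the working directory:

```python
import hashlib
import os

# Values copied from the LFS pointer above.
EXPECTED_OID = "7302890fb452e29c803a07d4b87ab6f033e16a3e8a1ad090fadcd9ce1ee74ed1"
EXPECTED_SIZE = 871827902  # bytes (~872 MB)

# Hash the file in 1 MiB chunks to avoid loading 872 MB into memory at once.
h = hashlib.sha256()
with open("pytorch_model.bin", "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        h.update(chunk)

assert os.path.getsize("pytorch_model.bin") == EXPECTED_SIZE
assert h.hexdigest() == EXPECTED_OID
```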