from transformers.configuration_utils import PretrainedConfig
from transformers.utils import logging

logger = logging.get_logger(__name__)

DEEPSEEK_PRETRAINED_CONFIG_ARCHIVE_MAP = {}


class DeepseekV3Config(PretrainedConfig):
    r"""
    This is the configuration class to store the configuration of a [`DeepseekV3Model`]. It is used to instantiate a
    DeepSeek model according to the specified arguments, defining the model architecture. Instantiating a
    configuration with the defaults will yield a similar configuration to that of the DeepSeek-V3.

    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
    documentation from [`PretrainedConfig`] for more information.

    Every argument below is stored unchanged as an attribute of the same name, except `num_key_value_heads` (see its
    entry) and the token ids / `tie_word_embeddings`, which are forwarded to [`PretrainedConfig`].


    Args:
        vocab_size (`int`, *optional*, defaults to 129280):
            Vocabulary size of the DeepSeek model. Defines the number of different tokens that can be represented by
            the `inputs_ids` passed when calling [`DeepseekV3Model`]
        hidden_size (`int`, *optional*, defaults to 7168):
            Dimension of the hidden representations.
        intermediate_size (`int`, *optional*, defaults to 18432):
            Dimension of the MLP representations.
        moe_intermediate_size (`int`, *optional*, defaults to 2048):
            Dimension of the MoE representations.
        num_hidden_layers (`int`, *optional*, defaults to 61):
            Number of hidden layers in the Transformer decoder.
        num_nextn_predict_layers (`int`, *optional*, defaults to 1):
            Number of nextn predict layers in the DeepSeekV3 Model.
        num_attention_heads (`int`, *optional*, defaults to 128):
            Number of attention heads for each attention layer in the Transformer decoder.
        n_shared_experts (`int`, *optional*, defaults to 1):
            Number of shared experts, None means dense model.
        n_routed_experts (`int`, *optional*, defaults to 256):
            Number of routed experts, None means dense model.
        ep_size (`int`, *optional*, defaults to 1):
            Presumably the expert-parallel size (inferred from the name; this class only stores the value, confirm
            against the modeling code).
        routed_scaling_factor (`float`, *optional*, defaults to 2.5):
            Scaling factor for routed experts.
        kv_lora_rank (`int`, *optional*, defaults to 512):
            Presumably the rank of the low-rank key/value projection (inferred from the name; this class only stores
            the value, confirm against the modeling code).
        q_lora_rank (`int`, *optional*, defaults to 1536):
            Presumably the rank of the low-rank query projection (inferred from the name; this class only stores the
            value, confirm against the modeling code).
        qk_rope_head_dim (`int`, *optional*, defaults to 64):
            Presumably the per-head query/key dimension that receives rotary position embeddings (inferred from the
            name; confirm against the modeling code).
        v_head_dim (`int`, *optional*, defaults to 128):
            Presumably the per-head value dimension (inferred from the name; confirm against the modeling code).
        qk_nope_head_dim (`int`, *optional*, defaults to 128):
            Presumably the per-head query/key dimension without position embeddings (inferred from the name; confirm
            against the modeling code).
        topk_method (`str`, *optional*, defaults to `"noaux_tc"`):
            Topk method used in routed gate.
        n_group (`int`, *optional*, defaults to 8):
            Number of groups for routed experts.
        topk_group (`int`, *optional*, defaults to 4):
            Number of selected groups for each token (for each token, ensuring the selected experts is only within
            `topk_group` groups).
        num_experts_per_tok (`int`, *optional*, defaults to 8):
            Number of selected experts, None means dense model.
        moe_layer_freq (`int`, *optional*, defaults to 1):
            The frequency of the MoE layer: one expert layer for every `moe_layer_freq - 1` dense layers.
        first_k_dense_replace (`int`, *optional*, defaults to 3):
            Number of dense layers in shallow layers (embed->dense->dense->...->dense->moe->moe...->lm_head).
                                                               \--k dense layers--/
        norm_topk_prob (`bool`, *optional*, defaults to `True`):
            Whether to normalize the weights of the routed experts.
        scoring_func (`str`, *optional*, defaults to `"sigmoid"`):
            Method of computing expert weights.
        aux_loss_alpha (`float`, *optional*, defaults to 0.001):
            Auxiliary loss weight coefficient.
        seq_aux (`bool`, *optional*, defaults to `True`):
            Whether to compute the auxiliary loss for each individual sample.
        num_key_value_heads (`int`, *optional*, defaults to 128):
            This is the number of key_value heads that should be used to implement Grouped Query Attention. If
            `num_key_value_heads=num_attention_heads`, the model will use Multi Head Attention (MHA), if
            `num_key_value_heads=1` the model will use Multi Query Attention (MQA) otherwise GQA is used. When
            converting a multi-head checkpoint to a GQA checkpoint, each group key and value head should be
            constructed by meanpooling all the original heads within that group. For more details checkout [this
            paper](https://arxiv.org/pdf/2305.13245.pdf). If `None` is passed, falls back to `num_attention_heads`.
        hidden_act (`str` or `function`, *optional*, defaults to `"silu"`):
            The non-linear activation function (function or string) in the decoder.
        max_position_embeddings (`int`, *optional*, defaults to 4096):
            The maximum sequence length that this model might ever be used with.
        initializer_range (`float`, *optional*, defaults to 0.02):
            The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
        rms_norm_eps (`float`, *optional*, defaults to 1e-06):
            The epsilon used by the rms normalization layers.
        use_cache (`bool`, *optional*, defaults to `True`):
            Whether or not the model should return the last key/values attentions (not used by all models). Only
            relevant if `config.is_decoder=True`.
        pad_token_id (`int`, *optional*):
            Padding token id.
        bos_token_id (`int`, *optional*, defaults to 0):
            Beginning of stream token id.
        eos_token_id (`int`, *optional*, defaults to 1):
            End of stream token id.
        pretraining_tp (`int`, *optional*, defaults to 1):
            Experimental feature. Tensor parallelism rank used during pretraining. Please refer to [this
            document](https://huggingface.co/docs/transformers/parallelism) to understand more about it. This value
            is necessary to ensure exact reproducibility of the pretraining results. Please refer to [this
            issue](https://github.com/pytorch/pytorch/issues/76232).
        tie_word_embeddings (`bool`, *optional*, defaults to `False`):
            Whether to tie weight embeddings
        rope_theta (`float`, *optional*, defaults to 10000.0):
            The base period of the RoPE embeddings.
        rope_scaling (`Dict`, *optional*):
            Dictionary containing the scaling configuration for the RoPE embeddings. Currently supports two scaling
            strategies: linear and dynamic. Their scaling factor must be a float greater than 1. The expected format
            is `{"type": strategy name, "factor": scaling factor}`. When using this flag, don't update
            `max_position_embeddings` to the expected new maximum.
        attention_bias (`bool`, *optional*, defaults to `False`):
            Whether to use a bias in the query, key, value and output projection layers during self-attention.
        attention_dropout (`float`, *optional*, defaults to 0.0):
            The dropout ratio for the attention probabilities.
        kwargs (*optional*):
            Remaining keyword arguments, forwarded unchanged to [`PretrainedConfig`].

    ```python
    >>> from transformers import DeepseekV3Model, DeepseekV3Config

    >>> # Initializing a Deepseek-V3 style configuration
    >>> configuration = DeepseekV3Config()

    >>> # Initializing a model from the configuration
    >>> model = DeepseekV3Model(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```"""

    model_type = "deepseek_v3"
    keys_to_ignore_at_inference = ["past_key_values"]

    def __init__(
        self,
        vocab_size=129280,
        hidden_size=7168,
        intermediate_size=18432,
        moe_intermediate_size=2048,
        num_hidden_layers=61,
        num_nextn_predict_layers=1,
        num_attention_heads=128,
        num_key_value_heads=128,
        n_shared_experts=1,
        n_routed_experts=256,
        ep_size=1,
        routed_scaling_factor=2.5,
        kv_lora_rank=512,
        q_lora_rank=1536,
        qk_rope_head_dim=64,
        v_head_dim=128,
        qk_nope_head_dim=128,
        topk_method="noaux_tc",
        n_group=8,
        topk_group=4,
        num_experts_per_tok=8,
        moe_layer_freq=1,
        first_k_dense_replace=3,
        norm_topk_prob=True,
        scoring_func="sigmoid",
        aux_loss_alpha=0.001,
        seq_aux=True,
        hidden_act="silu",
        max_position_embeddings=4096,
        initializer_range=0.02,
        rms_norm_eps=1e-6,
        use_cache=True,
        pad_token_id=None,
        bos_token_id=0,
        eos_token_id=1,
        pretraining_tp=1,
        tie_word_embeddings=False,
        rope_theta=10000.0,
        rope_scaling=None,
        attention_bias=False,
        attention_dropout=0.0,
        **kwargs,
    ):
        # Model-specific attributes are set before delegating to the base class,
        # exactly as in the original implementation.
        self.vocab_size = vocab_size
        self.max_position_embeddings = max_position_embeddings
        self.hidden_size = hidden_size
        self.intermediate_size = intermediate_size
        self.moe_intermediate_size = moe_intermediate_size
        self.num_hidden_layers = num_hidden_layers
        self.num_nextn_predict_layers = num_nextn_predict_layers
        self.num_attention_heads = num_attention_heads
        self.n_shared_experts = n_shared_experts
        self.n_routed_experts = n_routed_experts
        self.ep_size = ep_size
        self.routed_scaling_factor = routed_scaling_factor
        self.kv_lora_rank = kv_lora_rank
        self.q_lora_rank = q_lora_rank
        self.qk_rope_head_dim = qk_rope_head_dim
        self.v_head_dim = v_head_dim
        self.qk_nope_head_dim = qk_nope_head_dim
        self.topk_method = topk_method
        self.n_group = n_group
        self.topk_group = topk_group
        self.num_experts_per_tok = num_experts_per_tok
        self.moe_layer_freq = moe_layer_freq
        self.first_k_dense_replace = first_k_dense_replace
        self.norm_topk_prob = norm_topk_prob
        self.scoring_func = scoring_func
        self.aux_loss_alpha = aux_loss_alpha
        self.seq_aux = seq_aux

        # For backward compatibility: an explicit `None` means "one key/value
        # head per attention head".
        if num_key_value_heads is None:
            num_key_value_heads = num_attention_heads

        self.num_key_value_heads = num_key_value_heads
        self.hidden_act = hidden_act
        self.initializer_range = initializer_range
        self.rms_norm_eps = rms_norm_eps
        self.pretraining_tp = pretraining_tp
        self.use_cache = use_cache
        self.rope_theta = rope_theta
        self.rope_scaling = rope_scaling
        self.attention_bias = attention_bias
        self.attention_dropout = attention_dropout

        # Token ids, embedding tying and any extra kwargs are handled by the
        # base class rather than stored here directly.
        super().__init__(
            pad_token_id=pad_token_id,
            bos_token_id=bos_token_id,
            eos_token_id=eos_token_id,
            tie_word_embeddings=tie_word_embeddings,
            **kwargs,
        )
sha256:8db4a3fefe91334014dd6f7971a388396624294ba5296c4b211631e308328841 +size 4996408560 diff --git a/model-00005-of-00073.safetensors b/model-00005-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..89460449fc0200fcf9a13a034a97381d5815b9e5 --- /dev/null +++ b/model-00005-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abb28a277a31b0a321fb242f2dfe97a1e141af41b16c04eaba3259c6bc6f729d +size 4996419192 diff --git a/model-00006-of-00073.safetensors b/model-00006-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5850c7a0a3ffed4e0c5860552f106a1c0f1f95bd --- /dev/null +++ b/model-00006-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37b19edd07e2b0bde915ba306563e3dbc15c68a3575843db4bc619f039e29c71 +size 4996419664 diff --git a/model-00007-of-00073.safetensors b/model-00007-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..98f0a26cc38445f25bcf0004cf4fa60396bac04e --- /dev/null +++ b/model-00007-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:810c0aa0935d8de8581436cf4756086dce79a98fd1adf79d7bc626aa35dc2a26 +size 4994816448 diff --git a/model-00008-of-00073.safetensors b/model-00008-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8753128c48476d25bd3a786117a9b0f9f65fc0a4 --- /dev/null +++ b/model-00008-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b184b7d875a7caceaffdab3f09db2f7bef9e3d436b5e198aa0bde1e4e43312e +size 4996418792 diff --git a/model-00009-of-00073.safetensors b/model-00009-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e8d48d60da4523358dcc7097abf7997e242cad23 --- /dev/null +++ b/model-00009-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:90ed787e164a0b30e20de99759b7739c32551a248da55335be3f0d4138149818 +size 4996408560 diff --git a/model-00010-of-00073.safetensors b/model-00010-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7af87058068a0dc76c9b2ac2db790019321477ae --- /dev/null +++ b/model-00010-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15a195955c13f00401e384d01fa168808a8135c25ac5f3ac75bbc2ebcaad9ea5 +size 4996420624 diff --git a/model-00011-of-00073.safetensors b/model-00011-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..10831a2676f41b974aa9dd9d30edaf5726e2b9fb --- /dev/null +++ b/model-00011-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef20d6f3b9660a9eef1073e3af467cf4540026456c389e41f0b97e12113350e4 +size 4996423456 diff --git a/model-00012-of-00073.safetensors b/model-00012-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c67757d6b3f9fc428835ac7e686f3184945e6906 --- /dev/null +++ b/model-00012-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78588152b925f8ad368f1105e2d0c2114793455048212e89437b4a0b0eb6a46a +size 4973140832 diff --git a/model-00013-of-00073.safetensors b/model-00013-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6dd387832ade5919f32553bed0c91a7df5e1e126 --- /dev/null +++ b/model-00013-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b18a7e501b1dbbc6e1f4049a2ce818e09ed24aea0354263ab2b7907da2aa3e61 +size 4994615704 diff --git a/model-00014-of-00073.safetensors b/model-00014-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..639234ec155acc2ec562696410554ae0b534b240 --- /dev/null +++ b/model-00014-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:24c5853a42b4c7f60876009d50aa1de68236c0f9510fc7dc64582b8bc757bc8a +size 4996412344 diff --git a/model-00015-of-00073.safetensors b/model-00015-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a15c29d2e2bdb21e8a9f140210f342cc4a155fa4 --- /dev/null +++ b/model-00015-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8572a84e69a95725a0fe2603fb0ce36b1d13a074ff8232cc6ba93f3ebd97ace +size 4996422576 diff --git a/model-00016-of-00073.safetensors b/model-00016-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0028277d9764c6eb9523bdd0af2cb4293f79893e --- /dev/null +++ b/model-00016-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83a397de2136353fcb5ac815dee3190dd31f006bf41bb336a524819b2d9d9e61 +size 4996423224 diff --git a/model-00017-of-00073.safetensors b/model-00017-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..244f46afc7120b34ae8cd4b35f6cfde66209f9e4 --- /dev/null +++ b/model-00017-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:784311382bd62da31627b98db7f09f2c7cadf444de7dcfcc0ebcac3ee3e7428a +size 4996413216 diff --git a/model-00018-of-00073.safetensors b/model-00018-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..255e8cbe1f7bc51f3125621b30b5e5e0412a6b05 --- /dev/null +++ b/model-00018-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e530747c7418fb45e6edea62e477934a03e3e050f85907a2818dc577a7077c16 +size 4994830264 diff --git a/model-00019-of-00073.safetensors b/model-00019-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bbde65ca14ef52c8d0028505055c3d1f9e774561 --- /dev/null +++ b/model-00019-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:17d80f11f71b6e514c12fa78b52a71e689e4e108db805627cc5e0d10ae3f50ef +size 4996412344 diff --git a/model-00020-of-00073.safetensors b/model-00020-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b2c40ea45fda4b0880aa26fb003a874114ee6dae --- /dev/null +++ b/model-00020-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:685bb9a0505263e96012a5eed293ead7071498c1fdfd2cd09035567b20cf8af7 +size 4996422576 diff --git a/model-00021-of-00073.safetensors b/model-00021-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..88b4970d5b1054aaa2816c1ac78880319566fadc --- /dev/null +++ b/model-00021-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1b1967a83d2cfc160844a072d6bc6e331172f17de13e25b0e6554e9ec88a188 +size 4996422896 diff --git a/model-00022-of-00073.safetensors b/model-00022-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ec7ddba9ecd568b5c76a8275dbd5c4250018acb7 --- /dev/null +++ b/model-00022-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ecfc255764b3053fa4d4e26ef41a61cf52014f5250acd1d064bf6e0d8bf8eba +size 4996413216 diff --git a/model-00023-of-00073.safetensors b/model-00023-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3bde8154858cffae6e1e0ac5ddb283949aeca5ed --- /dev/null +++ b/model-00023-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2947a08c91b59690a5738879fff98895876911814b223d702bd08593cf96ea5b +size 4994830584 diff --git a/model-00024-of-00073.safetensors b/model-00024-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c182c89b02b8a143c594ae3e33b28e154c4edc49 --- /dev/null +++ b/model-00024-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:bcbfe8d9493cfb25ef4cda7acd5e8dbe75f320e63d7d3eb96a9937220fac8233 +size 4996412344 diff --git a/model-00025-of-00073.safetensors b/model-00025-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5c27eb12e7cec3e4e240be8c24bb1e870f871807 --- /dev/null +++ b/model-00025-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19fde0cbcfe158501c09a227dd1ab83a35d657bd6e49cbc24dc71ac8c4a9f0f9 +size 4996422576 diff --git a/model-00026-of-00073.safetensors b/model-00026-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e9060c024c06433f171bd9e48e6ad71a2c417397 --- /dev/null +++ b/model-00026-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80860f957af8e72b41042b4ccb93c166a983770e8ddab224b9fd427b89fce725 +size 4996422576 diff --git a/model-00027-of-00073.safetensors b/model-00027-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ef078adb940531c405c6e362279301b1c8857610 --- /dev/null +++ b/model-00027-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04fa4846d6d6986e22030cb20ac64cdf18d78b05f49bef3b7b2c0bf829d87ae9 +size 4996413216 diff --git a/model-00028-of-00073.safetensors b/model-00028-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..528efa65df194a0e70e7bb4a934fa5fd845427de --- /dev/null +++ b/model-00028-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19402d79c62bbe604f5c13c60bfe108eed695fd98fccc2090e5a13a5593b4aad +size 4996423584 diff --git a/model-00029-of-00073.safetensors b/model-00029-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..24bcdb19cf9ce4adf49017603d7134ffaea04cd3 --- /dev/null +++ b/model-00029-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:131030f6141ab316b1af62027b3f8be3784f7e723e58b4ca4e61a0ab33e768f1 +size 4994819672 diff --git a/model-00030-of-00073.safetensors b/model-00030-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..715b66313923f4da61e95b1abfebfff48e1b74d3 --- /dev/null +++ b/model-00030-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:001d6b2036d7267a4431572d0fcf2593389794bc8941f18cb37d66adc822a8e1 +size 4996422576 diff --git a/model-00031-of-00073.safetensors b/model-00031-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a3530a935126d10696e79fb153b5d7c868639c51 --- /dev/null +++ b/model-00031-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66ea4ee7e5892919d3f98bfe43e5fe04f6612c6b8ad92f449502710117bc2d74 +size 4996422576 diff --git a/model-00032-of-00073.safetensors b/model-00032-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..900284828d0b5b41d2082627885b046688a79147 --- /dev/null +++ b/model-00032-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99c1f6691a1f42e0cb29330f4ede7f3579911b9db7858de470b2adc5b18e1298 +size 4996412896 diff --git a/model-00033-of-00073.safetensors b/model-00033-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fe884dc72e7a8c70b2641a7ef1faed25f1318cf4 --- /dev/null +++ b/model-00033-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b98f3af91de96ffb79cff5b68fd765753911de6becea8300c80dc4407bb9ad9 +size 4996423456 diff --git a/model-00034-of-00073.safetensors b/model-00034-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8151e754cce3e723fe0a2f58314ae26e3925ec46 --- /dev/null +++ b/model-00034-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:ac8915507fd2ca05ff7a116e7376f28e1a0763d8444bddf079d77600e7640837 +size 4994820128 diff --git a/model-00035-of-00073.safetensors b/model-00035-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a9c07fde7e23f7df6a4cb7eed7f8efdecc4c1547 --- /dev/null +++ b/model-00035-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba8d25c426adf1240b25fa772b48512f9fb0491c05e51a332c9b231ae99d3211 +size 4996422576 diff --git a/model-00036-of-00073.safetensors b/model-00036-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e586eac3ddbe087d15aad5f59684fe537cf6c0c2 --- /dev/null +++ b/model-00036-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2755904436dd62966842e6de1c9a5ff8313c985ed2e8038dcf1e67ff7a902da +size 4996422576 diff --git a/model-00037-of-00073.safetensors b/model-00037-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c71e753ae8e6c85bfd9a63957ec5da1035bf3644 --- /dev/null +++ b/model-00037-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fd279790c4e987cd96afe34d0e3fda538ccc4c0e2fb345db818c6dbc8d22d74 +size 4996412568 diff --git a/model-00038-of-00073.safetensors b/model-00038-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..98d90ea316340ed8e4024e86f86dc6dfcac463ac --- /dev/null +++ b/model-00038-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f596c85245f51bb369d1211652e2fbb084d078973bc7655f1d8628066e50311 +size 4996423456 diff --git a/model-00039-of-00073.safetensors b/model-00039-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4d94f096c4a204b8e90e071014f150a7893b7f81 --- /dev/null +++ b/model-00039-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:b2cc084e37ba995f8c444e95315db05e656aa03cdc0d30cc47bf00e6fff7649f +size 4994820448 diff --git a/model-00040-of-00073.safetensors b/model-00040-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e8fb702b2d9715e09b152cdd0de9320dfe683f86 --- /dev/null +++ b/model-00040-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be27c086224406dd64f6d271da1e3fa84167a40f857be54e065d9947bd254064 +size 4996422576 diff --git a/model-00041-of-00073.safetensors b/model-00041-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bf4c5004fa0938a4b8b292891f403975dd11d1f8 --- /dev/null +++ b/model-00041-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:745013558c7fcb1ea77fa189172a97342a1f5ab8aad926f5ddf394d61bbf7c4c +size 4996422576 diff --git a/model-00042-of-00073.safetensors b/model-00042-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..eaceeb4d40d11525e94ea79a599354364b3b18bc --- /dev/null +++ b/model-00042-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1c0feef1e861f83856897b14d708d69c9435a178fdff8c11902da7be751cf27 +size 4996412344 diff --git a/model-00043-of-00073.safetensors b/model-00043-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..596007c2b36ce1fbea4ebcaab06d6ad082d3661f --- /dev/null +++ b/model-00043-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8bbdaa7b92e05c8a8f9347b3e6797eb750b233312bf32f113f48b231bd4484d1 +size 4996423352 diff --git a/model-00044-of-00073.safetensors b/model-00044-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..42d9b7725fbb38d1b347d800767f5edc34700c8f --- /dev/null +++ b/model-00044-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:1b10e97cd976732a8077cb37ed457b792905d887ce3c77e08996521f66726e5d +size 4996423488 diff --git a/model-00045-of-00073.safetensors b/model-00045-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..747ed71a0f614a86bc42fa7a626cc735f35f40e2 --- /dev/null +++ b/model-00045-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b4130f8ae6f505074a592ebf59c21a9dc76cb2f360ee2ad1f474f16c6244900 +size 4994819864 diff --git a/model-00046-of-00073.safetensors b/model-00046-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b0eab51cf76be513e8a3d4eaaa64ae0f71e9c31c --- /dev/null +++ b/model-00046-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14567f6fe758dfaed739357520b64948a5a045ba1858e84a28685ca34a71b8ba +size 4996422576 diff --git a/model-00047-of-00073.safetensors b/model-00047-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..53b9439feb398b2b879cbd8d530fe80821e99a61 --- /dev/null +++ b/model-00047-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f07e942a4bacd7421e77e022736c21ceffd652dcda43fec698dde39e463d9808 +size 4996412344 diff --git a/model-00048-of-00073.safetensors b/model-00048-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e90637d00bf50829a01d89358d09d8ffd95d0cf5 --- /dev/null +++ b/model-00048-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ba57941731937769f170de2514efe05e66b80d26443b539cf90de44285d9aa0 +size 4996423032 diff --git a/model-00049-of-00073.safetensors b/model-00049-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cab37f9e834138313049aac94b3c330068a11108 --- /dev/null +++ b/model-00049-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:9f045bd8439d1ea19f86b1f4e0d38f2fe15ff9da285205d8935cee03fa1a3a7b +size 4996423456 diff --git a/model-00050-of-00073.safetensors b/model-00050-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a1d6e8c33676badb9b9658cf154fd65300edb63d --- /dev/null +++ b/model-00050-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b204d541c7a29c21debb3d38fb3fc0e0aa6d5db22e7665b39f3718c1a3d37219 +size 4994820224 diff --git a/model-00051-of-00073.safetensors b/model-00051-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..55294a99d6e6ab7c3036ea7788e805fdd79e4bd3 --- /dev/null +++ b/model-00051-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4dd4b58301b09bd324b38875490583fbe0aca018cb8dc630a72d52f7de037bdc +size 4996422576 diff --git a/model-00052-of-00073.safetensors b/model-00052-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0f18c24518efc70032dda2f740ab0d4571de509d --- /dev/null +++ b/model-00052-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1be0a989db0719045f74812348e76905f4e9e4e3152c802c5aeb16f5a598d6eb +size 4996412344 diff --git a/model-00053-of-00073.safetensors b/model-00053-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a3f86d6a0ee4a5eaeaad36f6d7d058ab8c1a464e --- /dev/null +++ b/model-00053-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4a7132ce16e9ef653cf0ac4124b21658698892efcaa14c804a6bab54df61e38 +size 4996422704 diff --git a/model-00054-of-00073.safetensors b/model-00054-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6584abf5a85f3c32b9db0bbffc7c46e4134d4298 --- /dev/null +++ b/model-00054-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:d3ddcd9e730b517aaef8bd7a899f6d2e8394d142172fad0ce6ee331b8ae7e69b +size 4996423456 diff --git a/model-00055-of-00073.safetensors b/model-00055-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c14662e10a08dc9de2ba734394292a623c73c9c0 --- /dev/null +++ b/model-00055-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:707ae0b1752cc8e70464d632c02c5cbdcb26877038353d13b64c4350fc315078 +size 4998491344 diff --git a/model-00056-of-00073.safetensors b/model-00056-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ece78f881f1bfeec0ef144f5dfd7928258f78759 --- /dev/null +++ b/model-00056-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb8902d26e027a6d4b22d65b53a620e627ae45ea6efa33be8f77b59535208f99 +size 5000321432 diff --git a/model-00057-of-00073.safetensors b/model-00057-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b5d018a848560629bc84a7a0052ab3864f838685 --- /dev/null +++ b/model-00057-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7028de6e750e094dd1a24e667ac69c1c45e2243e50c90eaf43a017661329953 +size 4996674976 diff --git a/model-00058-of-00073.safetensors b/model-00058-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2d7f3f02a05bd0199c0deccf262cd8cadddbda92 --- /dev/null +++ b/model-00058-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e51f37039272d9587662ed13f9e73fef60c316e15f0431df9d0d536d1a6d9cef +size 4996422576 diff --git a/model-00059-of-00073.safetensors b/model-00059-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..88e71adea756d09263c5995628a4f038a7792e6b --- /dev/null +++ b/model-00059-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:8c3aee7fd38df326da3cdc7ae0ec6d9d51359756b543fc0b355cda60b1280d29 +size 4996413016 diff --git a/model-00060-of-00073.safetensors b/model-00060-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..27178aae0b62291e1162ff7aeff4ea64ba3eb386 --- /dev/null +++ b/model-00060-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:244d98939c8d49efea590963d7596e3bf103e1947a617ec243ba1aa36bb721d6 +size 4996423456 diff --git a/model-00061-of-00073.safetensors b/model-00061-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..736458dc0f2d6ff1ec2f26f212e44720b6c8b850 --- /dev/null +++ b/model-00061-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1804307106c72ee599d09bf5a0c71bd7615bb92e6d09878d06618e90073f872 +size 4994820000 diff --git a/model-00062-of-00073.safetensors b/model-00062-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e8efad43b4b8f190094cc6fe8bf3b46e0edc8f64 --- /dev/null +++ b/model-00062-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26b983226b5b36ef1699e1ad044747c5f47b0d66f8ac0c6c88c017a48dffea42 +size 4996422576 diff --git a/model-00063-of-00073.safetensors b/model-00063-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d81e940dba19688330143fe388134257ede84e6d --- /dev/null +++ b/model-00063-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11723874f2bb67ee15a0fc938386cf85b9d172d086b41a1dcd40adf08725bd59 +size 4996422576 diff --git a/model-00064-of-00073.safetensors b/model-00064-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..70c9d98b8a8a5107844043566fc49f90129ee239 --- /dev/null +++ b/model-00064-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:64793b05c8001c588fc67f270ba3b386d744620fa72eed5fb2e52e8a4e810ad2 +size 4996412696 diff --git a/model-00065-of-00073.safetensors b/model-00065-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..08f465d40d7a943514b543117f7b230369d0f6f5 --- /dev/null +++ b/model-00065-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d09b7e3c936a5f0a82e91072a75104f491a0afa11562c81cebb7b36ba3bb073a +size 4996423456 diff --git a/model-00066-of-00073.safetensors b/model-00066-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..15f69a9bd6d586f53559b425641e5def91a28523 --- /dev/null +++ b/model-00066-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f413a4e40194cb4b9f3cac1012af6ca19d729ddbdd20b4c86d71646dbe44ab0 +size 4994820320 diff --git a/model-00067-of-00073.safetensors b/model-00067-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a9485e4b2b44d54bf97fadd26e9c2a89b1c18b79 --- /dev/null +++ b/model-00067-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7cbbed4908117ddbcdd867be9c8c84fe520cc2ce8717a046379199a208cd7c67 +size 4996422576 diff --git a/model-00068-of-00073.safetensors b/model-00068-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bab1fa500d8e020a49fd6aac0ad20d9390c5ea90 --- /dev/null +++ b/model-00068-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cad8ad6d28d814f8c5a4a09eeb00632298fc4a45a5bd9db81314f9fa1c6e1ea +size 4996422576 diff --git a/model-00069-of-00073.safetensors b/model-00069-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..eb95844bc2b33281ab1df719d1092c14fd7c394a --- /dev/null +++ b/model-00069-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:d0e778f3bcb93ffd732aed6e0467b7d035cb0b0a3e76faed510199f2263a7754 +size 4996412368 diff --git a/model-00070-of-00073.safetensors b/model-00070-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9b0f85e0f6e4e00efaa6d9c542456f9347b0cb1c --- /dev/null +++ b/model-00070-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef79541ba17b88b52add9c0a1f3aa77dd570aa92c582e30f9d57c3e19b50e791 +size 4996423456 diff --git a/model-00071-of-00073.safetensors b/model-00071-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0fd192c067ce3e283a6bfcbaeefd6e10aba76f43 --- /dev/null +++ b/model-00071-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f776246d209dde3aaac8bf297c4318d882eaceee169797f45a8bccd53c98a78e +size 4996423616 diff --git a/model-00072-of-00073.safetensors b/model-00072-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..11b0264740c03ccaffec4158633fc1b4686091c1 --- /dev/null +++ b/model-00072-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2835d8799a54b6e4785b7a416402b9010efb825ae6c0cd852a3994ca1c094a8 +size 4994819616 diff --git a/model-00073-of-00073.safetensors b/model-00073-of-00073.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a60d39eb9888876d42cbef4faaf730a362e8831e --- /dev/null +++ b/model-00073-of-00073.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc0b6fc46df58f0d9a1c23739483986771f3fdb013d4895ffaa7310676995b1c +size 4728169392 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..46009047c132b9a0ceed356838441cfdf5833a8c --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:5b8d5112c3151a598b1f12f85dab4ece63049475f4dad3ed67d875ce273c80d7 +size 24780799 diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..55004e9eddc866f8c7786057b895e52f619cecfd --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59cda48bbe8bab9d61ffb410e6e3c07b6d98bff73cee7c88ff8b51f95f21ab1c +size 485 diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..b8069d141d985224799b817e12d5e5139cc4a111 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce20a7877bec9454dd611bc4e9116b7db765594f78524bd94edbaab422eddf02 +size 9977280 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..38442bd3ce171abfbca4ede902630e60e5977505 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ba88ac0ca8ed5e27de383440945e3da15f09be91a6fc6d4f7ec819dbbe20371 +size 166530