# Model-merge configuration.
# The nesting below was reconstructed after removing table-extraction residue
# (a trailing " | |" on every line) and restoring lost indentation.
dtype: bfloat16
tokenizer:
  source: union
merge_method: scope_merge
base_model: meta-llama/Meta-Llama-3-8B-Instruct
models:
  # Both input models carry an empty per-model parameter mapping.
  - model: meta-llama/Meta-Llama-3-8B-Instruct
    parameters: {}
  - model: tokyotech-llm/Llama-3-Swallow-8B-Instruct-v0.1
    parameters: {}
# Options that are not attached to a specific model entry.
parameters:
  lambda_reg: 1.5
  k_fisher: 64
  project: false
  svd_cap: 4096
  tall_skip_ratio: 4
  cpu_svd: false
  rank_shrink: 0
  # NOTE(review): the original indentation was lost, so it is not certain that
  # `seed` and `write_readme` belong under `parameters` rather than at the top
  # level -- confirm against the schema expected by the `scope_merge` method.
  seed: 0
  write_readme: README.md