dtype: bfloat16
tokenizer:
  source: union
merge_method: mmot_merge
base_model: meta-llama/Meta-Llama-3-8B-Instruct
models:
  - model: meta-llama/Meta-Llama-3-8B-Instruct
    parameters: {}
  - model: tokyotech-llm/Llama-3-Swallow-8B-Instruct-v0.1
    parameters: {}
parameters:
  align: true
  eps: 0.05
  iters: 30
  gamma: 1.0
  eta: 5.0
  alpha_cap: 0.8
  ot_cap: 512
  group_size: 512
  cpu_align: false
write_readme: README.md