gsjang/ja-llama-3-swallow-8b-instruct-v0.1-x-meta-llama-3-8b-instruct-mmot_merge

	# Model-merge configuration. The key layout (merge_method / base_model /
	# models / parameters) appears to follow the mergekit config schema —
	# TODO confirm against the tool that consumes this file.
	dtype: bfloat16
	tokenizer:
	  source: union
	merge_method: mmot_merge
	base_model: meta-llama/Meta-Llama-3-8B-Instruct
	# Models taking part in the merge; each entry carries its own (empty)
	# per-model parameter mapping.
	models:
	  - model: meta-llama/Meta-Llama-3-8B-Instruct
	    parameters: {}
	  - model: tokyotech-llm/Llama-3-Swallow-8B-Instruct-v0.1
	    parameters: {}
	# Global settings, presumably read by the mmot_merge method; their exact
	# meaning is defined by that implementation and is not visible here.
	parameters:
	  align: true
	  eps: 0.05
	  iters: 30
	  gamma: 1.0
	  eta: 5.0
	  alpha_cap: 0.8
	  ot_cap: 512
	  group_size: 512
	  cpu_align: false
	# NOTE(review): placed at top level; the flattened original does not show
	# whether this key belonged under `parameters` — confirm.
	write_readme: README.md