# gsjang/ja-llama-3-swallow-8b-instruct-v0.1-x-meta-llama-3-8b-instruct-kv_ot_merge

	dtype: bfloat16
	tokenizer:
	source: union
	merge_method: kv_ot_merge
	base_model: meta-llama/Meta-Llama-3-8B-Instruct
	models:
	- model: meta-llama/Meta-Llama-3-8B-Instruct
	parameters: {}
	- model: tokyotech-llm/Llama-3-Swallow-8B-Instruct-v0.1
	parameters: {}
	parameters:
	align: true
	eps: 0.05
	iters: 20
	sigma_x: 1.0
	write_readme: README.md