arxiv:2503.06674

Learning Few-Step Diffusion Models by Trajectory Distribution Matching

Published on Mar 9
Submitted by Luo-Yihong on Mar 17
Authors: Yihong Luo, Tianyang Hu, Jiacheng Sun, Yujun Cai, Jing Tang
AI-generated summary

TDM, a Trajectory Distribution Matching method, accelerates diffusion model sampling by combining distribution and trajectory matching, achieving superior quality and efficiency in text-to-image and text-to-video generation.

Abstract

Accelerating diffusion model sampling is crucial for efficient AIGC deployment. While diffusion distillation methods -- based on distribution matching and trajectory matching -- reduce sampling to as few as one step, they fall short on complex tasks like text-to-image generation. Few-step generation offers a better balance between speed and quality, but existing approaches face a persistent trade-off: distribution matching lacks flexibility for multi-step sampling, while trajectory matching often yields suboptimal image quality. To bridge this gap, we propose learning few-step diffusion models by Trajectory Distribution Matching (TDM), a unified distillation paradigm that combines the strengths of distribution and trajectory matching. Our method introduces a data-free score distillation objective, aligning the student's trajectory with the teacher's at the distribution level. Further, we develop a sampling-steps-aware objective that decouples learning targets across different steps, enabling more adjustable sampling. This approach supports both deterministic sampling for superior image quality and flexible multi-step adaptation, achieving state-of-the-art performance with remarkable efficiency. Our model, TDM, outperforms existing methods on various backbones, such as SDXL and PixArt-α, delivering superior quality and significantly reduced training costs. In particular, our method distills PixArt-α into a 4-step generator that outperforms its teacher on real user preference at 1024 resolution. This is accomplished with 500 iterations and 2 A800 hours -- a mere 0.01% of the teacher's training cost. In addition, our proposed TDM can be extended to accelerate text-to-video diffusion. Notably, TDM can outperform its teacher model (CogVideoX-2B) by using only 4 NFE on VBench, improving the total score from 80.91 to 81.65. Project page: https://tdm-t2x.github.io/
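To make the recipe above concrete, here is a minimal PyTorch sketch of a TDM-style update, assuming toy MLP denoisers in place of the real text-to-image/video backbones. Every name, network size, and weighting below is an illustrative stand-in rather than the authors' implementation; see https://github.com/Luo-Yihong/TDM for the official code.

```python
# Toy sketch of TDM-style distillation: data-free score distillation with a
# sampling-steps-aware target. NOT the authors' code; all modules are stand-ins.
import torch
import torch.nn as nn

class ToyDenoiser(nn.Module):
    """Hypothetical stand-in for a diffusion score/denoiser network."""
    def __init__(self, dim=16):
        super().__init__()
        self.net = nn.Sequential(nn.Linear(dim + 1, 64), nn.SiLU(), nn.Linear(64, dim))

    def forward(self, x, t):
        # Time t is appended as an extra input channel.
        return self.net(torch.cat([x, t.expand(x.shape[0], 1)], dim=1))

dim, K = 16, 4                      # K = number of student sampling steps (4 NFE)
teacher = ToyDenoiser(dim).eval()   # frozen pre-trained teacher (assumed given)
student = ToyDenoiser(dim)          # few-step generator being distilled
fake_score = ToyDenoiser(dim)       # auxiliary score model of the student's outputs

opt_g = torch.optim.Adam(student.parameters(), lr=1e-4)
opt_s = torch.optim.Adam(fake_score.parameters(), lr=1e-4)

for it in range(100):
    # Data-free: start from pure noise; no real images or videos are used.
    z = torch.randn(32, dim)
    # Sampling-steps-aware: pick one of the K student steps and condition on a
    # step-dependent time, so each step gets its own learning target.
    k = torch.randint(0, K, (1,)).item()
    t_k = torch.full((1, 1), (k + 1) / K)
    x = student(z, t_k)             # student's prediction at step k

    # (1) Fit the auxiliary score model to the student's current distribution.
    t = torch.rand(1, 1)
    noisy = x.detach() + t * torch.randn_like(x)
    loss_s = (fake_score(noisy, t) - x.detach()).pow(2).mean()
    opt_s.zero_grad()
    loss_s.backward()
    opt_s.step()

    # (2) Distribution-level matching: nudge the student so the score of its
    # output distribution agrees with the teacher's (DMD-style gradient, which
    # TDM generalizes to per-step trajectory targets).
    t = torch.rand(1, 1)
    noisy = x + t * torch.randn_like(x)
    with torch.no_grad():
        grad = fake_score(noisy, t) - teacher(noisy, t)   # score difference
    loss_g = (x * grad).sum() / x.shape[0]   # surrogate loss whose x-gradient is `grad`
    opt_g.zero_grad()
    loss_g.backward()
    opt_g.step()
```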

Community

Paper author and submitter

We introduce TDM to distill a few-step student that can surpass the teacher diffusion model in an image/video-free way. TDM is highly efficient and effective. In particular, our TDM distills PixArt-α into a 4-step generator that outperforms its teacher on real user preference. This is accomplished with 500 iterations and 2 A800 hours -- a mere 0.01% of the teacher's training cost. In addition, our proposed TDM can be extended to accelerate text-to-video diffusion. Notably, TDM can outperform its teacher model (CogVideoX-2B) by using only 4 NFE on VBench, improving the total score from 80.91 to 81.65.

Check details at our project page: https://tdm-t2x.github.io/

The pre-trained models have also been released at https://github.com/Luo-Yihong/TDM
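A released few-step checkpoint could then be loaded through diffusers roughly as sketched below. The model id is a placeholder, not a confirmed checkpoint name; check the repository above for the actual weights and recommended sampling settings.

```python
# Hypothetical usage sketch (placeholder checkpoint id, unverified settings).
import torch
from diffusers import PixArtAlphaPipeline

# Load a TDM-distilled PixArt-α student. "Luo-Yihong/TDM-pixart-alpha" is a
# placeholder id; consult https://github.com/Luo-Yihong/TDM for real weights.
pipe = PixArtAlphaPipeline.from_pretrained(
    "Luo-Yihong/TDM-pixart-alpha",
    torch_dtype=torch.float16,
).to("cuda")

# The distilled student needs only 4 sampling steps (4 NFE) instead of the
# teacher's ~100.
image = pipe(
    "an astronaut riding a horse on the moon",
    num_inference_steps=4,
).images[0]
image.save("tdm_4step_1024.png")
```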

Paper author and submitter

[Video] Teacher samples (CogVideoX-2B, 100 NFE)
[Video] Student samples (TDM, 4 NFE)
The video above was generated by CogVideoX-2B (100 NFE). In the same amount of time, TDM (4 NFE) can generate 25 videos, as shown below, achieving roughly a 25x speedup without performance degradation.
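The arithmetic behind that figure: sampling time is roughly proportional to the number of function evaluations (NFE), so one teacher video at 100 NFE costs about as much as 100 / 4 = 25 student videos at 4 NFE, assuming comparable per-evaluation cost.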

This is an automated message from the Librarian Bot. I found the following papers similar to this paper.

The following papers were recommended by the Semantic Scholar API:

* [Adding Additional Control to One-Step Diffusion with Joint Distribution Matching](https://huggingface.co/papers/2503.06652) (2025)
* [One-step Diffusion Models with f-Divergence Distribution Matching](https://huggingface.co/papers/2502.15681) (2025)
* [SANA-Sprint: One-Step Diffusion with Continuous-Time Consistency Distillation](https://huggingface.co/papers/2503.09641) (2025)
* [Diffusion-Sharpening: Fine-tuning Diffusion Models with Denoising Trajectory Sharpening](https://huggingface.co/papers/2502.12146) (2025)
* [RayFlow: Instance-Aware Diffusion Acceleration via Adaptive Flow Trajectories](https://huggingface.co/papers/2503.07699) (2025)
* [ProReflow: Progressive Reflow with Decomposed Velocity](https://huggingface.co/papers/2503.04824) (2025)
* [ROCM: RLHF on consistency models](https://huggingface.co/papers/2503.06171) (2025)

Please give a thumbs up to this comment if you found it helpful!

If you want recommendations for any paper on Hugging Face, check out this Space: https://huggingface.co/spaces/librarian-bots/recommend_similar_papers

You can directly ask Librarian Bot for paper recommendations by tagging it in a comment: @librarian-bot recommend


Models citing this paper 4

Datasets citing this paper 0

No dataset linking this paper

Cite arxiv.org/abs/2503.06674 in a dataset README.md to link it from this page.

Spaces citing this paper 0

No Space linking this paper

Cite arxiv.org/abs/2503.06674 in a Space README.md to link it from this page.

Collections including this paper 1