6e1e31a66a 
					 
					
						
						
							
							add code owner  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chen Zhang <zhangch99@outlook.com > 
						
						
					 
					
						2025-09-17 22:19:17 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						50e80db4ef 
					 
					
						
						
							
							add mixin  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chen Zhang <zhangch99@outlook.com > 
						
						
					 
					
						2025-09-17 22:15:10 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d3d6afb355 
					 
					
						
						
							
							add mixin  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chen Zhang <zhangch99@outlook.com > 
						
						
					 
					
						2025-09-17 22:12:41 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						808fa43d76 
					 
					
						
						
							
							add mixin  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chen Zhang <zhangch99@outlook.com > 
						
						
					 
					
						2025-09-17 22:02:15 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4ac510f484 
					 
					
						
						
							
							[Kernels] Enable DeepGEMM by default ( #24462 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Bill Nell <bnell@redhat.com > 
						
						
					 
					
						2025-09-17 20:19:52 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7fb2a5be28 
					 
					
						
						
							
							[V0 Deprecation] Skip PP test ( #25128 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-09-17 20:18:36 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6c036615dc 
					 
					
						
						
							
							[V0 Deprecation] Remove misc V0 tests ( #25118 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-09-17 19:41:55 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2fc24e94f9 
					 
					
						
						
							
							[V0 Deprecation] Remove V0 Tracing & Metrics tests ( #25115 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-09-17 19:40:44 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2c3c1bd07a 
					 
					
						
						
							
							[V0 Deprecation] Remove V0 Engine tests ( #25114 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-09-17 19:38:09 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5963b98b46 
					 
					
						
						
							
							[Kernel] Delegate construction of FusedMoEQuantConfig to FusedMoEMethodBase subclasses ( #22537 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Bill Nell <bnell@redhat.com > 
						
						
					 
					
						2025-09-17 17:43:31 -06:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e6585ddb45 
					 
					
						
						
							
							[Bugfix] Fix accuracy issue for silu_mul + nvfp4 quant fusion kernel ( #24833 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: elvischenv <219235043+elvischenv@users.noreply.github.com >
Co-authored-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com > 
						
						
					 
					
						2025-09-17 16:37:23 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2a4d6412e6 
					 
					
						
						
							
							Add a batched auto tune script ( #25076 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Karan Goel <karangoel@google.com >
Signed-off-by: Karan Goel <3261985+karan@users.noreply.github.com >
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> 
						
						
					 
					
						2025-09-17 22:41:18 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e67a79db03 
					 
					
						
						
							
							[Bugfix] Refactor Flashinfer TRTLLM attention kernel selection logic ( #24600 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: elvischenv <219235043+elvischenv@users.noreply.github.com >
Co-authored-by: Michael Goin <mgoin64@gmail.com > 
						
						
					 
					
						2025-09-17 15:36:29 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9f882d8791 
					 
					
						
						
							
							Disable failing GPT-OSS Eval (Blackwell) for now ( #25107 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-09-17 15:36:00 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1a456c7c90 
					 
					
						
						
							
							Aiter mha fp8 fix ( #24991 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Doug Lehr <douglehr@amd.com >
Co-authored-by: Doug Lehr <douglehr@amd.com > 
						
						
					 
					
						2025-09-17 22:29:14 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						fedb75fa27 
					 
					
						
						
							
							[Bugfix][B200] Fix cutlass_mla hang ( #24966 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Alexander Matveev <amatveev@redhat.com >
Co-authored-by: Michael Goin <mgoin64@gmail.com > 
						
						
					 
					
						2025-09-17 18:06:38 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						bff2e5f1d6 
					 
					
						
						
							
							[gpt-oss][2] fix types for streaming ( #24556 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Andrew Xia <axia@meta.com > 
						
						
					 
					
						2025-09-17 22:04:28 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3c068c637b 
					 
					
						
						
							
							[Kernel] Faster pre-processing time for W4A8 ( #23972 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: czhu-cohere <conway.zhu@cohere.com > 
						
						
					 
					
						2025-09-17 14:35:32 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f20c3b0951 
					 
					
						
						
							
							[BUG] Exclude .pth files when pulling remote files  ( #25092 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: ahao-anyscale <ahao@anyscale.com > 
						
						
					 
					
						2025-09-17 20:42:09 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						883131544f 
					 
					
						
						
							
							[Bugfix] Update import path for bc_linter_include ( #24766 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Mohammad Miadh Angkad <mangkad.bsdsba2027@aim.edu > 
						
						
					 
					
						2025-09-17 20:33:11 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ee5fd49150 
					 
					
						
						
							
							[Misc] Update owners for KV connector and V1 offloading ( #25041 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: ApostaC <yihua98@uchicago.edu > 
						
						
					 
					
						2025-09-17 12:37:29 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7ae9887542 
					 
					
						
						
							
							[V1] Logits processor docs ( #22919 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Andrew Feldman <afeldman@redhat.com >
Signed-off-by: afeldman-nm <156691304+afeldman-nm@users.noreply.github.com >
Co-authored-by: Joseph Marinier <Joseph.Marinier@gmail.com > 
						
						
					 
					
						2025-09-17 11:53:12 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e3db5ebb66 
					 
					
						
						
							
							[CI Bugfix] Fix failing test_model_load_with_params tests due to tokenizer refactor ( #25086 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-09-17 11:15:05 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9d442b7c48 
					 
					
						
						
							
							[V0 Deprecation] Remove V0 tests in test_sequence.py ( #25088 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-09-17 11:08:45 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						eb68c2dcd9 
					 
					
						
						
							
							[CI] Revert back prepare_prompts and check_answers ( #25087 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-09-17 11:03:16 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8b32464ac1 
					 
					
						
						
							
							Change log level from info to debug for IOProcessor ( #24999 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Michael Goin <mgoin64@gmail.com > 
						
						
					 
					
						2025-09-17 10:21:28 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						99cc41ad50 
					 
					
						
						
							
							[V0 Deprecation] Remove unused output processor util ( #25023 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk@thinkingmachines.ai > 
						
						
					 
					
						2025-09-17 09:50:07 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d6a518fdde 
					 
					
						
						
							
							Remove unused find_cuda_init helper script ( #25044 )  
						
						 
						
						
						
						
					 
					
						2025-09-17 09:47:40 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4aa8c7b047 
					 
					
						
						
							
							cleanup: remove adapter commons  ( #25045 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com >
Co-authored-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-09-17 16:46:29 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4b946d693e 
					 
					
						
						
							
							[V0 Deprecation] Remove V0 Core tests ( #25082 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-09-17 09:32:42 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						087c6ffc92 
					 
					
						
						
							
							[CI Bugfix] Fix failing test_invalid_env ( #25078 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-09-17 08:28:58 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4a2d33e371 
					 
					
						
						
							
							[Docs] vllm/benchmarks/datasets.py fix docstring param format. ( #24970 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: samzong <samzong.lu@gmail.com > 
						
						
					 
					
						2025-09-17 08:11:51 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8f3616f422 
					 
					
						
						
							
							Remove old cutlass mla ( #23961 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Matthew Bonanni <mbonanni001@gmail.com >
Signed-off-by: Matthew Bonanni <mbonanni@redhat.com > 
						
						
					 
					
						2025-09-17 14:31:43 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						47f670b03b 
					 
					
						
						
							
							[Docs] improve code formatting and comments for eliminate griffe build warning. ( #25010 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: samzong <samzong.lu@gmail.com > 
						
						
					 
					
						2025-09-17 07:31:20 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						dd6a910aac 
					 
					
						
						
							
							[Bugfix][Qwen3-Next] fixes the varlen issue in qwen3-next's MTP implementation. ( #24957 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Tao He <linzhu.ht@alibaba-inc.com > 
						
						
					 
					
						2025-09-17 21:59:09 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1b962e2457 
					 
					
						
						
							
							[fix] lora benchmarks pass no_lora_flag_cpu ( #23774 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Dylan Maloy <34420038+dolpm@users.noreply.github.com >
Co-authored-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-09-17 21:22:25 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						bfe9380161 
					 
					
						
						
							
							Apply fixes for CUDA 13 ( #24599 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Aidyn-A <aidyn.b.aitzhan@gmail.com > 
						
						
					 
					
						2025-09-17 09:15:42 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9fccd04e30 
					 
					
						
						
							
							[Bugfix] Fix Stream usage in CPU model runner and OneDNN kernel check ( #25046 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: jiang1.li <jiang1.li@intel.com > 
						
						
					 
					
						2025-09-17 05:54:02 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						252ada5559 
					 
					
						
						
							
							Add RADIO Vision Encoder Support to vLLM ( #24595 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Daniel Afrimi <danielafrimi8@gmail.com >
Co-authored-by: root <root@cw-dfw-h100-001-305-026.cm.cluster > 
						
						
					 
					
						2025-09-17 05:53:30 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e120533d7a 
					 
					
						
						
							
							[Misc] Avoid use of deprecated AutoModelForVision2Seq ( #25065 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-09-17 12:19:15 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2b85697031 
					 
					
						
						
							
							[BugFix] enable DOTALL to match multi-line tool_call parameters in extract_tool_call_required_streaming ( #24668 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Shijun Yin <shijun.yin@outlook.com > 
						
						
					 
					
						2025-09-17 09:21:18 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						544fe76b95 
					 
					
						
						
							
							[Frontend] Support returning all prompt logprobs ( #24956 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com > 
						
						
					 
					
						2025-09-17 09:03:52 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						bb58dc8c20 
					 
					
						
						
							
							[DP] Create placement groups by ray_device_key ( #25026 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Xinyu Chen <xinyu1.chen@intel.com >
Co-authored-by: Kunshang Ji <kunshang.ji@intel.com > 
						
						
					 
					
						2025-09-17 08:57:25 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0fb2551c23 
					 
					
						
						
							
							[Docs] Fix griffe warning in base_static_graph.py ( #25018 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: windsonsea <haifeng.yao@daocloud.io > 
						
						
					 
					
						2025-09-17 08:49:19 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6c47f6bfa4 
					 
					
						
						
							
							[Core] Remove tokenizer group in vLLM ( #24078 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Zhuohan Li <zhuohan123@gmail.com > 
						
						
					 
					
						2025-09-17 08:42:59 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c15309a730 
					 
					
						
						
							
							[Model] Apply SharedFusedMoE to glm4_moe. ( #24849 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: whx-sjtu <2952154980@qq.com > 
						
						
					 
					
						2025-09-17 16:02:31 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4a9375fe9d 
					 
					
						
						
							
							[Model] Pass param prefix to LLMHead ( #24862 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: whx-sjtu <2952154980@qq.com > 
						
						
					 
					
						2025-09-17 16:01:27 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						03191cd8f0 
					 
					
						
						
							
							[Core][MultiModalHasher] Hash images without converting image mode ( #24969 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lukas Geiger <lukas.geiger94@gmail.com > 
						
						
					 
					
						2025-09-17 00:57:34 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b77bf34e53 
					 
					
						
						
							
							[EPLB] Support EPLB for Mixtral Model ( #22842 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: rouchenzi <ruochenwen@gmail.com >
Signed-off-by: rouchenzi <40842833+rouchenzi@users.noreply.github.com >
Co-authored-by: Bowen Wang <abmfy@icloud.com > 
						
						
					 
					
						2025-09-17 07:27:34 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						dd39baf717 
					 
					
						
						
							
							[XPU] Fix xpu model runner call torch.cuda APIs ( #25011 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Kunshang Ji <kunshang.ji@intel.com > 
						
						
					 
					
						2025-09-17 06:45:25 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						43a62c51be 
					 
					
						
						
							
							Add more documentation and improve usability of lognormal dist (benchmark_serving_multi_turn) ( #23255 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: daniels <daniels@pliops.com > 
						
						
					 
					
						2025-09-17 05:53:17 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ca2d1925ef 
					 
					
						
						
							
							[Rocm] [quantization] Fix quark ptpc moe and add test case ( #24649 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Haoyang Li <lihaoyang0109@gmail.com >
Co-authored-by: Haoyang Li <haoyang.li@amd.com > 
						
						
					 
					
						2025-09-16 22:15:13 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0f7acdd73c 
					 
					
						
						
							
							[Model] Support Qwen3-VL Model Series ( #24727 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Roger Wang <hey@rogerw.io >
Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn >
Co-authored-by: Huang Jie <92386084+JJJYmmm@users.noreply.github.com >
Co-authored-by: 松灵 <26085463+wulipc@users.noreply.github.com >
Co-authored-by: Isotr0py <mozf@mail2.sysu.edu.cn > 
						
						
					 
					
						2025-09-17 05:01:04 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5801e49776 
					 
					
						
						
							
							[V0 Deprecation] Remove MQLLMEngine ( #25019 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu >
Signed-off-by: Woosuk Kwon <woosuk@thinkingmachines.ai > 
						
						
					 
					
						2025-09-16 21:29:27 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						58d4c705a8 
					 
					
						
						
							
							[Core] Get num_encoder_tokens from scheduler config ( #24989 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Russell Bryant <rbryant@redhat.com > 
						
						
					 
					
						2025-09-16 20:59:07 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ea3de5ef0d 
					 
					
						
						
							
							[misc] fix typo in value error ( #24995 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Prashant Gupta <prashantgupta@us.ibm.com > 
						
						
					 
					
						2025-09-16 20:58:38 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						67532a1a68 
					 
					
						
						
							
							[UX] Remove "quantization is not fully optimized yet" log ( #25012 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-09-16 20:57:51 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5672ba90bd 
					 
					
						
						
							
							[Docs] fix invalid doc link ( #25017 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: zxw <1020938856@qq.com > 
						
						
					 
					
						2025-09-16 20:53:23 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						dd83a157f1 
					 
					
						
						
							
							[UX] Enforce valid choices for envs like VLLM_ATTENTION_BACKEND, etc ( #24761 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com >
Signed-off-by: Michael Goin <mgoin64@gmail.com > 
						
						
					 
					
						2025-09-16 20:42:23 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5a411ef6c4 
					 
					
						
						
							
							[Benchmarks] Add MMVU video dataset support and clean up deprecated datasets ( #24719 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn > 
						
						
					 
					
						2025-09-17 03:29:43 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						eeb135eb87 
					 
					
						
						
							
							[Core] Use CpuGpuBuffer for block table tensors ( #24795 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-09-16 19:18:06 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3059b9cc6b 
					 
					
						
						
							
							[Doc] Add --force-overwrite option to generate_cmake_presets.py ( #24375 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: elvischenv <219235043+elvischenv@users.noreply.github.com > 
						
						
					 
					
						2025-09-16 18:45:29 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						64ad551878 
					 
					
						
						
							
							Removes source compilation of nixl dependency ( #24874 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: bbartels <benjamin@bartels.dev >
Signed-off-by: Benjamin Bartels <benjamin@bartels.dev >
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Daniele <36171005+dtrifiro@users.noreply.github.com > 
						
						
					 
					
						2025-09-17 01:33:18 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						cef32104b4 
					 
					
						
						
							
							[FP8] Extend per-token-group quantization support to QuantFP8 ( #24342 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Tahsin Tunan <tahsintunan@gmail.com >
Signed-off-by: Luka Govedič <lgovedic@redhat.com >
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Luka Govedič <lgovedic@redhat.com > 
						
						
					 
					
						2025-09-16 18:31:06 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						493b10f8bf 
					 
					
						
						
							
							[CI] GPT-OSS GPQA eval test for Blackwell ( #24920 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com >
Signed-off-by: Michael Goin <mgoin64@gmail.com >
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> 
						
						
					 
					
						2025-09-16 18:13:21 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d119fc8614 
					 
					
						
						
							
							[CI][Bugfix] Fix failing Blackwell test ( #24993 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Matthew Bonanni <mbonanni001@gmail.com >
Co-authored-by: Robert Shaw <114415538+robertgshaw2-redhat@users.noreply.github.com > 
						
						
					 
					
						2025-09-16 15:55:02 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						dbebb7f812 
					 
					
						
						
							
							[Perf] Reuse workspace for FP8+FP4 Marlin MoE ( #20500 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com >
Signed-off-by: Michael Goin <mgoin64@gmail.com >
Co-authored-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com > 
						
						
					 
					
						2025-09-16 15:45:10 -06:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3053a22b33 
					 
					
						
						
							
							fp8 kv cache support fix for torch.compile ( #22758 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Aleksandr Malyshev <maleksan@amd.com >
Signed-off-by: Gregory Shtrasberg <Gregory.Shtrasberg@amd.com >
Co-authored-by: Aleksandr Malyshev <maleksan@amd.com >
Co-authored-by: Gregory Shtrasberg <Gregory.Shtrasberg@amd.com >
Co-authored-by: Gregory Shtrasberg <156009573+gshtras@users.noreply.github.com > 
						
						
					 
					
						2025-09-16 21:27:11 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						02d4b85454 
					 
					
						
						
							
							Use kwargs for long lists of EngineCoreRequest arguments in tests and fix extra kwargs ( #24987 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Andrew Sansom <andrew@protopia.ai > 
						
						
					 
					
						2025-09-16 14:06:56 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						86daa875fe 
					 
					
						
						
							
							[gpt-oss][1][bugfix] fix streaming final output ( #24466 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Andrew Xia <axia@meta.com > 
						
						
					 
					
						2025-09-16 13:56:16 -06:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						dcf2f3ec06 
					 
					
						
						
							
							[ROCm] Add dependencies for ROCm ( #24900 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Yida Wu <yida.wu@amd.com > 
						
						
					 
					
						2025-09-16 19:49:06 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						218454b9b2 
					 
					
						
						
							
							[MISC] Add code owners of vllm/v1 to vllm/v1/core ( #24928 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chen Zhang <zhangch99@outlook.com > 
						
						
					 
					
						2025-09-16 19:07:34 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f4d6eb95cf 
					 
					
						
						
							
							[gpt-oss][1b] streaming add item id, content id ( #24788 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Andrew Xia <axia@meta.com > 
						
						
					 
					
						2025-09-16 18:41:12 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						cd1f885bcf 
					 
					
						
						
							
							Directly get max encoder len from VLLM config in V1 ( #24866 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Sugar-zsg <952242923@qq.com > 
						
						
					 
					
						2025-09-16 17:52:31 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d593cf28fa 
					 
					
						
						
							
							[Misc] Add removed encoder-decoder models to previously supported models list ( #24961 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn > 
						
						
					 
					
						2025-09-16 10:46:46 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						faa7a5daac 
					 
					
						
						
							
							[Bugfix] Fix unable to run encoder model when disable_hybrid_kv_cache_manager is true ( #24571 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: lianyibo <lianyibo1@kunlunit.com >
Co-authored-by: Chen Zhang <zhangch99@outlook.com > 
						
						
					 
					
						2025-09-16 17:36:58 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						567939953b 
					 
					
						
						
							
							[Core/DBO][1/N] Add Dual-Batch Overlap mechanism to VLLM ( #23693 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com >
Signed-off-by: Sage Moore <sage@neuralmagic.com >
Signed-off-by: Lucas Wilkinson <lwilkinson@neuralmagic.com >
Signed-off-by: yewentao256 <zhyanwentao@126.com >
Co-authored-by: Lucas Wilkinson <lwilkins@redhat.com >
Co-authored-by: Lucas Wilkinson <lwilkinson@neuralmagic.com >
Co-authored-by: yewentao256 <zhyanwentao@126.com >
Co-authored-by: Lucas Wilkinson <LucasWilkinson@users.noreply.github.com >
Co-authored-by: Robert Shaw <114415538+robertgshaw2-redhat@users.noreply.github.com > 
						
						
					 
					
						2025-09-16 12:21:48 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						08369289af 
					 
					
						
						
							
							[Core][MultiModalHasher] Don't convert memoryviews to bytes during hashing ( #24925 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lukas Geiger <lukas.geiger94@gmail.com > 
						
						
					 
					
						2025-09-16 15:32:47 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						73cfb3c5ee 
					 
					
						
						
							
							[Model] Clean up and simplify Mamba2 Metadata Usage in both V0 and V1 ( #24331 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chih-Chieh-Yang <7364402+cyang49@users.noreply.github.com > 
						
						
					 
					
						2025-09-16 14:53:43 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4e5affeaa1 
					 
					
						
						
							
							[CI] Add Decode Context Parallelism (DCP) test to CI ( #24487 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Ming Yang <minos.future@gmail.com > 
						
						
					 
					
						2025-09-16 21:21:28 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e4f0b4cd96 
					 
					
						
						
							
							(doc): set cmake c++ compatible standard when building on MacOS CPU. ( #23483 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: teekenl <teekenlau@gmail.com >
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com >
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-09-16 06:08:46 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						de3e53a75b 
					 
					
						
						
							
							feat: Add Grafana and Perces monitoring dashboards for vLLM ( #23498 )  
						
						 
						
						
						
						
					 
					
						2025-09-16 05:53:40 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						85e0df1392 
					 
					
						
						
							
							[Docs] move benchmarks README to contributing guides ( #24820 )  
						
						 
						
						
						
						
					 
					
						2025-09-16 05:52:57 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0faf3cc3e8 
					 
					
						
						
							
							Move SpeculativeConfig from config/__init__.py to config/speculative.py ( #24904 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-09-16 12:51:35 +01:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7ea5c73ad7 
					 
					
						
						
							
							[Feat][EPLB] A novel static EPLB placement strategy for MoE models. ( #23745 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: bruceszchen <bruceszchen@tencent.com >
Signed-off-by: Chen Bruce <bruceszchen@tencent.com >
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com >
Signed-off-by: Chen Bruce <cszwwdz@vip.qq.com >
Co-authored-by: lemon412 <lemon412@foxmail.com >
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-09-16 10:55:16 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						27fcfe7bcf 
					 
					
						
						
							
							[Mamba] Support TP>1 with quantization for mamba2 mixer in case n_groups % tp_size == 0 ( #24593 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Tomer Asida <57313761+tomeras91@users.noreply.github.com >
Signed-off-by: tomeras91 <57313761+tomeras91@users.noreply.github.com >
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> 
						
						
					 
					
						2025-09-16 10:51:01 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						68dbde5dbb 
					 
					
						
						
							
							[Bugfix] remove duplicate tokens streamed in required tool choice streaming ( #23312 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jason Cheng <jasoncky96@gmail.com >
Co-authored-by: Chauncey <chaunceyjiang@gmail.com > 
						
						
					 
					
						2025-09-16 15:16:32 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						04ad0dc275 
					 
					
						
						
							
							[benchmark] Add triton version in the moe tuned config ( #24769 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-09-16 14:10:54 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						238c4c1705 
					 
					
						
						
							
							[QWEN NEXT] Fused MoE kernels Optimization configs ( #24924 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Saman Keon <samanamp@outlook.com >
Co-authored-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-09-16 13:06:03 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8c54610265 
					 
					
						
						
							
							[Bug] [Spec Dec]: Fix kv_cache dtype mismatch for Eagle3 drafter on FP8 target ( #24505 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: vllmellm <vllm.ellm@embeddedllm.com > 
						
						
					 
					
						2025-09-16 04:45:38 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						17871983a2 
					 
					
						
						
							
							[Bugfix] Fix sequence parallelism bug when enable pipeline parallelism ( #24021 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: cascade812 <cascade812@outlook.com > 
						
						
					 
					
						2025-09-16 04:32:32 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						759ef49b15 
					 
					
						
						
							
							Remove V0 Encoder-Decoder Support ( #24907 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk@thinkingmachines.ai > 
						
						
					 
					
						2025-09-15 21:17:14 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5206ab20ba 
					 
					
						
						
							
							[XPU] Fix circular import error.  ( #24927 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Kunshang Ji <kunshang.ji@intel.com > 
						
						
					 
					
						2025-09-16 03:35:36 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0af3ce1355 
					 
					
						
						
							
							Upgrade flashinfer to 0.3.1 ( #24470 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lu Fang <lufang@fb.com >
Co-authored-by: Michael Goin <mgoin64@gmail.com > 
						
						
					 
					
						2025-09-16 02:36:09 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e1279ef00f 
					 
					
						
						
							
							[Docs] Update instructions for how to using existing torch binary ( #24892 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Richard Zou <zou3519@gmail.com >
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com >
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-09-16 02:25:50 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2942970d44 
					 
					
						
						
							
							[Metrics] Hide deprecated metrics with gpu_ prefix ( #24245 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Mark McLoughlin <markmc@redhat.com > 
						
						
					 
					
						2025-09-15 20:15:57 -06:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3c96e7b8a1 
					 
					
						
						
							
							[CI] Small Accuracy Eval Test for Deepseek Model ( #24259 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yewentao256 <zhyanwentao@126.com > 
						
						
					 
					
						2025-09-15 20:14:50 -06:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b42566f440 
					 
					
						
						
							
							[Bug] Fix is_flashmla_supported Check Error ( #24774 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yewentao256 <zhyanwentao@126.com > 
						
						
					 
					
						2025-09-15 20:10:55 -06:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d96e11167d 
					 
					
						
						
							
							Add pytest-cov and .coveragerc ( #24778 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Reza Barazesh <rezabarazesh@meta.com > 
						
						
					 
					
						2025-09-15 20:08:46 -06:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2891603efd 
					 
					
						
						
							
							[ROCm][Bugfix] Fix the case where there's bias ( #24895 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Gregory Shtrasberg <Gregory.Shtrasberg@amd.com > 
						
						
					 
					
						2025-09-15 20:05:12 -06:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						de2cc3d867 
					 
					
						
						
							
							[Deprecation] Remove DeepGEMM Old Symbol Wrapper ( #24902 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yewentao256 <zhyanwentao@126.com > 
						
						
					 
					
						2025-09-15 20:03:29 -06:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e95084308b 
					 
					
						
						
							
							Updated CODEOWNERS for flashinfer, mla, fused_moe ( #24906 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-09-16 02:01:28 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7f6f2c1182 
					 
					
						
						
							
							HuggingFace -> Hugging Face in Integration with Hugging Face docs (#24889 )  
						
						 
						
						
						
						
					 
					
						2025-09-15 17:28:35 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5bcc153d7b 
					 
					
						
						
							
							[Compile] Fix noop_elimination pass and add tests for noop_elimination ( #24880 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: zjy0516 <riverclouds.zhu@qq.com > 
						
						
					 
					
						2025-09-15 23:33:18 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						45bfa49cb8 
					 
					
						
						
							
							[Tests] fix initialization of kv hash in tests ( #24273 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Mickael Seznec <mickael@mistral.ai > 
						
						
					 
					
						2025-09-15 21:48:27 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						fd2f10546c 
					 
					
						
						
							
							[ci] fix wheel names for arm wheels ( #24898 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: simon-mo <simon.mo@hey.com > 
						
						
					 
					
						2025-09-15 14:39:08 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e757a629e7 
					 
					
						
						
							
							[Bug] Fix Cutlass Scaled MM Compilation Error ( #24887 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yewentao256 <zhyanwentao@126.com > 
						
						
					 
					
						2025-09-15 17:21:17 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						aae725af7c 
					 
					
						
						
							
							[Performance] Remove redundant clone() calls in cutlass_mla ( #24891 )  
						
						 
						
						
						
						
					 
					
						2025-09-15 20:21:53 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						73df49ef3a 
					 
					
						
						
							
							[gpt-oss][1a] create_responses stream outputs BaseModel type, api server is SSE still ( #24759 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Andrew Xia <axia@meta.com > 
						
						
					 
					
						2025-09-15 13:08:08 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						25aba2b6a3 
					 
					
						
						
							
							[gpt-oss] Add IncompleteDetails to ResponsesRepsonse ( #24561 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Andrew Xia <axia@meta.com > 
						
						
					 
					
						2025-09-15 13:07:55 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						94b03f88dd 
					 
					
						
						
							
							Bump Flashinfer to 0.3.1 ( #24868 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: bbartels <benjamin@bartels.dev > 
						
						
					 
					
						2025-09-15 12:45:55 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						49bfc538e4 
					 
					
						
						
							
							Update num_tokens_across_dp to use nccl instead of gloo ( #24105 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Sage Moore <sage@neuralmagic.com > 
						
						
					 
					
						2025-09-15 19:05:48 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a0b26701c9 
					 
					
						
						
							
							[Transform] Deterministic Hadacore Transforms ( #24106 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Kyle Sayers <kylesayrs@gmail.com > 
						
						
					 
					
						2025-09-15 12:59:31 -06:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c4afdb69cc 
					 
					
						
						
							
							Move MultiModalConfig from config/__init__.py to config/multimodal.py ( #24659 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com >
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> 
						
						
					 
					
						2025-09-15 17:43:16 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b834b4cbf1 
					 
					
						
						
							
							[USAGE] Improve error handling for weight initialization in Unquantized… ( #20321 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Rafael Marcelino Koike <rafael.koike@oracle.com >
Signed-off-by: Rafael Koike <koike.rafael@gmail.com > 
						
						
					 
					
						2025-09-15 16:45:49 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						740f0647b1 
					 
					
						
						
							
							Reinstate existing torch script ( #24729 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-09-15 09:43:40 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						01413e0cf5 
					 
					
						
						
							
							Fp8 paged attention update ( #22222 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Xiao Yu <xiao.yu@amd.com >
Signed-off-by: xiao-llm <xiao.yu.dc@outlook.com >
Co-authored-by: Xiao Yu <xiao.yu@metamaterial.com >
Co-authored-by: Xiao Yu <xiao.yu@amd.com >
Co-authored-by: Bowen Bao <bowenbao@amd.com > 
						
						
					 
					
						2025-09-15 10:43:26 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0e219cd50b 
					 
					
						
						
							
							[Bugfix] Fix GLM4.1V multimodal processor with compatability for Transformers v4.56 ( #24822 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn > 
						
						
					 
					
						2025-09-15 20:45:06 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						72c99f2a75 
					 
					
						
						
							
							[Model]: support Ling2.0 ( #24627 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: vito.yy <vito.yy@antgroup.com >
Co-authored-by: Isotr0py <mozf@mail2.sysu.edu.cn > 
						
						
					 
					
						2025-09-15 05:09:30 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						bf214ca226 
					 
					
						
						
							
							[Misc] Fix examples openai_pooling_client.py  ( #24853 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: wang.yuqi <noooop@126.com >
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-09-15 11:57:30 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2e41f5abca 
					 
					
						
						
							
							[XPU] Set consistent default KV cache layout ( #24745 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: NickLucche <nlucches@redhat.com > 
						
						
					 
					
						2025-09-15 18:09:34 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						bc0f6059a2 
					 
					
						
						
							
							[UT] enhance free kv cache block queue popleft_n ( #24220 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Andy Xie <andy.xning@gmail.com > 
						
						
					 
					
						2025-09-15 10:04:37 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8de261b04a 
					 
					
						
						
							
							[P/D]kv_output_aggregator support P TP > D TP ( #23917 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: LCAIZJ <leichao139636@163.com >
Co-authored-by: leichao.lc <leichao.lc@antgroup.com > 
						
						
					 
					
						2025-09-15 11:36:06 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a0d8b9738d 
					 
					
						
						
							
							[Misc] Own KVConnectors installation ( #24867 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: NickLucche <nlucches@redhat.com > 
						
						
					 
					
						2025-09-15 02:21:09 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						59e17dd4a0 
					 
					
						
						
							
							[Misc] rename interval to max_recent_requests ( #24229 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Andy Xie <andy.xning@gmail.com > 
						
						
					 
					
						2025-09-15 09:18:42 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4979eb79da 
					 
					
						
						
							
							[Doc]: fix typos in various files ( #24821 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Didier Durand <durand.didier@gmail.com > 
						
						
					 
					
						2025-09-15 01:08:52 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a8c0f59973 
					 
					
						
						
							
							[Bugfix] MiDashengLM model contact error under concurrent testing ( #24738 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: chenbing8 <chenbing8@xiaomi.com >
Signed-off-by: bingchen-mi <chenbing8@xiaomi.com > 
						
						
					 
					
						2025-09-15 06:38:12 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f4a948f33f 
					 
					
						
						
							
							[Frontend] Skip stop in reasoning content ( #14550 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Ce Gao <cegao@tensorchord.ai >
Co-authored-by: Chauncey <chaunceyjiang@gmail.com > 
						
						
					 
					
						2025-09-15 06:04:55 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3f3313981c 
					 
					
						
						
							
							[kv cache] update num_free_blocks in the end ( #24228 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Andy Xie <andy.xning@gmail.com > 
						
						
					 
					
						2025-09-15 05:15:12 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						78818dd1b0 
					 
					
						
						
							
							[Docs] Have a try to improve frameworks/streamlit.md ( #24841 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: windsonsea <haifeng.yao@daocloud.io > 
						
						
					 
					
						2025-09-14 21:50:36 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8e5cdcda4e 
					 
					
						
						
							
							[Hybrid Allocator] Support Pipeline Parallel ( #23974 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chen Zhang <zhangch99@outlook.com > 
						
						
					 
					
						2025-09-14 15:55:17 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						90f3f7d73e 
					 
					
						
						
							
							[Spec Decoding]Support Spec Decoding Metrics in DP Mode ( #24049 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: wuhang <wuhang6@huawei.com >
Co-authored-by: Robert Shaw <114415538+robertgshaw2-redhat@users.noreply.github.com > 
						
						
					 
					
						2025-09-14 21:11:09 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6dc8da5dc1 
					 
					
						
						
							
							[Chore] Remove ipex_ops warning ( #24835 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Robert Shaw <114415538+robertgshaw2-redhat@users.noreply.github.com > 
						
						
					 
					
						2025-09-14 19:41:53 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						79cbcab871 
					 
					
						
						
							
							Force use C++17 globally to avoid compilation error ( #24823 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: chenfengjin <1871653365@qq.com > 
						
						
					 
					
						2025-09-14 19:30:10 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ff68035932 
					 
					
						
						
							
							[Benchmarks] Throw usage error when using dataset-name random and dataset-path together ( #24819 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Ye (Charlotte) Qi <yeq@meta.com > 
						
						
					 
					
						2025-09-14 17:50:01 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1177dd53e9 
					 
					
						
						
							
							fix type of sampling rate for encode_base64 ( #24826 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: co63oc <co63oc@users.noreply.github.com > 
						
						
					 
					
						2025-09-14 16:17:16 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						fc2dbcda8b 
					 
					
						
						
							
							[Perf] Fix DeepGEMM Contiguous Layout Issue, 5.5% Throughput Improvement ( #24783 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yewentao256 <zhyanwentao@126.com >
Co-authored-by: Robert Shaw <114415538+robertgshaw2-redhat@users.noreply.github.com > 
						
						
					 
					
						2025-09-14 11:20:17 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						fec347dee1 
					 
					
						
						
							
							[Misc] Improve s3_utils type hints with BaseClient ( #24825 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Zerohertz <ohg3417@gmail.com > 
						
						
					 
					
						2025-09-14 12:11:14 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						cc3173ae98 
					 
					
						
						
							
							[Multi Modal][Performance] Fused Q,K's apply_rope into one ( #24511 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: wwl2755 <wangwenlong2755@gmail.com >
Co-authored-by: Isotr0py <mozf@mail2.sysu.edu.cn > 
						
						
					 
					
						2025-09-14 08:10:21 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3e903b6cb4 
					 
					
						
						
							
							[Chore] Minor simplification for non-PP path ( #24810 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk@thinkingmachines.ai > 
						
						
					 
					
						2025-09-13 17:41:36 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						973c9d01da 
					 
					
						
						
							
							[Minor] Simplify duplicative device check for cuda ( #24793 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Ziliang Peng <ziliangdotme@gmail.com > 
						
						
					 
					
						2025-09-13 18:28:38 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						15b8fef453 
					 
					
						
						
							
							Remove redundant assignment in xfer_buffers, This is a little fix ( #24732 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: ChenTaoyu-SJTU <ctynb@qq.com > 
						
						
					 
					
						2025-09-13 08:11:59 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						cfa3234a5b 
					 
					
						
						
							
							[CI][Spec Decode] Adjust threshold for flaky ngram spec decoding test again ( #24771 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: wwl2755 <wangwenlong2755@gmail.com > 
						
						
					 
					
						2025-09-13 15:45:11 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						41ae4a1eab 
					 
					
						
						
							
							[Doc]: fix typos in various files ( #24798 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Didier Durand <durand.didier@gmail.com > 
						
						
					 
					
						2025-09-13 00:43:33 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4dad72f0d9 
					 
					
						
						
							
							[Misc] Correct an outdated comment. ( #24765 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Russell Bryant <rbryant@redhat.com > 
						
						
					 
					
						2025-09-13 00:34:53 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						59d7ffc17f 
					 
					
						
						
							
							[CI Failure] Fix test_flashinfer_cutlass_mxfp4_mxfp8_fused_moe ( #24750 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-09-13 07:29:19 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1da0f1441d 
					 
					
						
						
							
							[Core][Multimodal] Cache supports_kw ( #24773 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lukas Geiger <lukas.geiger94@gmail.com > 
						
						
					 
					
						2025-09-13 07:27:04 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						98229db244 
					 
					
						
						
							
							[Kernels][DP/EP] Optimize Silu Kernel for R1 ( #24054 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: elvircrn <elvircrn@gmail.com > 
						
						
					 
					
						2025-09-13 00:17:27 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						dbeee3844c 
					 
					
						
						
							
							[Perf] Use NVIDIA hardware-accelerated instruction for float to fp8_e4m3 quantization ( #24757 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: elvischenv <219235043+elvischenv@users.noreply.github.com > 
						
						
					 
					
						2025-09-13 00:16:24 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						30498f2a65 
					 
					
						
						
							
							[Doc]: Remove 404 hyperlinks ( #24785 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Rakesh Asapanna  <45640029+rozeappletree@users.noreply.github.com > 
						
						
					 
					
						2025-09-13 00:15:41 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						abc7989adc 
					 
					
						
						
							
							[Docs] Remove Neuron install doc as backend no longer exists ( #24396 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-09-13 00:15:03 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9a8966bcc2 
					 
					
						
						
							
							[Docs] Fix warnings in mkdocs build (continued) ( #24791 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Zerohertz <ohg3417@gmail.com > 
						
						
					 
					
						2025-09-13 00:13:44 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5febdc8750 
					 
					
						
						
							
							[Chore] Remove unused batched RoPE op & kernel ( #24789 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-09-13 00:08:20 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						99bfef841f 
					 
					
						
						
							
							[Bugfix] Fix GPUModelRunner has no attribute lora_manager ( #24762 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-09-12 23:55:14 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						89e08d6d18 
					 
					
						
						
							
							[Model] Add Olmo3 model implementation ( #24534 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Shane A <shanea@allenai.org >
Co-authored-by: Isotr0py <mozf@mail2.sysu.edu.cn > 
						
						
					 
					
						2025-09-13 03:26:21 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7f2ea7074e 
					 
					
						
						
							
							[Frontend][Multimodal] Allow skipping media data when UUIDs are provided.  ( #23950 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Roger Wang <hey@rogerw.io >
Signed-off-by: Chenheli Hua <huachenheli@outlook.com >
Signed-off-by: Roger Wang <hey@rogerw.me >
Co-authored-by: Roger Wang <hey@rogerw.io >
Co-authored-by: Roger Wang <hey@rogerw.me > 
						
						
					 
					
						2025-09-13 02:16:06 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4fdd6f5cbf 
					 
					
						
						
							
							[Core] Support async scheduling with uniproc executor  ( #24219 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com >
Signed-off-by: Ronald1995 <ronaldautomobile@163.com >
Co-authored-by: Ronald1995 <ronaldautomobile@163.com >
Co-authored-by: Robert Shaw <114415538+robertgshaw2-redhat@users.noreply.github.com > 
						
						
					 
					
						2025-09-12 16:34:28 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8226dd56bf 
					 
					
						
						
							
							[Qwen3Next] Fixes the cuda graph capture conditions under large batch sizes ( #24660 ) ( #24667 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Tao He <linzhu.ht@alibaba-inc.com > 
						
						
					 
					
						2025-09-12 22:31:32 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5fe643fc26 
					 
					
						
						
							
							Add FLASHINFER_MLA to backend selector test ( #24753 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Matthew Bonanni <mbonanni001@gmail.com > 
						
						
					 
					
						2025-09-12 22:30:07 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7ba32aa60b 
					 
					
						
						
							
							[Attention][FlashInfer] Enable FP8 FlashInfer (TRTLLM) MLA decode ( #24705 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Matthew Bonanni <mbonanni001@gmail.com > 
						
						
					 
					
						2025-09-12 15:45:53 -06:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c89ed8de43 
					 
					
						
						
							
							Invert pattern order to make sure that out_proj layers are identified ( #24781 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Alexandre Marques <almarque@redhat.com > 
						
						
					 
					
						2025-09-12 14:45:29 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3beadc2f25 
					 
					
						
						
							
							[Compilation Bug] Fix Inductor Graph Output with Shape Issue ( #24772 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yewentao256 <zhyanwentao@126.com > 
						
						
					 
					
						2025-09-12 21:23:05 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						bc636f21a6 
					 
					
						
						
							
							[Benchmark] Allow arbitrary headers to be passed to benchmarked endpoints ( #23937 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Clayton Coleman <smarterclayton@gmail.com > 
						
						
					 
					
						2025-09-12 13:57:53 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						017354c0ef 
					 
					
						
						
							
							[CI] Trigger BC Linter when labels are added/removed ( #24767 )  
						
						 
						
						
						
						
					 
					
						2025-09-12 11:44:36 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						010acc6e1e 
					 
					
						
						
							
							[Bugfix] Fix incompatibility between  #20452  and  #24548  ( #24754 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-09-12 11:17:29 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c8c42597ab 
					 
					
						
						
							
							[CI] Speed up model unit tests in CI ( #24253 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Andrew Feldman <afeldman@redhat.com > 
						
						
					 
					
						2025-09-12 10:36:50 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9d2a44606d 
					 
					
						
						
							
							[UX] Remove AsyncLLM torch profiler disabled log ( #24609 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-09-12 10:08:44 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f17c075884 
					 
					
						
						
							
							[Model] Switch to Fused RMSNorm in GLM-4.1V model ( #24733 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: SamitHuang <285365963@qq.com > 
						
						
					 
					
						2025-09-12 09:12:23 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b0d1213ac3 
					 
					
						
						
							
							[Models] Prevent CUDA sync in Qwen2.5-VL ( #24741 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lukas Geiger <lukas.geiger94@gmail.com > 
						
						
					 
					
						2025-09-12 16:03:55 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						57f94e88ea 
					 
					
						
						
							
							[Models] Optimise and simplify _validate_and_reshape_mm_tensor ( #24742 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lukas Geiger <lukas.geiger94@gmail.com > 
						
						
					 
					
						2025-09-12 15:37:37 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						684b6870e1 
					 
					
						
						
							
							[Bugfix][Frontend] Fix --enable-log-outputs does not match the documentation ( #24626 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Kebe <mail@kebe7jun.com > 
						
						
					 
					
						2025-09-12 08:01:24 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a5b84f1cbf 
					 
					
						
						
							
							[Core] Shared memory based object store for Multimodal data caching and IPC ( #20452 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: donglu <donglu@cohere.com > 
						
						
					 
					
						2025-09-12 07:54:17 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9f04d9d55f 
					 
					
						
						
							
							[Qwen3-Next] MoE configs for H100 TP=1,2 and TP2/EP ( #24739 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: elvircrn <elvircrn@gmail.com > 
						
						
					 
					
						2025-09-12 07:54:04 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4d7c1d531b 
					 
					
						
						
							
							[Bugfix] Fix MRoPE dispatch on XPU ( #24724 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Yan Ma <yan.ma@intel.com > 
						
						
					 
					
						2025-09-12 21:43:56 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						41f17bf290 
					 
					
						
						
							
							[Docs] Fix warnings in mkdocs build (continued) ( #24740 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Zerohertz <ohg3417@gmail.com > 
						
						
					 
					
						2025-09-12 06:43:15 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						bcb06d7baf 
					 
					
						
						
							
							[Doc]: fix typos in various files ( #24726 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Didier Durand <durand.didier@gmail.com > 
						
						
					 
					
						2025-09-12 06:43:12 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0377802c20 
					 
					
						
						
							
							[Multimodal] Remove legacy multimodal fields in favor of MultiModalFeatureSpec  ( #24548 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: sfeng33 <4florafeng@gmail.com > 
						
						
					 
					
						2025-09-12 21:42:23 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						72fc8aa412 
					 
					
						
						
							
							[Multi Modal] Add FA3 in VIT ( #24347 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: wwl2755 <wangwenlong2755@gmail.com > 
						
						
					 
					
						2025-09-12 21:27:24 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						fdb09c77d6 
					 
					
						
						
							
							[sleep mode] save memory for on-the-fly quantization ( #24731 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: youkaichao <youkaichao@gmail.com > 
						
						
					 
					
						2025-09-12 11:25:19 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7a1c4025f1 
					 
					
						
						
							
							[Kernel] [CPU] refactor cpu_attn.py:_run_sdpa_forward for better memory access ( #24701 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: ignaciosica <mignacio.sica@gmail.com > 
						
						
					 
					
						2025-09-12 19:23:07 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						60a0951924 
					 
					
						
						
							
							[Bugfix] Fix BNB name match ( #24735 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-09-12 11:12:01 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						64d90c3e4f 
					 
					
						
						
							
							[Misc][gpt-oss] Add gpt-oss label to PRs that mention harmony or related to builtin tool call ( #24717 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chen Zhang <zhangch99@outlook.com > 
						
						
					 
					
						2025-09-12 18:57:07 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						59d5d2c736 
					 
					
						
						
							
							[CI/Build] Skip prompt embeddings tests on V1-only CPU backend ( #24721 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: jiang1.li <jiang1.li@intel.com > 
						
						
					 
					
						2025-09-12 18:51:01 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d21a36f5f9 
					 
					
						
						
							
							[CI] Add ci_envs for convenient local testing ( #24630 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: wang.yuqi <noooop@126.com > 
						
						
					 
					
						2025-09-12 08:52:25 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						561a0baee0 
					 
					
						
						
							
							[CI] Fix flaky test  v1/worker/test_gpu_model_runner.py::test_kv_cache_stride_order          ( #24640 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chen Zhang <zhangch99@outlook.com > 
						
						
					 
					
						2025-09-12 07:49:09 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f592b3174b 
					 
					
						
						
							
							[BugFix] Fix Qwen3-Next PP ( #24709 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-09-11 23:35:04 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7920de0a2a 
					 
					
						
						
							
							[Bugfix] Fix MRoPE dispatch on CPU ( #24712 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: jiang1.li <jiang1.li@intel.com > 
						
						
					 
					
						2025-09-12 04:56:31 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ddcec289c7 
					 
					
						
						
							
							Fix implementation divergence for BLOOM models between vLLM and HuggingFace when using prompt embeds ( #24686 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Andrew Sansom <andrew@protopia.ai > 
						
						
					 
					
						2025-09-12 04:35:48 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e090b7b45b 
					 
					
						
						
							
							Enable conversion of multimodal models to pooling tasks ( #24451 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Max de Bayser <mbayser@br.ibm.com > 
						
						
					 
					
						2025-09-12 03:30:41 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6a50eaa0d3 
					 
					
						
						
							
							[DOCs] Update ROCm installation docs section ( #24691 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Gregory Shtrasberg <Gregory.Shtrasberg@amd.com > 
						
						
					 
					
						2025-09-11 20:02:53 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						12a8414d81 
					 
					
						
						
							
							[Qwen3-Next] MoE configs for H20 TP=1,2,4,8 ( #24707 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-09-12 10:06:26 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						880c741bb6 
					 
					
						
						
							
							[Bugfix] fixes the causal_conv1d_update kernel update non-speculative decoding cases ( #24680 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Tao He <linzhu.ht@alibaba-inc.com >
Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-09-11 18:16:43 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						40b6c9122b 
					 
					
						
						
							
							[V1] feat:add engine v1 tracing ( #20372 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Mu Huai <tianbowen.tbw@antgroup.com >
Signed-off-by: Ye Zhang <zhysishu@gmail.com >
Signed-off-by: RichardoMu <44485717+RichardoMrMu@users.noreply.github.com >
Signed-off-by: simon-mo <simon.mo@hey.com >
Signed-off-by: Aaron Pham <contact@aarnphm.xyz >
Signed-off-by: 22quinn <33176974+22quinn@users.noreply.github.com >
Co-authored-by: Mu Huai <tianbowen.tbw@antgroup.com >
Co-authored-by: Ye Zhang <zhysishu@gmail.com >
Co-authored-by: Benjamin Bartels <benjamin@bartels.dev >
Co-authored-by: simon-mo <simon.mo@hey.com >
Co-authored-by: 瑜琮 <ly186375@antfin.com >
Co-authored-by: Aaron Pham <contact@aarnphm.xyz >
Co-authored-by: 22quinn <33176974+22quinn@users.noreply.github.com > 
						
						
					 
					
						2025-09-11 17:10:39 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2e6bc46821 
					 
					
						
						
							
							[Startup] Make DeepGEMM warmup scale with max-num-batched-tokens ( #24693 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com > 
						
						
					 
					
						2025-09-11 20:10:19 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						fcba05c435 
					 
					
						
						
							
							[Bug] Fix Layer weight_block_size Assertion Issue ( #24674 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yewentao256 <zhyanwentao@126.com > 
						
						
					 
					
						2025-09-11 19:47:59 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7a30fa8708 
					 
					
						
						
							
							[Doc] Clarify cudagraph capture size logic and default behavior in scheduler ( #18698 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Zazzle516 <2405677060@qq.com >
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com >
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-09-11 23:18:09 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f82f7a8990 
					 
					
						
						
							
							[Qwen3-Next] MOE configs for H100 TP4 ( #24699 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chen Zhang <zhangch99@outlook.com > 
						
						
					 
					
						2025-09-11 15:45:52 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c3aea10dc8 
					 
					
						
						
							
							[Perf] Use upstream CUTLASS for SM90 Block FP8 kernel ( #23280 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com >
Co-authored-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com > 
						
						
					 
					
						2025-09-11 15:43:14 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d4fd2768ef 
					 
					
						
						
							
							[Bugfix][Attention] Fix FlashInfer MLA block size logic ( #24692 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Matthew Bonanni <mbonanni001@gmail.com > 
						
						
					 
					
						2025-09-11 22:39:42 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7a70a71892 
					 
					
						
						
							
							[Qwen3-Next] Add B200 MoE configs for Qwen3-next ( #24698 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Vadim Gimpelson <vadim.gimpelson@gmail.com > 
						
						
					 
					
						2025-09-11 15:34:58 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7d4651997a 
					 
					
						
						
							
							[CI/Build] Add bc-linter to vLLM CI ( #21234 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: zhewenli <zhewenli@meta.com > 
						
						
					 
					
						2025-09-11 15:34:36 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						569bf1c9c0 
					 
					
						
						
							
							[Qwen3-Next] MoE configs for H200 TP=1,2,4 ( #24695 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk@thinkingmachines.ai > 
						
						
					 
					
						2025-09-11 14:38:16 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1ec20355f5 
					 
					
						
						
							
							[Bugfix] Set VLLM_ALLREDUCE_USE_SYMM_MEM default to False ( #24696 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yewentao256 <zhyanwentao@126.com > 
						
						
					 
					
						2025-09-11 14:32:27 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e42af78b18 
					 
					
						
						
							
							[flashinfer] [kernel] support for fp8 kv cache for trtllm prefill attention ( #24197 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Xiaozhu <mxz297@gmail.com > 
						
						
					 
					
						2025-09-11 14:20:09 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						074854b24f 
					 
					
						
						
							
							[Kernel][B200] mxfp4 fused cutlass moe ( #23696 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Duncan Moss <djm.moss@gmail.com >
Signed-off-by: Michael Goin <mgoin64@gmail.com >
Signed-off-by: mgoin <mgoin64@gmail.com >
Co-authored-by: Robert Shaw <114415538+robertgshaw2-redhat@users.noreply.github.com >
Co-authored-by: Michael Goin <mgoin64@gmail.com > 
						
						
					 
					
						2025-09-11 17:04:56 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						79ac59f32e 
					 
					
						
						
							
							Update Spec Decode metrics to include drafted and accepted token throughput ( #24127 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Andrew Xia <axia@meta.com > 
						
						
					 
					
						2025-09-11 19:58:43 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b971f91504 
					 
					
						
						
							
							[BugFix] Fix tokenize asyncio task leak ( #24677 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-09-11 19:44:04 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c733bd5e87 
					 
					
						
						
							
							[Qwen3-Next] Add MoE Config for H200 ( #24688 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk@thinkingmachines.ai > 
						
						
					 
					
						2025-09-11 12:40:15 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a892b259b4 
					 
					
						
						
							
							[Doc] Remove Useless Comments ( #24687 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yewentao256 <zhyanwentao@126.com > 
						
						
					 
					
						2025-09-11 12:25:47 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						127ded0a9e 
					 
					
						
						
							
							[Ultravox] Use wrapped_model_config to instantiate inner model ( #24679 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Peter Salas <peter@fixie.ai > 
						
						
					 
					
						2025-09-11 18:52:24 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						bb2b5126da 
					 
					
						
						
							
							[VLM] Migrate remain DP-supported ViT models to use disable_tp ( #24363 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn > 
						
						
					 
					
						2025-09-11 18:30:41 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						361ae27f8a 
					 
					
						
						
							
							[Docs] Fix formatting of transcription doc ( #24676 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-09-11 11:18:06 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e26fef8397 
					 
					
						
						
							
							fix some typos ( #24616 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: co63oc <co63oc@users.noreply.github.com > 
						
						
					 
					
						2025-09-11 10:48:46 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c1eda615ba 
					 
					
						
						
							
							Fix model name included in responses ( #24663 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-09-11 10:47:51 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4aa23892d6 
					 
					
						
						
							
							[Bugfix] Fix platform-specific routing in CustomOp implementations ( #24444 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Konrad Zawora <kzawora@habana.ai > 
						
						
					 
					
						2025-09-11 17:15:01 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1fdd5c42d7 
					 
					
						
						
							
							[Kernels] Enable Torch Symmetric Memory All-Reduce By Default ( #24111 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: ilmarkov <markovilya197@gmail.com >
Co-authored-by: Michael Goin <mgoin64@gmail.com > 
						
						
					 
					
						2025-09-11 09:45:31 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						bcbe2a4d9e 
					 
					
						
						
							
							[VLM] Optimize GLM4.5-V-style video processing to only decode necessary frames ( #24161 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn > 
						
						
					 
					
						2025-09-11 09:44:34 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						51d41265ad 
					 
					
						
						
							
							[Docs] Fix typos in EP deployment doc ( #24669 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-09-11 09:07:23 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4984a291d5 
					 
					
						
						
							
							[Doc] Fix Markdown Pre-commit Error ( #24670 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yewentao256 <zhyanwentao@126.com > 
						
						
					 
					
						2025-09-11 09:05:59 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						404c85ca72 
					 
					
						
						
							
							[Docs] Add transcription support to model ( #24664 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: NickLucche <nlucches@redhat.com > 
						
						
					 
					
						2025-09-11 07:39:01 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						817beef7f3 
					 
					
						
						
							
							[Bugifx] Fix qwen-next packed_modules_mapping ( #24656 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-09-11 22:26:17 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4f6593b058 
					 
					
						
						
							
							[HybridKVCache][Platform] Add support_hybrid_kv_cache for platform ( #24646 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: MengqingCao <cmq0113@163.com > 
						
						
					 
					
						2025-09-11 21:47:58 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						94e6b2d55f 
					 
					
						
						
							
							Allow users to specify kv cache memory size ( #21489 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Boyuan Feng <boyuan@meta.com >
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com >
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-09-11 13:41:07 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						fd1ce98cdd 
					 
					
						
						
							
							[CI] Split mteb test from Language Models Test ( #24634 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: wang.yuqi <noooop@126.com > 
						
						
					 
					
						2025-09-11 06:37:51 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d11ec124a0 
					 
					
						
						
							
							[Bench] Add qwen-next in benchmark_moe.py ( #24661 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-09-11 21:29:43 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f510715882 
					 
					
						
						
							
							[build] add torch to tool.uv no-build-isolation-package ( #24303 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: youkaichao <youkaichao@gmail.com >
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com >
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-09-11 13:19:44 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f946197473 
					 
					
						
						
							
							[Docs] Fixes a typo in the qwen3next model name. ( #24654 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Tao He <linzhu.ht@alibaba-inc.com > 
						
						
					 
					
						2025-09-11 19:35:14 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0cd72a7b72 
					 
					
						
						
							
							[XPU] add missing dependency tblib for XPU CI ( #24639 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Fanli Lin <fanli.lin@intel.com > 
						
						
					 
					
						2025-09-11 11:22:33 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5f5271f1ee 
					 
					
						
						
							
							Move LoRAConfig from config/__init__.py to config/lora.py ( #24644 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-09-11 11:01:38 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d6249d0699 
					 
					
						
						
							
							Fix typing for safetensors_load_strategy ( #24641 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-09-11 10:41:39 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						25bb9e8c65 
					 
					
						
						
							
							[CI Failure] fix models/language/pooling/test_auto_prefix_cache_support.py ( #24636 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: wang.yuqi <noooop@126.com > 
						
						
					 
					
						2025-09-11 03:31:23 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a1213fae5f 
					 
					
						
						
							
							[Misc] Add @NickLucche to codeowners ( #24647 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: NickLucche <nlucches@redhat.com > 
						
						
					 
					
						2025-09-11 17:18:09 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a8b0361c92 
					 
					
						
						
							
							[CI] Split pooling from entrypoints Test ( #24632 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: wang.yuqi <noooop@126.com > 
						
						
					 
					
						2025-09-11 01:53:09 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ed5ae4aace 
					 
					
						
						
							
							[Bugfix] Fix _synced_weight_loader ( #24565 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Kyuyeun Kim <kyuyeunk@google.com > 
						
						
					 
					
						2025-09-11 16:52:33 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0fc36463e0 
					 
					
						
						
							
							[CI]Add transformers_utils to Async Engine, Inputs, Utils, Worker Test ( #24615 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Xingyu Liu <charlotteliu12x@gmail.com > 
						
						
					 
					
						2025-09-11 01:52:10 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d14c4ebf08 
					 
					
						
						
							
							[Docs] Use 1-2-3 list for deploy steps in deployment/frameworks/ ( #24633 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: windsonsea <haifeng.yao@daocloud.io > 
						
						
					 
					
						2025-09-11 01:50:12 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ba6011027d 
					 
					
						
						
							
							[Docs] Update V1 doc to reflect whisper support ( #24606 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Russell Bryant <rbryant@redhat.com > 
						
						
					 
					
						2025-09-11 01:50:08 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						85df8afdae 
					 
					
						
						
							
							[Docs] Revise frameworks/anything-llm.md ( #24489 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: windsonsea <haifeng.yao@daocloud.io > 
						
						
					 
					
						2025-09-11 01:50:05 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6aeb1dab4a 
					 
					
						
						
							
							[Bugfix] Fix incorrect import of CacheConfig ( #24631 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-09-11 01:48:25 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e93f4cc9e3 
					 
					
						
						
							
							Add the support for the qwen3 next model (a hybrid attention model). ( #24526 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Tao He <linzhu.ht@alibaba-inc.com >
Co-authored-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-09-11 15:32:09 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2048c4e379 
					 
					
						
						
							
							[torchao] Support quantization configs using module swap ( #21982 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jerry Zhang <jerryzh168@gmail.com > 
						
						
					 
					
						2025-09-10 23:53:24 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d13360183a 
					 
					
						
						
							
							Remove redundant all gather + split ( #23441 )  
						
						 
						
						... 
						
						
						
						Co-authored-by: Chenxi Yang <cxyang@meta.com >
Co-authored-by: Lu Fang <30275821+houseroad@users.noreply.github.com > 
						
						
					 
					
						2025-09-10 23:45:07 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9bd831f501 
					 
					
						
						
							
							[Model] New model support for Motif-1-Tiny ( #23414 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: ca1207 <ca1207zzz@gmail.com >
Signed-off-by: TaehyunKim <73943231+ca1207@users.noreply.github.com >
Co-authored-by: WyldeCat <skan1543@gmail.com >
Co-authored-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-09-10 23:29:40 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e2b1f863aa 
					 
					
						
						
							
							[Doc]: fixing doc typos ( #24635 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Didier Durand <durand.didier@gmail.com > 
						
						
					 
					
						2025-09-10 23:19:28 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						41329a0ff9 
					 
					
						
						
							
							[Core] feat: Add --safetensors-load-strategy flag for faster safetensors loading from Lustre ( #24469 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Shiqi Sheng <shengshiqi@google.com >
Signed-off-by: shengshiqi-google <160179165+shengshiqi-google@users.noreply.github.com >
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> 
						
						
					 
					
						2025-09-10 23:10:01 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ee0bc5e1b4 
					 
					
						
						
							
							Enable --profile in 'vllm bench throughput' ( #24575 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Tomas Ruiz <tomas.ruiz.te@gmail.com > 
						
						
					 
					
						2025-09-10 23:06:19 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3d1393f6fc 
					 
					
						
						
							
							Kimi K2 Fused MoE kernels Optimization configs ( #24597 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Saman Keon <samanamp@outlook.com > 
						
						
					 
					
						2025-09-10 23:06:16 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8a894084d2 
					 
					
						
						
							
							[Engine][Chore] use local variable and remove output var assignment ( #24554 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Guy Stone <guys@spotify.com > 
						
						
					 
					
						2025-09-10 23:05:42 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e2d8c27f68 
					 
					
						
						
							
							[BugFix] Fix pipeline parallel ( #24621 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-09-10 23:05:30 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						29799ddacc 
					 
					
						
						
							
							[Bugfix] Add missing VIT backend dispatch on CPU ( #24623 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: jiang1.li <jiang1.li@intel.com > 
						
						
					 
					
						2025-09-10 22:28:41 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f17a6aa4ec 
					 
					
						
						
							
							[Ultravox] Fix Gemma instantiation, support quantization via --hf-overrides ( #24131 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Peter Salas <peter@fixie.ai > 
						
						
					 
					
						2025-09-10 22:25:34 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6c8deacd72 
					 
					
						
						
							
							[Bug] [Spec Decode] Fix model_initialization test and mismatch in aux_hidden_layers ( #24613 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: wwl2755 <wangwenlong2755@gmail.com >
Signed-off-by: Roger Wang <hey@rogerw.io >
Signed-off-by: Cyrus Leung <cyrus.tl.leung@gmail.com >
Co-authored-by: Roger Wang <hey@rogerw.io >
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com > 
						
						
					 
					
						2025-09-10 21:23:18 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						55b823ba0f 
					 
					
						
						
							
							Add @chaunceyjiang to codeowner for reasoning Reasoning and Tool parser ( #24406 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com > 
						
						
					 
					
						2025-09-11 04:23:04 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8c5a747246 
					 
					
						
						
							
							[distributed] update known issues ( #24624 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: youkaichao <youkaichao@gmail.com > 
						
						
					 
					
						2025-09-11 11:09:38 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5931b7e5d9 
					 
					
						
						
							
							[Models][Quantization] Add quantization configuration update in Voxtral model ( #24122 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Alexandre Marques <almarque@redhat.com >
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Michael Goin <mgoin64@gmail.com > 
						
						
					 
					
						2025-09-10 19:13:56 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						cc99baf14d 
					 
					
						
						
							
							[Misc] Make timeout passable in init_distributed_environment ( #24522 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: jberkhahn <jaberkha@us.ibm.com > 
						
						
					 
					
						2025-09-10 15:41:12 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						dcb28a332b 
					 
					
						
						
							
							[Kernel] Flashinfer MLA (trtllm-gen) decode kernel integration ( #21078 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: hjjq <hanjieq@nvidia.com >
Signed-off-by: Michael Goin <mgoin64@gmail.com >
Signed-off-by: mgoin <mgoin64@gmail.com >
Co-authored-by: Michael Goin <mgoin64@gmail.com > 
						
						
					 
					
						2025-09-10 15:31:10 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						fba7856581 
					 
					
						
						
							
							[Perf] Warmup FlashInfer attention during startup ( #23439 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com >
Signed-off-by: Michael Goin <mgoin64@gmail.com >
Signed-off-by: Luka Govedič <lgovedic@redhat.com >
Co-authored-by: Luka Govedič <lgovedic@redhat.com >
Co-authored-by: Luka Govedič <ProExpertProg@users.noreply.github.com >
Co-authored-by: Matthew Bonanni <mbonanni001@gmail.com > 
						
						
					 
					
						2025-09-10 15:03:17 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b5e383cd8b 
					 
					
						
						
							
							[gpt-oss] raise error for flashinfer backend without trtllm ( #24482 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chen Zhang <zhangch99@outlook.com > 
						
						
					 
					
						2025-09-10 14:33:13 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9a161307f5 
					 
					
						
						
							
							[torch.compile][ROCm][V1] Enable attention output FP8 fusion for V1 attention backends ( #19767 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Gregory Shtrasberg <Gregory.Shtrasberg@amd.com >
Signed-off-by: Luka Govedič <lgovedic@redhat.com >
Co-authored-by: Luka Govedič <lgovedic@redhat.com >
Co-authored-by: Luka Govedič <ProExpertProg@users.noreply.github.com > 
						
						
					 
					
						2025-09-10 13:59:55 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						37e8182bfe 
					 
					
						
						
							
							[v1] Add Whisper model support (encoder-decoder) ( #21088 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Russell Bryant <rbryant@redhat.com >
Co-authored-by: NickLucche <nlucches@redhat.com > 
						
						
					 
					
						2025-09-10 13:53:35 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4db4426404 
					 
					
						
						
							
							[CI] Fail subprocess tests with root-cause error ( #23795 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-09-10 13:53:21 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a0933c3bd6 
					 
					
						
						
							
							[Bugfix] Enable FP8 KV cache for FlashInfer and Triton backend on non-sm100 GPUs ( #24577 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Thien Tran <gau.nernst@yahoo.com.sg > 
						
						
					 
					
						2025-09-10 12:33:41 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						09e68bce34 
					 
					
						
						
							
							[Misc] update log level debug to warning when process port is used by ( #24226 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: rongfu.leng <rongfu.leng@daocloud.io >
Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-09-10 11:32:57 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9fb74c27a7 
					 
					
						
						
							
							[Core] Support configuration parsing plugin ( #24277 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Xingyu Liu <charlotteliu12x@gmail.com >
Signed-off-by: Xingyu Liu <38244988+charlotte12l@users.noreply.github.com >
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> 
						
						
					 
					
						2025-09-10 11:32:43 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4032949630 
					 
					
						
						
							
							[Bugfix] Fix DeepEP config for DP4TP4 ( #23619 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Ming Yang <minos.future@gmail.com > 
						
						
					 
					
						2025-09-10 10:37:56 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						08abfa78ec 
					 
					
						
						
							
							[Bugfix] fix modelopt exclude_modules name mapping ( #24178 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Tomer Asida <57313761+tomeras91@users.noreply.github.com >
Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-09-10 10:20:46 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2bef2d1405 
					 
					
						
						
							
							[Logging] allow config logging stream ( #24336 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Shiyan Deng <dsy842974287@meta.com > 
						
						
					 
					
						2025-09-10 15:02:01 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						36cacd0958 
					 
					
						
						
							
							[Doc] Add documentation for GLM-4.5 series models: tool-calling and reasoning parser ( #24589 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: WangErXiao <863579016@qq.com > 
						
						
					 
					
						2025-09-10 07:50:55 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						bb3eb80d92 
					 
					
						
						
							
							[Core] Split LoRA layers ( #24574 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-09-10 07:47:51 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						fcc0a3130a 
					 
					
						
						
							
							[CI] Fix tensorizer test assertion ( #24545 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Peter Schuurman <psch@google.com > 
						
						
					 
					
						2025-09-10 06:57:36 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						736569da8d 
					 
					
						
						
							
							[Platform] Custom ops support for LMhead and LogitsProcessor ( #23564 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: zzhx1 <zzh_201018@outlook.com > 
						
						
					 
					
						2025-09-10 06:26:31 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2eb9986a2d 
					 
					
						
						
							
							[BugFix] python collect_env.py and vllm collect-env compatibility with uv venv ( #24066 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Kay Yan <kay.yan@daocloud.io > 
						
						
					 
					
						2025-09-10 21:25:33 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ccee371e86 
					 
					
						
						
							
							[Docs] Fix warnings in mkdocs build (continued) ( #24092 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Zerohertz <ohg3417@gmail.com >
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com >
Co-authored-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com > 
						
						
					 
					
						2025-09-10 06:23:28 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c0bd6a684a 
					 
					
						
						
							
							Fix Auto_Round Quatization Loading on SM75 and Lower GPUs ( #24217 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: RoadToNowhereX <37441177+RoadToNowhereX@users.noreply.github.com >
Co-authored-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com > 
						
						
					 
					
						2025-09-10 06:22:31 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3144d90217 
					 
					
						
						
							
							fix some typos ( #24167 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: co63oc <co63oc@users.noreply.github.com >
Co-authored-by: Russell Bryant <rbryant@redhat.com > 
						
						
					 
					
						2025-09-10 06:21:23 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2f5e5c18de 
					 
					
						
						
							
							[CI/Build] bump timm dependency ( #24189 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Daniele Trifirò <dtrifiro@redhat.com >
Co-authored-by: Russell Bryant <rbryant@redhat.com > 
						
						
					 
					
						2025-09-10 06:20:59 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						bd98842c8a 
					 
					
						
						
							
							[CI] Add PPL test for generation models ( #24485 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: wang.yuqi <noooop@126.com > 
						
						
					 
					
						2025-09-10 06:16:39 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d6069887c6 
					 
					
						
						
							
							[rocm] enable torchao quantization for rocm ( #24400 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lifan Shen <lifans@meta.com > 
						
						
					 
					
						2025-09-10 06:16:21 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						492196ed0e 
					 
					
						
						
							
							[CI/Build] split true unit tests to Entrypoints Unit Tests ( #24418 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Ye (Charlotte) Qi <yeq@meta.com > 
						
						
					 
					
						2025-09-10 06:16:07 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f4f1a8df22 
					 
					
						
						
							
							[BugFix] Ensure integrity of reused CPU tensors during async scheduling ( #24527 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com >
Co-authored-by: guoze.lin <guozelin@tencent.com > 
						
						
					 
					
						2025-09-10 21:15:14 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0b9a612fa3 
					 
					
						
						
							
							[BugFix][easy] Fix flaky test test_gpt_oss_multi_turn_chat ( #24549 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: lacora2017 <yehu@meta.com >
Co-authored-by: lacora2017 <yehu@meta.com > 
						
						
					 
					
						2025-09-10 21:14:55 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4c04eef706 
					 
					
						
						
							
							[BugFix][Multi Modal] Fix TensorSchema shape mismatch in Molmo ( #24559 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: wwl2755 <wangwenlong2755@gmail.com > 
						
						
					 
					
						2025-09-10 06:14:27 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f36355abfd 
					 
					
						
						
							
							Move LoadConfig from config/__init__.py to config/load.py ( #24566 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-09-10 06:14:18 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9e3c3a7df2 
					 
					
						
						
							
							[LoRA]: Add LoRA support to Mistral's Voxtral models ( #24517 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Yash Pratap Singh <yashsingh20001@gmail.com >
Co-authored-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-09-10 06:12:03 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6cbd41909e 
					 
					
						
						
							
							Feature/vit attention unification# 23880 ( #23978 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn >
Co-authored-by: Isotr0py <mozf@mail2.sysu.edu.cn > 
						
						
					 
					
						2025-09-10 06:10:14 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						72d30108a0 
					 
					
						
						
							
							Support for NemotronH Nano VLM ( #23644 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Daniel Afrimi <danielafrimi8@gmail.com > 
						
						
					 
					
						2025-09-10 06:10:06 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8b83b93739 
					 
					
						
						
							
							[Docs] Document the extra memory footprint overhead when using EPLB ( #24537 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com > 
						
						
					 
					
						2025-09-10 06:09:49 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9dbefd88e9 
					 
					
						
						
							
							[Docs] Improve organisation of API Reference nav ( #24569 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-09-10 06:08:21 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7c195d43da 
					 
					
						
						
							
							[ROCm][Bugfix] Fix Aiter RMSNorm  ( #23412 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: vllmellm <vllm.ellm@embeddedllm.com > 
						
						
					 
					
						2025-09-10 21:08:03 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0ae43dbf8c 
					 
					
						
						
							
							[Attention] add DCP support for FLASH_ATTN_MLA backend ( #24453 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com >
Signed-off-by: Matthew Bonanni <mbonanni@redhat.com >
Co-authored-by: Matthew Bonanni <mbonanni@redhat.com > 
						
						
					 
					
						2025-09-10 17:19:26 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						267c80d31f 
					 
					
						
						
							
							[Model] Limit CPU threads for image transformations in InternVL to reduce cpu contention. ( #24519 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: li-jinpeng <3332126450@qq.com >
Co-authored-by: Roger Wang <hey@rogerw.io > 
						
						
					 
					
						2025-09-10 16:45:44 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						77f62613f9 
					 
					
						
						
							
							Consolidate rendering parameters into RenderConfig dataclass ( #24543 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: sfeng33 <4florafeng@gmail.com > 
						
						
					 
					
						2025-09-10 08:44:47 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						feaf202e93 
					 
					
						
						
							
							[Bugfix] Guard _may_reorder_batch for encoder-only models on CPU ( #24319 ) ( #24348 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Remy <eunhwan.shin@dtonic.io >
Co-authored-by: Li, Jiang <jiang1.li@intel.com > 
						
						
					 
					
						2025-09-10 14:24:42 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						91130ae376 
					 
					
						
						
							
							[docs] promo pytorch conf and ray summit ( #24562 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: simon-mo <simon.mo@hey.com > 
						
						
					 
					
						2025-09-09 23:24:20 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e40827280b 
					 
					
						
						
							
							[Docs] Enable relative links in examples to function when rendered in the docs ( #24041 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-09-09 21:40:45 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4377b1ae3b 
					 
					
						
						
							
							[Bugfix] Update Run:AI Model Streamer Loading Integration ( #23845 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Omer Dayan (SW-GPU) <omer@run.ai >
Signed-off-by: Peter Schuurman <psch@google.com >
Co-authored-by: Omer Dayan (SW-GPU) <omer@run.ai >
Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-09-09 21:37:17 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						009d689b0c 
					 
					
						
						
							
							[Core] Simplify and unify mm uuid handling & auto-generated mm hash overrides processing.  ( #24271 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chenheli Hua <huachenheli@outlook.com > 
						
						
					 
					
						2025-09-09 21:36:09 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0efdb5c3ba 
					 
					
						
						
							
							[gpt-oss] Cache permute indices for faster MXFP4 MoE layer loading ( #24154 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Wei Wei <wwei6@meta.com > 
						
						
					 
					
						2025-09-10 04:27:53 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						53b42f4102 
					 
					
						
						
							
							[BugFix][Spec Decode] Fix out-of-range index triggered by eagle3; re-enable test for LlamaForCausalLMEagle3 ( #24392 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: wwl2755 <wangwenlong2755@gmail.com > 
						
						
					 
					
						2025-09-09 21:24:23 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						309d7aa401 
					 
					
						
						
							
							[P/D] MultiConnector supports shutdown ( #24425 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com > 
						
						
					 
					
						2025-09-09 21:24:11 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b4a01aaf95 
					 
					
						
						
							
							[KV Connector] More async support for get_num_new_matched_tokens ( #23620 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: ApostaC <yihua98@uchicago.edu > 
						
						
					 
					
						2025-09-09 21:23:37 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						83dd28aae4 
					 
					
						
						
							
							[CI] Adjust threshold for flaky ngram spec decoding test ( #24528 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-09-09 21:07:33 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f88e84016f 
					 
					
						
						
							
							[BugFix] Fix async core engine client finalizer ( #24540 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-09-09 21:07:13 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3c2156b3af 
					 
					
						
						
							
							[Hardware][Apple-CPU] Enable native bfloat16 on Apple Silicon (M2 and later) ( #24129 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: ignaciosica <mignacio.sica@gmail.com > 
						
						
					 
					
						2025-09-10 03:50:21 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7e7db04310 
					 
					
						
						
							
							[CI] Retry flaky fp8 cutlass mla tests ( #24536 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-09-09 20:33:10 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						41f160b974 
					 
					
						
						
							
							Add @heheda12345 to CODEOWNERS of KVCacheManager related code ( #24546 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chen Zhang <zhangch99@outlook.com > 
						
						
					 
					
						2025-09-10 03:30:32 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						dc625ea6b8 
					 
					
						
						
							
							[Perf] Convert np array to torch tensor to index into block table for attn chunking ( #24474 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Yong Hoon Shin <yhshin@meta.com > 
						
						
					 
					
						2025-09-09 20:01:06 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b23fb78623 
					 
					
						
						
							
							[Bugfix] Fix for 24530. Fix naive all2all shared expert overlap. ( #24538 )  
						
						 
						
						
						
						
					 
					
						2025-09-09 17:53:53 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						561f38dc3c 
					 
					
						
						
							
							[Bugfix] Improve EPLB config validation error message ( #24524 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com > 
						
						
					 
					
						2025-09-10 00:32:36 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						73e688cb79 
					 
					
						
						
							
							[ROCm][Feature] Enable Pipeline Parallelism with Ray Compiled Graph on ROCm ( #24275 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: charlifu <charlifu@amd.com > 
						
						
					 
					
						2025-09-09 23:27:35 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						fb1a8f932a 
					 
					
						
						
							
							[Benchmark] Add option to skip oversampling in benchmark ( #24457 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Ekagra Ranjan <3116519+ekagra-ranjan@users.noreply.github.com > 
						
						
					 
					
						2025-09-09 22:00:17 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0dc9cbb527 
					 
					
						
						
							
							[Benchmark] Update bench doc with mtbench, blazedit, spec bench ( #24450 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Ekagra Ranjan <3116519+ekagra-ranjan@users.noreply.github.com > 
						
						
					 
					
						2025-09-09 21:15:41 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b5fb3005a8 
					 
					
						
						
							
							[Log] Use a relative path in debug-level logs to distinguish files with identical names ( #23846 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: zjy0516 <riverclouds.zhu@qq.com > 
						
						
					 
					
						2025-09-09 16:46:35 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						15de5ff9ea 
					 
					
						
						
							
							[Feature] Disallow FlashMLA on Blackwell ( #24521 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yewentao256 <zhyanwentao@126.com >
Signed-off-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com >
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> 
						
						
					 
					
						2025-09-09 14:59:34 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b8a93076d3 
					 
					
						
						
							
							[CI] execute all piecewise compilation tests together ( #24502 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: zjy0516 <riverclouds.zhu@qq.com > 
						
						
					 
					
						2025-09-09 11:05:25 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c3f9773b2c 
					 
					
						
						
							
							[TPU] Fix tpu structured decoding in mixed batches ( #24458 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chenyaaang <chenyangli@google.com > 
						
						
					 
					
						2025-09-09 11:04:25 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3707cb2505 
					 
					
						
						
							
							[Docs] Gemma3n transcriptions endpoint support ( #24512 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: NickLucche <nlucches@redhat.com > 
						
						
					 
					
						2025-09-09 11:03:32 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						920ed46b09 
					 
					
						
						
							
							[Misc] bump outlines_core to fix the version conflicts with outlines >= 1.2.0 ( #24368 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Kazuhiro Serizawa <nserihiro@gmail.com >
Signed-off-by: Simon Mo <simon.mo@hey.com >
Co-authored-by: Aaron Pham <contact@aarnphm.xyz >
Co-authored-by: Simon Mo <simon.mo@hey.com > 
						
						
					 
					
						2025-09-09 10:59:46 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						15cb047e25 
					 
					
						
						
							
							Extend renderer with embedding support and integrate completion endpoint ( #24405 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: sfeng33 <4florafeng@gmail.com > 
						
						
					 
					
						2025-09-10 01:46:46 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9ad0688e43 
					 
					
						
						
							
							[Bugfix] Fix  hidden_size for multimodal classification model ( #24501 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-09-09 10:37:25 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b9a1c4c8a2 
					 
					
						
						
							
							[ROCm][CI/Build] Sync ROCm dockerfiles with the ROCm fork ( #24279 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Gregory Shtrasberg <Gregory.Shtrasberg@amd.com > 
						
						
					 
					
						2025-09-09 12:21:56 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1aa427fdc1 
					 
					
						
						
							
							[Kernels] Add Flash Linear Attention Kernels ( #24518 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: youkaichao <youkaichao@gmail.com > 
						
						
					 
					
						2025-09-10 00:04:41 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1c63a16b65 
					 
					
						
						
							
							[Core] Run garbage collector after CUDA graph capture to fix throughput regression ( #24128 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Gregory Shtrasberg <Gregory.Shtrasberg@amd.com >
Co-authored-by: Gregory Shtrasberg <Gregory.Shtrasberg@amd.com > 
						
						
					 
					
						2025-09-09 10:38:10 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						922d3b401b 
					 
					
						
						
							
							[Bugfix] Handle the edge case in detokenizer where processed tokens contain both stop str and eos token ( #23938 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: dtransposed <damian.bogunowicz@gmail.com > 
						
						
					 
					
						2025-09-09 07:30:24 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						19332c0479 
					 
					
						
						
							
							[Model] Systematic support for fp32 head, pooling models part ( #23810 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: wang.yuqi <noooop@126.com > 
						
						
					 
					
						2025-09-09 07:29:50 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a55cf41a09 
					 
					
						
						
							
							[Compilation][WideEP] Enable Piecewise CUDAGraph for DeepEPHT ( #24123 )  
						
						 
						
						
						
						
					 
					
						2025-09-09 10:21:10 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6fb2788163 
					 
					
						
						
							
							[CI/Build][Doc] Fully deprecate old bench scripts for serving / throughput / latency ( #24411 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Ye (Charlotte) Qi <yeq@meta.com > 
						
						
					 
					
						2025-09-09 10:02:35 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3d2a2de8f7 
					 
					
						
						
							
							[RL] fast weight update with zmq + ipc handles ( #24295 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: huangweixiao <huangweixiao@msh.team >
Signed-off-by: youkaichao <youkaichao@gmail.com >
Co-authored-by: youkaichao <youkaichao@gmail.com > 
						
						
					 
					
						2025-09-09 16:57:46 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1116590b16 
					 
					
						
						
							
							[gpt-oss] Validate gpt-oss python tool during initialization ( #23856 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chen Zhang <zhangch99@outlook.com > 
						
						
					 
					
						2025-09-09 08:37:48 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ccb97338af 
					 
					
						
						
							
							[Misc] Add Codex settings to gitignore ( #24493 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Roger Wang <hey@rogerw.me >
Co-authored-by: Roger Wang <hey@rogerw.me > 
						
						
					 
					
						2025-09-09 01:25:44 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						45c9cb5835 
					 
					
						
						
							
							[Misc] Add claude settings to gitignore ( #24492 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Ye (Charlotte) Qi <yeq@meta.com > 
						
						
					 
					
						2025-09-09 01:14:45 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e283976f3a 
					 
					
						
						
							
							[Performance][MM] Building the inverse permutation in O(n) time in Qwen2_5_VisionTransformer ( #24443 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Junhong <liujunhong11@huawei.com >
Co-authored-by: Junhong <liujunhong11@huawei.com > 
						
						
					 
					
						2025-09-09 00:24:11 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						46876dff32 
					 
					
						
						
							
							[Doc]: fixing typos to improve docs ( #24480 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Didier Durand <durand.didier@gmail.com > 
						
						
					 
					
						2025-09-08 23:06:04 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1823a00d67 
					 
					
						
						
							
							[Misc] Support bench serve long context ( #24373 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Ming Yang <minos.future@gmail.com > 
						
						
					 
					
						2025-09-08 22:53:10 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ed16d0f26f 
					 
					
						
						
							
							[Doc] mention fpdb for multiprocess breakpoints ( #24452 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Mickael Seznec <mickael@mistral.ai > 
						
						
					 
					
						2025-09-08 21:46:45 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0cdd213641 
					 
					
						
						
							
							[Misc] Improve Worker process title and logging prefix ( #22205 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: 22quinn <33176974+22quinn@users.noreply.github.com > 
						
						
					 
					
						2025-09-08 21:43:48 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						948dd3443b 
					 
					
						
						
							
							[Bugfix] Fix Apertus HF repo name ( #24447 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-09-08 21:40:29 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b2f7745774 
					 
					
						
						
							
							Add data_parallel_size to VllmConfig string representation ( #24298 )  
						
						 
						
						... 
						
						
						
						Co-authored-by: Cong Chen <congc@meta.com > 
						
						
					 
					
						2025-09-08 21:35:18 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						82dfb12e52 
					 
					
						
						
							
							[Core] Use sha256 bytes instead of BlockHash to reduce GC overhead ( #23673 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: linzebing <linzebing1995@gmail.com > 
						
						
					 
					
						2025-09-08 21:34:37 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						bba1042c6f 
					 
					
						
						
							
							[Flashinfer] Support Flashinfer TRTLLM FP8-qkv BF16/FP16-out Attention Kernel ( #23647 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: elvischenv <219235043+elvischenv@users.noreply.github.com > 
						
						
					 
					
						2025-09-08 20:53:07 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b6fbc15634 
					 
					
						
						
							
							[BugFix][Model] Fix Ernie4.5-VL hanging on long inputs ( #24074 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: wangyafeng <wangyafeng@baidu.com > 
						
						
					 
					
						2025-09-09 11:37:16 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3e0d4a3475 
					 
					
						
						
							
							Move KVTransferConfig from config/__init__.py to config/kv_transfer.py ( #24434 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-09-08 20:30:32 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						562663a044 
					 
					
						
						
							
							Bump actions/github-script from 7.0.1 to 8.0.0 ( #24413 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: dependabot[bot] <support@github.com >
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Russell Bryant <rbryant@redhat.com > 
						
						
					 
					
						2025-09-09 03:12:44 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ed1623a88a 
					 
					
						
						
							
							Bump actions/stale from 9.1.0 to 10.0.0 ( #24412 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: dependabot[bot] <support@github.com >
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Russell Bryant <rbryant@redhat.com > 
						
						
					 
					
						2025-09-09 03:11:20 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						13b89bd823 
					 
					
						
						
							
							[doc] update vllm serve cli args documentation ( #24329 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: cjackal <44624812+cjackal@users.noreply.github.com > 
						
						
					 
					
						2025-09-09 03:07:58 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						22a0070530 
					 
					
						
						
							
							Bump actions/setup-python from 5.4.0 to 6.0.0 ( #24414 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: dependabot[bot] <support@github.com >
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Russell Bryant <rbryant@redhat.com > 
						
						
					 
					
						2025-09-09 02:54:58 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						170129eb28 
					 
					
						
						
							
							[gpt-oss] Harmony changes with container tool support ( #23386 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: zhiweiz <zhiweiz@fb.com >
Signed-off-by: Aaron Pham <contact@aarnphm.xyz >
Signed-off-by: Lu Fang <30275821+houseroad@users.noreply.github.com >
Co-authored-by: zhiweiz <zhiweiz@fb.com >
Co-authored-by: Aaron Pham <contact@aarnphm.xyz >
Co-authored-by: Simon Mo <simon.mo@hey.com >
Co-authored-by: Lu Fang <30275821+houseroad@users.noreply.github.com > 
						
						
					 
					
						2025-09-08 19:03:50 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						955c624915 
					 
					
						
						
							
							[Bugfix][Wide EP] Fix redundant work when using DeepEP, TP Attn, and EP MoE ( #24134 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Tyler Michael Smith <tlrmchlsmth@gmail.com > 
						
						
					 
					
						2025-09-08 19:01:51 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4f87abdcc6 
					 
					
						
						
							
							Update reviewers for modelopt related files ( #24468 )  
						
						 
						
						
						
						
					 
					
						2025-09-09 01:53:13 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6910b56da2 
					 
					
						
						
							
							[CI] Add nightly multiarch manifests to dockerhub ( #24102 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Sahithi Chigurupati <chigurupati.sahithi@gmail.com >
Signed-off-by: Simon Mo <simon.mo@hey.com >
Signed-off-by: simon-mo <simon.mo@hey.com >
Co-authored-by: Simon Mo <simon.mo@hey.com > 
						
						
					 
					
						2025-09-09 01:18:09 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e10fef0883 
					 
					
						
						
							
							[Hardware][IBM Z] Fix Outlines Core issue for s390x ( #24034 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Rehan Khan <Rehan.Khan7@ibm.com > 
						
						
					 
					
						2025-09-08 16:50:34 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e680723eba 
					 
					
						
						
							
							[Bugfix] Disable the statslogger if the api_server_count is greater than 1 ( #22227 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com >
Co-authored-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-09-08 15:28:03 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						620db1fc58 
					 
					
						
						
							
							[Attention] FlashAttention MLA cudagraph support ( #23958 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Matthew Bonanni <mbonanni001@gmail.com >
Co-authored-by: Robert Shaw <114415538+robertgshaw2-redhat@users.noreply.github.com > 
						
						
					 
					
						2025-09-08 22:05:26 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						41183c1fe0 
					 
					
						
						
							
							[Spec Decode] Fix offline spec_decode.py ( #24257 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Ekagra Ranjan <3116519+ekagra-ranjan@users.noreply.github.com >
Co-authored-by: Roger Wang <hey@rogerw.io > 
						
						
					 
					
						2025-09-08 20:44:13 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						43d9ad03ba 
					 
					
						
						
							
							[Model loader]: support multi-thread model weight loading ( #23928 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Yang Kaiyong <yangkaiyong.yky@antgroup.com >
Signed-off-by: Simon Mo <simon.mo@hey.com >
Co-authored-by: Simon Mo <simon.mo@hey.com > 
						
						
					 
					
						2025-09-08 18:49:39 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7be141b2c5 
					 
					
						
						
							
							[CI] Enable encoder model compilation test ( #24442 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: zjy0516 <riverclouds.zhu@qq.com > 
						
						
					 
					
						2025-09-08 11:48:06 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8d7f39b48c 
					 
					
						
						
							
							[Model] Remove quantized mixtral ( #24437 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-09-08 11:02:14 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						cd08636926 
					 
					
						
						
							
							[Spec Decode][Benchmark] Add Blitzedit dataset ( #23605 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Ekagra Ranjan <3116519+ekagra-ranjan@users.noreply.github.com >
Co-authored-by: Roger Wang <hey@rogerw.io > 
						
						
					 
					
						2025-09-08 10:32:52 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3feeeb9fea 
					 
					
						
						
							
							[Spec Decode][Benchmark] Add Spec Bench Dataset for benchmarking ( #23563 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Ekagra Ranjan <3116519+ekagra-ranjan@users.noreply.github.com > 
						
						
					 
					
						2025-09-08 10:32:42 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6f4a82f8b5 
					 
					
						
						
							
							[Model] Enable BNB support for qwen2_5_omni_thinker ( #24420 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-09-08 09:37:08 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c44797a4d6 
					 
					
						
						
							
							[Docs]add eplb_config param use docs ( #24213 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: rongfu.leng <rongfu.leng@daocloud.io > 
						
						
					 
					
						2025-09-08 09:36:57 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						55be93baf5 
					 
					
						
						
							
							[Doc]: fix 2 hyperlinks leading to Ray site after they changed Ray's doc structure ( #24438 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Didier Durand <durand.didier@gmail.com >
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com >
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-09-08 09:36:54 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						717fc00e98 
					 
					
						
						
							
							[Docs] Move feature compatibility tables to README ( #24431 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-09-08 06:45:14 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						01dfb5e982 
					 
					
						
						
							
							[Frontend] User-provided uuids for medias in chat. (RFC  #22044 ) ( #23449 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Roger Wang <hey@rogerw.io >
Signed-off-by: Chenheli Hua <huachenheli@outlook.com >
Signed-off-by: Roger Wang <hey@rogerw.me >
Signed-off-by: Cyrus Leung <cyrus.tl.leung@gmail.com >
Co-authored-by: Roger Wang <hey@rogerw.io >
Co-authored-by: Roger Wang <hey@rogerw.me >
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com > 
						
						
					 
					
						2025-09-08 06:42:20 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						03dd652c16 
					 
					
						
						
							
							Move KVEventsConfig from config/__init__.py to config/kv_events.py ( #24433 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-09-08 06:41:27 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9cd76b71ab 
					 
					
						
						
							
							[Misc] Terratorch related fixes ( #24337 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Christian Pinto <christian.pinto@ibm.com >
Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-09-08 06:40:26 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e041314184 
					 
					
						
						
							
							[Bugfix] Fix mamba2 prefill chunking ( #23279 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Tomer Asida <57313761+tomeras91@users.noreply.github.com >
Signed-off-by: tomeras91 <57313761+tomeras91@users.noreply.github.com >
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> 
						
						
					 
					
						2025-09-08 11:42:41 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5e537f45b4 
					 
					
						
						
							
							[Bugfix] Fix get_quant_config when using modelscope ( #24421 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: wangli <wangli858794774@gmail.com > 
						
						
					 
					
						2025-09-08 11:03:02 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c2a8b08fcd 
					 
					
						
						
							
							[Doc] Fix issues in integrations/llamastack.md ( #24428 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: windsonsea <haifeng.yao@daocloud.io > 
						
						
					 
					
						2025-09-08 02:28:32 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f4962a6d55 
					 
					
						
						
							
							[Doc]: fix typos in Python comments ( #24417 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Didier Durand <durand.didier@gmail.com > 
						
						
					 
					
						2025-09-08 00:22:16 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2f0b833a05 
					 
					
						
						
							
							[Docs] Fix a tip indentation and typo ( #24419 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: windsonsea <haifeng.yao@daocloud.io > 
						
						
					 
					
						2025-09-08 00:19:40 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						425b04b8f4 
					 
					
						
						
							
							[gpt-oss][Responses API] Fix the function call id format ( #24409 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com > 
						
						
					 
					
						2025-09-08 06:49:52 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						60f0843ef8 
					 
					
						
						
							
							[Model] Remove unnecessary CUDA sync of Qwen2VL image and video preprocess ( #24334 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Win <chatcharinsang@gmail.com >
Co-authored-by: Roger Wang <hey@rogerw.io > 
						
						
					 
					
						2025-09-07 23:11:12 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8a46602606 
					 
					
						
						
							
							[Model] Remove unnecessary CUDA sync of GLM-4.1V image and video preprocess ( #24332 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Win <chatcharinsang@gmail.com >
Co-authored-by: Roger Wang <hey@rogerw.io > 
						
						
					 
					
						2025-09-07 23:10:54 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						61aa4b2901 
					 
					
						
						
							
							[P/D] Add a shutdown method to the Connector API ( #22699 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com > 
						
						
					 
					
						2025-09-07 23:07:00 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8c892b1831 
					 
					
						
						
							
							[Doc] Fix UTF-8 encoding issues in documentation generation on Windows ( #24361 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: alekramelaheehridoy <aliqramalaheehridoy@gmail.com >
Signed-off-by: alekramelaheehridoy <alekramelaheehridoy@gmail.com >
Co-authored-by: alekramelaheehridoy <alekramelaheehridoy@gmail.com > 
						
						
					 
					
						2025-09-07 22:33:52 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3bca396f79 
					 
					
						
						
							
							[CI/Build] Fix local image inputs in test_pixtral.py ( #24401 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chenheli Hua <huachenheli@outlook.com >
Co-authored-by: Roger Wang <hey@rogerw.io > 
						
						
					 
					
						2025-09-08 03:31:35 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3a3e91bdfe 
					 
					
						
						
							
							[CI/Build] Disable flaky test_structured_output tests ( #24404 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: 22quinn <33176974+22quinn@users.noreply.github.com > 
						
						
					 
					
						2025-09-08 02:51:59 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b3d7e3c845 
					 
					
						
						
							
							[Sampler] Support returning all prompt logprobs ( #23868 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Xingyu Liu <charlotteliu12x@gmail.com >
Co-authored-by: 22quinn <33176974+22quinn@users.noreply.github.com >
Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-09-07 19:34:31 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						67841317d1 
					 
					
						
						
							
							[xpu] upgrade ipex/python3.12 for xpu ( #23830 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Yan Ma <yan.ma@intel.com > 
						
						
					 
					
						2025-09-08 02:07:16 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						86173ad593 
					 
					
						
						
							
							[Kernel] Support decode context parallelism on Blackwell with CUTLASS MLA ( #24385 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Ming Yang <minos.future@gmail.com >
Signed-off-by: youkaichao <youkaichao@gmail.com >
Co-authored-by: youkaichao <youkaichao@gmail.com > 
						
						
					 
					
						2025-09-08 09:27:12 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						795b6951cd 
					 
					
						
						
							
							Add @luccafong to codeowner for spec decode ( #24397 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lu Fang <fanglu@fb.com > 
						
						
					 
					
						2025-09-08 08:30:27 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2e5d21378d 
					 
					
						
						
							
							Skip MM Encoder for non-first PP ranks ( #24387 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-09-07 09:38:35 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0661cb9df3 
					 
					
						
						
							
							Add renderer-based prompt processing for embedding and classification endpoints ( #24356 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: sfeng33 <4florafeng@gmail.com > 
						
						
					 
					
						2025-09-07 08:26:48 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						105d3d62ef 
					 
					
						
						
							
							[TPU] Remove TopKTopPSampler dependency for TPU sampler ( #24391 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-09-07 01:12:36 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						62f66be1f7 
					 
					
						
						
							
							[Bugfix] Fix Qwen3-coder moe tuned config ( #24072 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-09-07 05:19:46 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						81c53ef55c 
					 
					
						
						
							
							[Misc] collect flashinfer version in collect_env.py ( #24378 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Ye (Charlotte) Qi <yeq@meta.com > 
						
						
					 
					
						2025-09-07 03:30:41 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						75334956c2 
					 
					
						
						
							
							QWEN3 Thinking Fused MoE kernels Optimization configs ( #24330 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Saman Keon <samanamp@outlook.com > 
						
						
					 
					
						2025-09-07 03:18:54 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						77aec83b8c 
					 
					
						
						
							
							[Benchmark] add benchmark for custom activation op ( #23908 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: zjy0516 <riverclouds.zhu@qq.com >
Signed-off-by: Jiangyun Zhu <riverclouds.zhu@qq.com >
Co-authored-by: Luka Govedič <ProExpertProg@users.noreply.github.com > 
						
						
					 
					
						2025-09-06 20:12:05 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e67597545b 
					 
					
						
						
							
							[CI][Fix] deterministic seed for flaky CI runs on structured outputs ( #24380 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Aaron Pham <contact@aarnphm.xyz > 
						
						
					 
					
						2025-09-07 11:10:40 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						37a6fa95fd 
					 
					
						
						
							
							Migrate Qwen2 inputs to TensorSchema ( #23475 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Benji Beck <benjibeck@meta.com >
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> 
						
						
					 
					
						2025-09-06 20:07:31 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						558f0907dc 
					 
					
						
						
							
							[attention][DCP] use AttentionImpl.need_to_return_lse_for_decode ( #24372 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: youkaichao <youkaichao@gmail.com > 
						
						
					 
					
						2025-09-07 01:18:59 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4172235ab7 
					 
					
						
						
							
							[V0 deprecation] Deprecate V0 Neuron backend ( #21159 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-09-06 16:15:18 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						848562bd49 
					 
					
						
						
							
							break execute_model in gpu_model_runner into sub-functions for custom scopes ( #24265 )  
						
						 
						
						... 
						
						
						
						Co-authored-by: Bangsheng Tang <bangsheng@meta.com > 
						
						
					 
					
						2025-09-06 14:02:47 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e68dc2f014 
					 
					
						
						
							
							[Bugfix] Fix unstable silu_mul+nvfp4 quant fusion test ( #24370 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: elvischenv <219235043+elvischenv@users.noreply.github.com > 
						
						
					 
					
						2025-09-06 20:39:34 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a3645ed94d 
					 
					
						
						
							
							[Frontend][Responses API] Support reporting tool output tokens and fix reasoning token count ( #24285 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Ye (Charlotte) Qi <yeq@meta.com > 
						
						
					 
					
						2025-09-06 13:27:15 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						fb691ee4e7 
					 
					
						
						
							
							[Fix] [gpt-oss] fix non-tool calling path for chat completion ( #24324 )  
						
						 
						
						
						
						
					 
					
						2025-09-06 19:10:32 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6024d115cd 
					 
					
						
						
							
							Lora bias(enable_lora_bias) deprecate warning ( #24339 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com >
Co-authored-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-09-07 00:42:19 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7555d6b34a 
					 
					
						
						
							
							[Bugfix] Fix test_mixtral_moe ( #24371 )  
						
						 
						
						
						
						
					 
					
						2025-09-06 09:32:03 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						00a4e56d8d 
					 
					
						
						
							
							[Bugfix] Fix broken deepseek fp8 TP weights loading ( #24367 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn > 
						
						
					 
					
						2025-09-06 09:23:12 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0eadaeff7e 
					 
					
						
						
							
							[Bugfix] Avoid uninitialized usage of azp_val when AZP is false. ( #24335 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Mohan Kumar Kumar <mohan.cbein@gmail.com >
Signed-off-by: mohankku <mohan.cbein@gmail.com > 
						
						
					 
					
						2025-09-06 08:17:03 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0077c8634e 
					 
					
						
						
							
							Add @benchislett to codeowner for spec decode and structured outputs ( #24362 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Benjamin Chislett <benjamin.chislett@centml.ai > 
						
						
					 
					
						2025-09-06 22:03:35 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b121ca22ad 
					 
					
						
						
							
							[CI] Disable flaky structured output test from CI ( #24366 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Roger Wang <hey@rogerw.io > 
						
						
					 
					
						2025-09-06 13:31:56 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						eddaafc1c7 
					 
					
						
						
							
							[Multimodal] Improve max video embedding length estimation in V1 ( #24312 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Roger Wang <hey@rogerw.me >
Co-authored-by: Roger Wang <hey@rogerw.me > 
						
						
					 
					
						2025-09-06 02:33:19 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						305a1cc0d2 
					 
					
						
						
							
							refactor: Turn GPUModelRunner.inputs_embeds to a CpuGpuBuffer ( #24345 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Andrew Sansom <andrew@protopia.ai > 
						
						
					 
					
						2025-09-05 23:01:23 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6d6c6b05d3 
					 
					
						
						
							
							[New Model]: google/embeddinggemma-300m ( #24318 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: wang.yuqi <noooop@126.com > 
						
						
					 
					
						2025-09-05 22:58:36 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						53b19ccdd5 
					 
					
						
						
							
							[Core] Allow disabling TP sharding for parallel Linear layer ( #23024 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn >
Signed-off-by: Isotr0py <2037008807@qq.com >
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> 
						
						
					 
					
						2025-09-05 22:53:58 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6432739ef1 
					 
					
						
						
							
							[Bugfix] Catch and log invalid token ids in detokenizer ( #24351 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-09-05 22:30:22 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ac201a0eaf 
					 
					
						
						
							
							[Feature] Support Decode Context Parallel (DCP) for MLA ( #23734 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: hongchao <hongchao@msh.team >
Signed-off-by: youkaichao <youkaichao@gmail.com >
Co-authored-by: hongchao <hongchao@msh.team >
Co-authored-by: youkaichao <youkaichao@gmail.com > 
						
						
					 
					
						2025-09-06 13:24:05 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3c529fc994 
					 
					
						
						
							
							[KV Sharing] Raise error if using eagle with fast prefill ( #24350 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Yong Hoon Shin <yhshin@meta.com > 
						
						
					 
					
						2025-09-05 20:22:40 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						35bf193864 
					 
					
						
						
							
							[Doc]: fix typos in Python comments ( #24294 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Didier Durand <durand.didier@gmail.com >
Co-authored-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com > 
						
						
					 
					
						2025-09-05 19:41:12 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						35efa70297 
					 
					
						
						
							
							Add @22quinn as code reviewer for RL related components ( #24346 )  
						
						 
						
						
						
						
					 
					
						2025-09-06 01:56:15 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						cee182b297 
					 
					
						
						
							
							[Perf][V1] Fully overlap model execution ( #23569 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Benjamin Chislett <benjamin.chislett@centml.ai > 
						
						
					 
					
						2025-09-05 18:20:17 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c954c6629c 
					 
					
						
						
							
							[CI] Add timeouts to tests ( #24260 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Rafael Vasquez <rafvasq21@gmail.com >
Signed-off-by: Nick Hill <nhill@redhat.com >
Co-authored-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-09-05 17:26:22 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9dfbeb41e5 
					 
					
						
						
							
							[RFC] allow cancelation after shutdown in blocking collective_rpc ( #23390 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Shiyan Deng <dsy842974287@meta.com > 
						
						
					 
					
						2025-09-05 14:14:18 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						eedb2a2a10 
					 
					
						
						
							
							[Bugfix] Fix silu_mul+quant fusion test ( #24341 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: elvischenv <219235043+elvischenv@users.noreply.github.com > 
						
						
					 
					
						2025-09-05 20:13:42 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						23a6c5280e 
					 
					
						
						
							
							[gpt-oss][Bugfix]Fix streamableparser for missing handling of certain token_ids ( #24306 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com > 
						
						
					 
					
						2025-09-05 10:26:00 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7812bcf278 
					 
					
						
						
							
							[docs] add shenzhen meetup ( #24326 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: youkaichao <youkaichao@gmail.com > 
						
						
					 
					
						2025-09-05 22:48:42 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						006e7a34ae 
					 
					
						
						
							
							Adding int4 and int8 models for CPU benchmarking ( #23709 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Tsai, Louie <louie.tsai@intel.com > 
						
						
					 
					
						2025-09-05 20:08:50 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e599e2c65e 
					 
					
						
						
							
							[XPU][P/D] Add XPU support in NixlConnector ( #22436 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: zhenwei <zhenwei.liu@intel.com >
Co-authored-by: Kunshang Ji <kunshang.ji@intel.com > 
						
						
					 
					
						2025-09-04 21:03:12 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c29fb540ff 
					 
					
						
						
							
							[gpt-oss] tool parser supports for /chat/completions [1/n] ( #22386 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Aaron Pham <contact@aarnphm.xyz >
Co-authored-by: Simon Mo <simon.mo@hey.com > 
						
						
					 
					
						2025-09-04 20:39:12 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						65e038931d 
					 
					
						
						
							
							[Frontend] Skip unnecessary detokenization when token_id is requested ( #24236 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: NickLucche <nlucches@redhat.com > 
						
						
					 
					
						2025-09-04 23:04:12 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						886ccbe5ba 
					 
					
						
						
							
							[CI/Build] Reduce the number of redundant cases to test for LoRA ( #24276 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Zhuohan Li <zhuohan123@gmail.com > 
						
						
					 
					
						2025-09-04 21:58:44 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						adc3ddb430 
					 
					
						
						
							
							[Bugfix][Misc] Fix silu_and_mul_nvfp4_quant issue and extract common utils for nvfp4 kernel source files ( #23727 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: elvischenv <219235043+elvischenv@users.noreply.github.com >
Signed-off-by: Luka Govedič <ProExpertProg@users.noreply.github.com >
Co-authored-by: Luka Govedič <ProExpertProg@users.noreply.github.com > 
						
						
					 
					
						2025-09-04 14:25:45 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						60b755cbcb 
					 
					
						
						
							
							[Misc] Have AsyncLLM custom_stat_loggers extend default logger list ( #20952 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Seiji Eicher <seiji@anyscale.com >
Signed-off-by: Seiji Eicher <58963096+eicherseiji@users.noreply.github.com >
Co-authored-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-09-04 14:25:30 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						482e52f56c 
					 
					
						
						
							
							QWEN3 Coder Fused MoE kernels Optimization configs ( #24266 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Saman Keon <samanamp@outlook.com > 
						
						
					 
					
						2025-09-04 20:33:43 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						78336a0c3e 
					 
					
						
						
							
							Upgrade FlashInfer to v0.3.0 ( #24086 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Po-Han Huang <pohanh@nvidia.com >
Co-authored-by: Simon Mo <simon.mo@hey.com > 
						
						
					 
					
						2025-09-04 09:49:20 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						94866d7c93 
					 
					
						
						
							
							[Misc] Slight improve deepgemm print ( #24085 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-09-04 16:06:51 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						83609ca91d 
					 
					
						
						
							
							[Doc]: fix typos in Python comments ( #24173 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Didier Durand <durand.didier@gmail.com >
Co-authored-by: Russell Bryant <rbryant@redhat.com >
Co-authored-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com > 
						
						
					 
					
						2025-09-04 08:52:17 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e41a0fa377 
					 
					
						
						
							
							[Perf] Freeze core engine proc heap after init ( #24008 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-09-04 22:55:23 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						37241077d5 
					 
					
						
						
							
							[Misc] Removed force_fp8_e4m3fnuz from FP8LinearOp ( #23725 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Julien Lin <jullin@nvidia.com >
Signed-off-by: Luka Govedič <ProExpertProg@users.noreply.github.com >
Co-authored-by: Luka Govedič <ProExpertProg@users.noreply.github.com > 
						
						
					 
					
						2025-09-04 09:25:40 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c9f7081f9c 
					 
					
						
						
							
							[LoRA]: Add lora support to qwen-2.5-omni ( #24231 )  
						
						 
						
						
						
						
					 
					
						2025-09-04 05:50:50 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						16ded21eeb 
					 
					
						
						
							
							[XPU] support Triton Attention backend on Intel GPU ( #24149 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Kunshang Ji <kunshang.ji@intel.com > 
						
						
					 
					
						2025-09-04 20:41:08 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2b30afa442 
					 
					
						
						
							
							Use hidden_size_per_head as head_size fallback ( #24221 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: nopperl <54780682+nopperl@users.noreply.github.com > 
						
						
					 
					
						2025-09-04 12:59:16 +01:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						eafa8dcde6 
					 
					
						
						
							
							[Model] Add pp support for hunyuan ( #24212 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: zjy0516 <riverclouds.zhu@qq.com > 
						
						
					 
					
						2025-09-04 03:58:26 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6c7af8110a 
					 
					
						
						
							
							[Doc] Update vLLM Singapore Meetup info ( #24234 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: tjtanaa <tunjian.tan@embeddedllm.com > 
						
						
					 
					
						2025-09-04 02:58:18 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8f423e5f43 
					 
					
						
						
							
							[Feature][Response API] Add streaming support for non-harmony ( #23741 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Kebe <mail@kebe7jun.com > 
						
						
					 
					
						2025-09-04 17:49:06 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						369a079568 
					 
					
						
						
							
							[Hardware][Apple-CPU] Disable OneDNN build for Apple Silicon ( #24200 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: ignaciosica <mignacio.sica@gmail.com >
Co-authored-by: Li, Jiang <jiang1.li@intel.com > 
						
						
					 
					
						2025-09-04 02:48:25 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						402759d472 
					 
					
						
						
							
							[Attention] FlashAttn MLA ( #14258 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lucas Wilkinson <lwilkinson@neuralmagic.com >
Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com >
Signed-off-by: Matthew Bonanni <mbonanni001@gmail.com >
Co-authored-by: Matthew Bonanni <mbonanni001@gmail.com >
Co-authored-by: Matthew Bonanni <mbonanni@redhat.com > 
						
						
					 
					
						2025-09-04 02:47:59 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2c301ee2eb 
					 
					
						
						
							
							[Bugfix] Fix Incremental Detokenization with tokenizers == 0.22.0 ( #24159 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Fanli Lin <fanli.lin@intel.com >
Signed-off-by: Fanli Lin <fanli0116@gmail.com >
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> 
						
						
					 
					
						2025-09-04 02:47:08 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3efb9f4d95 
					 
					
						
						
							
							[Attention][Platform] Refactor MLA to support Custom Op ( #23332 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: whx-sjtu <2952154980@qq.com > 
						
						
					 
					
						2025-09-04 02:46:37 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						04f3c35cff 
					 
					
						
						
							
							Improve flexibility of auto_tune.sh execution. ( #23766 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Anthony Su <50185138+anthonsu@users.noreply.github.com >
Signed-off-by: anthonsu <50185138+anthonsu@users.noreply.github.com >
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> 
						
						
					 
					
						2025-09-04 09:41:41 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						51d5e9be7d 
					 
					
						
						
							
							[Core][Model] Terratorch backend integration ( #23513 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Michele Gazzetti <michele.gazzetti1@ibm.com >
Signed-off-by: Christian Pinto <christian.pinto@ibm.com >
Co-authored-by: Christian Pinto <christian.pinto@ibm.com >
Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-09-04 00:22:41 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e7fc70016f 
					 
					
						
						
							
							[Model] Add MiDashengLM model support ( #23652 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: chenbing8 <chenbing8@xiaomi.com >
Signed-off-by: bingchen-mi <chenbing8@xiaomi.com >
Co-authored-by: Jee Jee Li <pandaleefree@gmail.com >
Co-authored-by: Isotr0py <mozf@mail2.sysu.edu.cn > 
						
						
					 
					
						2025-09-04 00:08:09 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						12e1e63cc5 
					 
					
						
						
							
							[Misc] Enhance output readability of helper script ( #24214 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Weida Hong <wdhongtw@google.com > 
						
						
					 
					
						2025-09-04 06:38:26 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						57b1ce94f7 
					 
					
						
						
							
							[CPU] Refactor CPU unquantized linear ( #24150 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: jiang1.li <jiang1.li@intel.com > 
						
						
					 
					
						2025-09-04 14:28:45 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						cb55ad86fe 
					 
					
						
						
							
							Migrate ultravox inputs to TensorSchema ( #23503 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Benji Beck <benjibeck@meta.com > 
						
						
					 
					
						2025-09-04 06:09:11 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						712b273f65 
					 
					
						
						
							
							[Refactor] Introduce basic Renderer for completion-style request ( #24010 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: sfeng33 <4florafeng@gmail.com > 
						
						
					 
					
						2025-09-04 05:21:12 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e919d6f549 
					 
					
						
						
							
							[Kernel][Bugfix] Fix grouped topk cu ( #24146 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mayuyuace <qiming1.zhang@intel.com > 
						
						
					 
					
						2025-09-04 12:37:37 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a38f8bd54c 
					 
					
						
						
							
							[Feature][Responses API]Support MCP tools with streaming mode + background mode ( #23927 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: wuhang <wuhang6@huawei.com > 
						
						
					 
					
						2025-09-04 04:05:10 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b5ee1e3261 
					 
					
						
						
							
							Remove deprecated PyNcclConnector ( #24151 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Peter Pan <Peter.Pan@daocloud.io > 
						
						
					 
					
						2025-09-03 22:49:16 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						36c260dad6 
					 
					
						
						
							
							[Feature][gpt-oss] Add support for num_cached_tokens and num_reasoning_tokens tracking ( #23460 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: George Nagy II <george.nagy0969@gmail.com >
Signed-off-by: Chen Zhang <zhangch99@outlook.com > 
						
						
					 
					
						2025-09-03 21:08:47 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a43a3f1770 
					 
					
						
						
							
							[Bugfix][DP] DP distribution does not require ray[default] ( #23822 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Kebe <mail@kebe7jun.com > 
						
						
					 
					
						2025-09-03 13:21:36 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6adaed42f4 
					 
					
						
						
							
							[Feature][P/D]: Optimize NIXL Connector xfer Launch ( #23887 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: ycyaw66 <497410282@qq.com >
Co-authored-by: ycyaw66 <497410282@qq.com > 
						
						
					 
					
						2025-09-03 19:14:30 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a742322092 
					 
					
						
						
							
							[Attention] Blackwell FP8 MLA support with CUTLASS_MLA backend ( #23289 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Matthew Bonanni <mbonanni@redhat.com > 
						
						
					 
					
						2025-09-03 14:05:24 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						731a6940e3 
					 
					
						
						
							
							Migrate whisper inputs to TensorSchema ( #23505 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Benji Beck <benjibeck@meta.com > 
						
						
					 
					
						2025-09-03 18:04:00 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e9b92dcd89 
					 
					
						
						
							
							[Kernels] Overlap shared experts with send/recv ( #23273 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Bill Nell <bnell@redhat.com > 
						
						
					 
					
						2025-09-03 12:35:18 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						fa4311d85f 
					 
					
						
						
							
							[V1] v1 engine + full CUDA graph support for PLaMo2 ( #23998 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Hemmi Shinichi <shemmi@preferred.jp >
Signed-off-by: nopperl <54780682+nopperl@users.noreply.github.com >
Co-authored-by: Hemmi Shinichi <shemmi@preferred.jp >
Co-authored-by: Thomas Parnell <tom.parnell@gmail.com > 
						
						
					 
					
						2025-09-03 08:24:02 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6d80ae83e1 
					 
					
						
						
							
							[Bugfix] Fixing division by zero in triton_attn if query_heads/kv_heads > 16  ( #23424 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Burkhard Ringlein <ngl@zurich.ibm.com > 
						
						
					 
					
						2025-09-03 15:01:09 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4ba0c587ba 
					 
					
						
						
							
							FIX: Add libnuma-dev to Dockerfile for dev stage ( #20388 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: dongbo910220 <1275604947@qq.com > 
						
						
					 
					
						2025-09-03 07:17:20 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6997a25ac6 
					 
					
						
						
							
							[Model] Remove useless code from MiniMax implementation ( #23982 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: QscQ <qscqesze@gmail.com >
Signed-off-by: qingjun <qingjun@minimaxi.com > 
						
						
					 
					
						2025-09-03 11:27:04 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						28f350e147 
					 
					
						
						
							
							Support add_generation_prompt in embeddings endpoint with chat request ( #23931 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: biba10 <jaksmid@seznam.cz > 
						
						
					 
					
						2025-09-03 10:47:55 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						51383bd472 
					 
					
						
						
							
							[CI] Accelerate mteb test by setting SentenceTransformers mteb score to a constant ( #24088 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: wang.yuqi <noooop@126.com > 
						
						
					 
					
						2025-09-03 17:23:56 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9c99e4871f 
					 
					
						
						
							
							[Misc] Clean up deadcode for legacy processing pipeline ( #24153 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn > 
						
						
					 
					
						2025-09-03 08:34:29 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						70549c1245 
					 
					
						
						
							
							[CI/Build] Serve images used by multimodal tests through local HTTP Server ( #23907 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Divyansh Singhvi <divyanshsinghvi@gmail.com >
Signed-off-by: dsinghvi <divyanshsinghvi@gmail.com >
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com > 
						
						
					 
					
						2025-09-03 16:13:11 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f0c503f66e 
					 
					
						
						
							
							[Nixl] Heterogeneous TP support FlashInfer ( #20189 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: NickLucche <nlucches@redhat.com > 
						
						
					 
					
						2025-09-03 15:19:54 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f38035c123 
					 
					
						
						
							
							[distributed][rl] remove nccl cumem env var override ( #24141 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: youkaichao <youkaichao@gmail.com >
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> 
						
						
					 
					
						2025-09-03 06:45:25 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						426cc8629f 
					 
					
						
						
							
							[BugFix] Fix routed_scaling_factor double mul for dots1 and glm4 MoE models ( #24132 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Yong Hoon Shin <yhshin@meta.com > 
						
						
					 
					
						2025-09-03 04:57:59 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e81d4e69c1 
					 
					
						
						
							
							[Misc] Add check for dual_chunk_attention ( #24070 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: zjy0516 <riverclouds.zhu@qq.com > 
						
						
					 
					
						2025-09-03 04:19:14 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						02d411fdb2 
					 
					
						
						
							
							[Doc]: fix typos in Python comments ( #24115 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Didier Durand <durand.didier@gmail.com > 
						
						
					 
					
						2025-09-02 21:14:07 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d7e1e59972 
					 
					
						
						
							
							[Doc]: fix typos in Python comments ( #24093 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Didier Durand <durand.didier@gmail.com > 
						
						
					 
					
						2025-09-02 21:05:45 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c4ed78b14f 
					 
					
						
						
							
							[Compile] Fix Compile Warning for w4a8_mm_entry.cu ( #23660 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yewentao256 <zhyanwentao@126.com >
Co-authored-by: Luka Govedič <ProExpertProg@users.noreply.github.com > 
						
						
					 
					
						2025-09-02 20:45:52 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1bd007f234 
					 
					
						
						
							
							fix some typos ( #24071 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: co63oc <co63oc@users.noreply.github.com > 
						
						
					 
					
						2025-09-02 20:44:50 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						136d853e65 
					 
					
						
						
							
							[V1] Wrapper which plumbs request-level logits processors into vLLM batch-level logits processing ( #23656 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Andrew Feldman <afeldman@redhat.com > 
						
						
					 
					
						2025-09-03 02:52:51 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e32a0e8678 
					 
					
						
						
							
							Upgrade xgrammar to 0.1.23 ( #22988 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Russell Bryant <rbryant@redhat.com > 
						
						
					 
					
						2025-09-03 02:32:59 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						42dc59dbac 
					 
					
						
						
							
							Update release pipeline post PyTorch 2.8.0 update ( #24073 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Huy Do <huydhn@gmail.com >
Signed-off-by: youkaichao <youkaichao@gmail.com >
Co-authored-by: Huy Do <huydhn@gmail.com > 
						
						
					 
					
						2025-09-03 10:09:19 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						862f2ef893 
					 
					
						
						
							
							[XPU] Fix the bug of LoRA logits on the XPU platform ( #24081 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: chzhang <chaojun.zhang@intel.com > 
						
						
					 
					
						2025-09-03 08:21:18 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2fd1a40a54 
					 
					
						
						
							
							[CI/Build] Disable SiluMul NVFP4 quant fusion tests ( #24121 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Matthew Bonanni <mbonanni@redhat.com > 
						
						
					 
					
						2025-09-02 16:50:28 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						930a24144c 
					 
					
						
						
							
							[Bug] R1 Accuracy: Fix routed_scaling_factor Double Mul Issue ( #24119 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yewentao256 <zhyanwentao@126.com > 
						
						
					 
					
						2025-09-02 22:22:30 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						457e471971 
					 
					
						
						
							
							[AMD][Kernel][Bugfix] Cast offsets tensor bn to tl.int64 to avoid GPU segfault ( #23692 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Randall Smith <Randall.Smith@amd.com > 
						
						
					 
					
						2025-09-02 22:13:57 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d328f7894f 
					 
					
						
						
							
							[CI] Enable all hf transformers baselines in test_hybrid ( #23936 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Thomas Parnell <tpa@zurich.ibm.com > 
						
						
					 
					
						2025-09-02 20:15:06 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						98aee612aa 
					 
					
						
						
							
							[Log] Only Print Profiler Results on Rank 0 ( #23370 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yewentao256 <zhyanwentao@126.com > 
						
						
					 
					
						2025-09-02 18:53:34 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						598bd74cf8 
					 
					
						
						
							
							Fix weights loading for Apertus ( #24100 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nathan Ranchin <nranchin@student.ethz.ch > 
						
						
					 
					
						2025-09-02 18:34:28 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2417798471 
					 
					
						
						
							
							[Metrics] Deprecate TPOT in favor of ITL ( #24110 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Mark McLoughlin <markmc@redhat.com > 
						
						
					 
					
						2025-09-02 18:10:10 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9480ae24e3 
					 
					
						
						
							
							[Bugfix] Fix packed_factor missing attribute error ( #23902 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Kyuyeun Kim <kyuyeunk@google.com > 
						
						
					 
					
						2025-09-02 10:56:31 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f399182e8c 
					 
					
						
						
							
							Run ruff format on a few files. ( #24075 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chenheli Hua <huachenheli@outlook.com > 
						
						
					 
					
						2025-09-02 17:55:32 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1c41310584 
					 
					
						
						
							
							[Bugfix] Fix transform_config parsing in Compressed Tensors ( #23945 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Kyle Sayers <kylesayrs@gmail.com > 
						
						
					 
					
						2025-09-02 13:54:10 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c83c4ff815 
					 
					
						
						
							
							[Benchmark] Add support for local hf dataset path in benchmark ( #23999 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: zjy0516 <riverclouds.zhu@qq.com > 
						
						
					 
					
						2025-09-02 17:49:16 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0e1759cd54 
					 
					
						
						
							
							[docs] add SYS_NICE cap & security-opt for docker/k8s ( #24017 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Peter Pan <Peter.Pan@daocloud.io >
Signed-off-by: Peter Pan <peter.pan@daocloud.io >
Co-authored-by: Li, Jiang <bigpyj64@gmail.com >
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-09-02 17:27:20 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e66ed3e675 
					 
					
						
						
							
							[CI Failure] Skip failing nvfp4 silu test ( #23959 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com >
Co-authored-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com > 
						
						
					 
					
						2025-09-02 13:18:15 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e0653f6c0b 
					 
					
						
						
							
							[Model] Classification models support logit_bias / sigmoid_normalize ( #24031 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: wang.yuqi <noooop@126.com >
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> 
						
						
					 
					
						2025-09-02 16:48:57 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						38ba061f6f 
					 
					
						
						
							
							[BugFix] Fix EXAONE4 rotary embeddings ( #23918 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: lkm2835 <lkm2835@gmail.com >
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com >
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-09-02 14:40:55 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0a74e9d0f2 
					 
					
						
						
							
							[Gemma3n] Fix audio batching ( #24052 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: NickLucche <nlucches@redhat.com > 
						
						
					 
					
						2025-09-02 22:23:35 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8bd5844989 
					 
					
						
						
							
							correct LWS deployment yaml ( #23104 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: cberge908 <42270330+cberge908@users.noreply.github.com > 
						
						
					 
					
						2025-09-02 12:04:59 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ce30dca5c4 
					 
					
						
						
							
							[CI]: reduce HTTP calls inside entrypoints openai tests ( #23646 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: AzizCode92 <azizbenothman76@gmail.com >
Signed-off-by: Aziz <azizbenothman76@gmail.com >
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-09-02 10:49:32 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2f0bab3f26 
					 
					
						
						
							
							[Model] Support dp on ViT on GLM-4.5V ( #23168 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: David Chen <530634352@qq.com > 
						
						
					 
					
						2025-09-02 10:48:18 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						fad73be1a5 
					 
					
						
						
							
							[Doc]: fix typos in Python comments ( #24077 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Didier Durand <durand.didier@gmail.com > 
						
						
					 
					
						2025-09-02 02:38:55 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						56d04089ef 
					 
					
						
						
							
							Migrate Interns1 inputs to TensorSchema ( #23510 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Benji Beck <benjibeck@meta.com > 
						
						
					 
					
						2025-09-02 04:35:45 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7be0cb8e9e 
					 
					
						
						
							
							[XPU][Feature] fp8 online quantization support for XPU ( #23148 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Yan Ma <yan.ma@intel.com >
Co-authored-by: Qiming Zhang <qiming1.zhang@intel.com > 
						
						
					 
					
						2025-09-02 04:06:53 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1fa1d6a9a0 
					 
					
						
						
							
							Migrate OvisImagePatchInputs to TensorSchema ( #22024 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Benji Beck <benjibeck@meta.com > 
						
						
					 
					
						2025-09-02 12:01:36 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d59c986444 
					 
					
						
						
							
							Remove runtime checks based on pooling params ( #24051 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Max de Bayser <mbayser@br.ibm.com > 
						
						
					 
					
						2025-09-02 11:54:37 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						04d0c60770 
					 
					
						
						
							
							[Bugfix] Fix the issue that Blip2ForConditionalGeneration' object has… ( #24028 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Dazhi Jiang <dazhi_jiang@163.com > 
						
						
					 
					
						2025-09-02 11:54:20 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2b41cbbf03 
					 
					
						
						
							
							[V1][Mamba1] - FP32 SSM Kernel Support ( #23506 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: asafg <39553475+Josephasafg@users.noreply.github.com > 
						
						
					 
					
						2025-09-01 20:53:00 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0235103cbb 
					 
					
						
						
							
							[Doc]: fix typos in Python comments ( #24042 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Didier Durand <durand.didier@gmail.com >
Co-authored-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-09-01 19:07:45 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a344a5aa0a 
					 
					
						
						
							
							[bugfix]fix MTP hidden states ( #24056 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lu Fang <fanglu@fb.com > 
						
						
					 
					
						2025-09-01 21:09:37 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5685370271 
					 
					
						
						
							
							[Chore][V0 Deprecation] Move LogProb to a separate file ( #24055 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-09-01 12:07:53 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a0e0efd6bd 
					 
					
						
						
							
							[Model] Support DP for ViT on Kimi-VL-A3B-Thinking-2506 ( #23817 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Junhong <liujunhong11@huawei.com >
Signed-off-by: LJH-LBJ <98734602+LJH-LBJ@users.noreply.github.com >
Co-authored-by: Junhong <liujunhong11@huawei.com >
Co-authored-by: LJH-LBJ <98734602+LJH-LBJ@users.noreply.github.com >
Co-authored-by: Isotr0py <2037008807@qq.com > 
						
						
					 
					
						2025-09-01 16:56:56 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						cf91a89dd2 
					 
					
						
						
							
							[docs][misc] IOProcessor plugins fixes ( #24046 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Christian Pinto <christian.pinto@ibm.com > 
						
						
					 
					
						2025-09-01 09:17:41 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						39a22dcaac 
					 
					
						
						
							
							[Misc] Minor code simplification for spec decode ( #24053 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-09-01 08:54:01 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						41c80698b3 
					 
					
						
						
							
							Document multi-proc method selection for profiling ( #23802 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: jdebache <jdebache@nvidia.com > 
						
						
					 
					
						2025-09-01 06:28:26 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7c8271cd1e 
					 
					
						
						
							
							[Model]: support KeyeVL-1_5-8B ( #23838 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: wangruitao <wangruitao@kuaishou.com >
Co-authored-by: wangruitao <wangruitao@kuaishou.com > 
						
						
					 
					
						2025-09-01 03:50:27 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3e330fcb21 
					 
					
						
						
							
							[Doc]: Fix CPU install docs: force torch-backend=cpu to avoid GPU torchvision errors ( #24033 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Kay Yan <kay.yan@daocloud.io > 
						
						
					 
					
						2025-09-01 03:34:52 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d46934b229 
					 
					
						
						
							
							[Frontend] Gemma3n audio transcriptions/translations endpoint ( #23735 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: NickLucche <nlucches@redhat.com >
Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-09-01 18:07:46 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						107284959a 
					 
					
						
						
							
							[Doc]: fix typos in Python comments ( #24026 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Didier Durand <durand.didier@gmail.com > 
						
						
					 
					
						2025-09-01 09:38:20 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						dc1a53186d 
					 
					
						
						
							
							[Kernel] Update DeepGEMM to latest commit ( #23915 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com >
Co-authored-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com > 
						
						
					 
					
						2025-09-01 02:38:04 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						55602bb2e6 
					 
					
						
						
							
							[Frontend] Update the warning log when using VLLM_ALLOW_LONG_MAX_MODEL_LEN ( #20904 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: wang.yuqi <noooop@126.com >
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com >
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-09-01 08:50:25 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d7fbc6ddac 
					 
					
						
						
							
							[Misc] Enable V1 FP16 inference on pre-Ampere GPUs ( #24022 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn > 
						
						
					 
					
						2025-09-01 08:12:22 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5438967fbc 
					 
					
						
						
							
							[Misc] add hash_function doc string ( #24014 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Andy Xie <andy.xning@gmail.com > 
						
						
					 
					
						2025-08-31 23:11:20 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						422e793fa6 
					 
					
						
						
							
							[Bugfix] Add support for <tool_call> format in streaming mode for XLAM Tool Parser ( #22769 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Devon Peroutky <devon@kindo.ai > 
						
						
					 
					
						2025-09-01 14:07:54 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1cb39dbcdd 
					 
					
						
						
							
							[Misc] IO Processor plugins for pooling models ( #22820 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Christian Pinto <christian.pinto@ibm.com >
Signed-off-by: Max de Bayser <mbayser@br.ibm.com >
Co-authored-by: Max de Bayser <mbayser@br.ibm.com > 
						
						
					 
					
						2025-08-31 23:07:12 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						437c3ce026 
					 
					
						
						
							
							Migrate Phi4 inputs to TensorSchema ( #23471 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Benji Beck <benjibeck@meta.com > 
						
						
					 
					
						2025-09-01 14:05:59 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						499b074bfd 
					 
					
						
						
							
							[Misc] refactor code by import as for torch._inductor.config ( #23677 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Andy Xie <andy.xning@gmail.com > 
						
						
					 
					
						2025-09-01 14:05:42 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ff0e59d83a 
					 
					
						
						
							
							[CI/Build] Improve Tensor Schema tests speed by avoid engine core initialization ( #23357 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn > 
						
						
					 
					
						2025-08-31 22:52:20 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b55713683c 
					 
					
						
						
							
							[Misc] Move fast prefill logic to separate method ( #24013 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-09-01 05:40:38 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						acc1a6e10a 
					 
					
						
						
							
							Fix the bug related to loading GPTP INT3 weights. ( #23328 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: JunHowie <JunHowie@aliyun.com >
Co-authored-by: JunHowie <JunHowie@aliyun.com >
Co-authored-by: Isotr0py <mozf@mail2.sysu.edu.cn > 
						
						
					 
					
						2025-09-01 05:39:57 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8c742a66d1 
					 
					
						
						
							
							[Misc] Avoid redundant copy for encoder-only models ( #24012 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-09-01 04:02:43 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						183a70967a 
					 
					
						
						
							
							[BUGFIX] GPTQ quantization compatibility for Qwen3 MOE models (AutoGPTQ and AutoRound-GPTQ) ( #23994 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: JartX <sagformas@epdcenter.es >
Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn >
Co-authored-by: Isotr0py <mozf@mail2.sysu.edu.cn > 
						
						
					 
					
						2025-09-01 03:33:40 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						14b4326b94 
					 
					
						
						
							
							v1: Support KV events from connectors ( #19737 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Or Ozeri <oro@il.ibm.com > 
						
						
					 
					
						2025-09-01 01:13:21 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						752d2e1c36 
					 
					
						
						
							
							[Minor] Fix some random typos in comments ( #24009 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-08-31 16:42:17 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						81eea3d348 
					 
					
						
						
							
							vllm fix check on max vocab size ( #22471 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Roger Wang <hey@rogerw.io >
Signed-off-by: Roger Wang <hey@rogerw.me >
Co-authored-by: Roger Wang <hey@rogerw.io >
Co-authored-by: Roger Wang <hey@rogerw.me > 
						
						
					 
					
						2025-08-31 20:57:05 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9701352e4b 
					 
					
						
						
							
							[Doc]: fix typos in Python comments ( #24001 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Didier Durand <durand.didier@gmail.com > 
						
						
					 
					
						2025-08-31 08:21:59 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						749be00a98 
					 
					
						
						
							
							[Core][Multimodal] Allow passing multi_modal_uuids as multimodal identifiers. ( #23394 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Roger Wang <hey@rogerw.io > 
						
						
					 
					
						2025-08-30 18:01:22 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5b8077b8ac 
					 
					
						
						
							
							Fix wrong truncate_prompt_tokens type hint ( #22761 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Gabriel Marinho <gmarinho@ibm.com >
Signed-off-by: Gabriel Marinho <104592062+gmarinho2@users.noreply.github.com >
Signed-off-by: Max de Bayser <mbayser@br.ibm.com >
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Max de Bayser <mbayser@br.ibm.com > 
						
						
					 
					
						2025-08-30 20:39:38 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						038e9be4eb 
					 
					
						
						
							
							[LoRA] Much faster startup when LoRA is enabled ( #23777 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Andy Lo <andy@mistral.ai >
Co-authored-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-08-30 15:37:39 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						68a349114f 
					 
					
						
						
							
							[Misc] enhance type hint for rearrange return value ( #23519 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Andy Xie <andy.xning@gmail.com > 
						
						
					 
					
						2025-08-30 06:43:33 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e80bca309e 
					 
					
						
						
							
							[Refactor] refactor freezing_value/cuda_event initialize outside try finally ( #23758 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Andy Xie <andy.xning@gmail.com > 
						
						
					 
					
						2025-08-30 06:42:25 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						fb4983e112 
					 
					
						
						
							
							[Misc] add reorder_batch AttentionMetadataBuilder ( #23798 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Andy Xie <andy.xning@gmail.com > 
						
						
					 
					
						2025-08-30 06:41:45 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						379ea2823a 
					 
					
						
						
							
							Add LoRA support for DeepSeek models (V2, V3, R1-0528) ( #23971 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: sadeghja1070 <sadegh.ja1070@gmail.com >
Signed-off-by: Jee Jee Li <pandaleefree@gmail.com >
Co-authored-by: Claude <noreply@anthropic.com >
Co-authored-by: Jee Jee Li <pandaleefree@gmail.com >
Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-08-30 06:40:02 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3a6acad431 
					 
					
						
						
							
							[Model] Enable encoder DP for MiniCPM-V ( #23948 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: zjy0516 <riverclouds.zhu@qq.com >
Signed-off-by: Jiangyun Zhu <riverclouds.zhu@qq.com >
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com > 
						
						
					 
					
						2025-08-30 06:31:26 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5490d633ce 
					 
					
						
						
							
							[UT] fix unify_kv_cache_configs when kv cache config needs sort ( #23843 )  
						
						 
						
						
						
						
					 
					
						2025-08-30 11:22:14 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						628d00cd7b 
					 
					
						
						
							
							[Bugfix] Fix test_lora_resolvers.py ( #23984 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-08-30 11:16:11 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4071c76cf3 
					 
					
						
						
							
							[V1] [Hybrid] Move MiniMaxLinearAttention into layers/mamba ( #23831 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Thomas Parnell <tpa@zurich.ibm.com >
Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-08-30 00:16:15 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f1bddbd852 
					 
					
						
						
							
							[Core] Cleanup TPU model runner for MM ( #23894 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-08-30 00:14:58 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9748c5198b 
					 
					
						
						
							
							[CI] Fix broken compile tests due to unsupported SiluMul+Nvfp4Quant fusion ( #23973 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Yong Hoon Shin <yhshin@meta.com >
Co-authored-by: Roger Wang <hey@rogerw.io > 
						
						
					 
					
						2025-08-30 00:14:43 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ee52a32705 
					 
					
						
						
							
							[CI] Move testing image from remote URL to S3 ( #23980 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Roger Wang <hey@rogerw.io > 
						
						
					 
					
						2025-08-29 21:41:25 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8fb85b7bb6 
					 
					
						
						
							
							Add routed_scaling_factor to MoE grouped topk ( #23123 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Xin Yang <xyangx@amazon.com >
Co-authored-by: Michael Goin <mgoin64@gmail.com >
Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-08-29 21:36:48 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5b31cb1781 
					 
					
						
						
							
							[Bugfix] Fix --config arg expansion called from api_server.py ( #23944 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jean-Francois Dube <dubejf+gh@gmail.com >
Co-authored-by: Jean-Francois Dube <dubejf+gh@gmail.com >
Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-08-29 21:36:39 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d660c98c1b 
					 
					
						
						
							
							[CI] Fix unavailable image remote URL ( #23966 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Roger Wang <hey@rogerw.io > 
						
						
					 
					
						2025-08-29 15:40:04 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5674a40366 
					 
					
						
						
							
							[Misc] Make download_weights_from_hf more reliable ( #23863 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-08-29 12:37:24 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8c3e199998 
					 
					
						
						
							
							Revert gemma3n fast prefill changes ( #23897 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Yong Hoon Shin <yhshin@meta.com > 
						
						
					 
					
						2025-08-29 12:16:57 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1c26b42296 
					 
					
						
						
							
							[Docs] [V1] [Hybrid] Add new documentation re: contributing mamba-based models  ( #23824 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Thomas Parnell <tpa@zurich.ibm.com > 
						
						
					 
					
						2025-08-29 18:47:58 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b7adf94c4a 
					 
					
						
						
							
							Tuned H100/H200 triton fp8 block configs for fused_qkv_a_proj ( #23939 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-08-29 10:28:35 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4d7fe40fc0 
					 
					
						
						
							
							[RL][BugFix] Fix missing tokenizer error for token-in-token-out ( #23904 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: 22quinn <33176974+22quinn@users.noreply.github.com >
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com > 
						
						
					 
					
						2025-08-30 01:09:55 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0dc9532065 
					 
					
						
						
							
							[BUGFIX ] fix undefined silu_and_mul_nvfp4_quant ( #23929 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: hongchao <hongchao@msh.team >
Signed-off-by: Richard Zou <zou3519@gmail.com >
Co-authored-by: hongchao <hongchao@msh.team >
Co-authored-by: Richard Zou <zou3519@gmail.com >
Co-authored-by: Richard Zou <zou3519@users.noreply.github.com > 
						
						
					 
					
						2025-08-29 09:36:39 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						72a69132dc 
					 
					
						
						
							
							[CI]  Add aiter to matching list of issue auto labeller for rocm tag ( #23942 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: vllmellm <vllm.ellm@embeddedllm.com > 
						
						
					 
					
						2025-08-29 15:29:21 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d90d8eb674 
					 
					
						
						
							
							[BugFix] Async scheduling and PP compatibility with DP ( #23770 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-08-29 08:17:27 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0a2f4c0793 
					 
					
						
						
							
							[Models] Use in-place adds in Idefics2Vision ( #23932 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lukas Geiger <lukas.geiger94@gmail.com > 
						
						
					 
					
						2025-08-29 07:42:57 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1cf3753b90 
					 
					
						
						
							
							[MODEL] Apertus and XIELU ( #23068 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: EduardDurech <39579228+EduardDurech@users.noreply.github.com >
Co-authored-by: AllenHaoHuang <allenhuangdd@gmail.com > 
						
						
					 
					
						2025-08-29 20:29:18 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4f7cde7272 
					 
					
						
						
							
							Adds json_count_leaves utility function  ( #23899 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: aditchawdhary <aditxy@hotmail.com > 
						
						
					 
					
						2025-08-29 05:28:13 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						67c14906aa 
					 
					
						
						
							
							Update PyTorch to 2.8.0 ( #20358 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Huy Do <huydhn@gmail.com >
Co-authored-by: Michael Goin <mgoin64@gmail.com > 
						
						
					 
					
						2025-08-29 18:57:35 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						69f46359dd 
					 
					
						
						
							
							[Multimodal] Consolidate mm inputs into MultiModalFeatureSpec ( #23779 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: sfeng33 <4florafeng@gmail.com > 
						
						
					 
					
						2025-08-29 18:36:57 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d9e00dbd1f 
					 
					
						
						
							
							[Performance] V1 Classify Models E2E Performance Optimization ( #23541 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: wang.yuqi <noooop@126.com > 
						
						
					 
					
						2025-08-29 03:12:32 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ad39106b16 
					 
					
						
						
							
							[CPU] Enable data parallel for CPU backend ( #23903 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: jiang1.li <jiang1.li@intel.com > 
						
						
					 
					
						2025-08-29 02:19:58 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2554b27baa 
					 
					
						
						
							
							[V0 Deprecation] Remove pooling model support in V0  ( #23434 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu >
Signed-off-by: Max de Bayser <mbayser@br.ibm.com >
Co-authored-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-08-29 00:04:02 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						934bebf192 
					 
					
						
						
							
							Better errors for Transformers backend missing features ( #23759 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-08-29 07:01:40 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						885ca6d31d 
					 
					
						
						
							
							[Misc] Fix warnings for mistral model ( #23552 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: zjy0516 <riverclouds.zhu@qq.com >
Signed-off-by: Jiangyun Zhu <riverclouds.zhu@qq.com >
Co-authored-by: Patrick von Platen <patrick.v.platen@gmail.com > 
						
						
					 
					
						2025-08-29 06:58:48 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2d0afcc9dc 
					 
					
						
						
							
							[mrope][Qwen2-VL] Fix edge case where getting index of image/video token can potentially throw in default vl mrope implementation.  ( #23895 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chenheli Hua <huachenheli@outlook.com > 
						
						
					 
					
						2025-08-28 23:29:13 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b4f9e9631c 
					 
					
						
						
							
							[CI/Build] Clean up LoRA test ( #23890 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-08-28 23:28:35 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						05d839c19e 
					 
					
						
						
							
							Fix(async): Add support for truncate_prompt_tokens in AsyncLLM ( #23800 )  
						
						 
						
						
						
						
					 
					
						2025-08-28 22:55:06 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6597d7a456 
					 
					
						
						
							
							[Platform] import activation_quant_fusion for CUDA only ( #23882 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com > 
						
						
					 
					
						2025-08-28 22:54:16 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5264015d74 
					 
					
						
						
							
							[BugFix][AMD][Deepseek] fix a dtype mismatch error for deepseek running on AMD ( #23864 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jinghui Zhang <jinghuizhang0804@gmail.com > 
						
						
					 
					
						2025-08-28 22:54:12 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						98ac0cb32d 
					 
					
						
						
							
							[Bugfix] Use ReplicatedLinear for SequenceClassification head ( #23836 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn > 
						
						
					 
					
						2025-08-29 04:41:20 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c8b3b299c9 
					 
					
						
						
							
							[tests] Improve speed and reliability of test_transcription_api_correctness ( #23854 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Russell Bryant <rbryant@redhat.com > 
						
						
					 
					
						2025-08-29 04:25:33 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						006477e60b 
					 
					
						
						
							
							[ROCm][Fix] Fix rocm build caused by  #23791  ( #23847 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: charlifu <charlifu@amd.com > 
						
						
					 
					
						2025-08-28 19:52:27 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						de533ab2a1 
					 
					
						
						
							
							[Models] Improve iteration over layers ( #19497 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lukas Geiger <lukas.geiger94@gmail.com > 
						
						
					 
					
						2025-08-29 09:26:34 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						235c9db8a7 
					 
					
						
						
							
							[XPU] support data parallel for MoE models on XPU ( #22887 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: chzhang <chaojun.zhang@intel.com > 
						
						
					 
					
						2025-08-29 09:23:04 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b668055a11 
					 
					
						
						
							
							[V0 Deprecation] Remove V0 Samplers test ( #23862 )  
						
						 
						
						
						
						
					 
					
						2025-08-28 18:05:52 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d3d2aad5a2 
					 
					
						
						
							
							[Log] Use Debug Once for DeepGEMM E8M0 When not Enabled ( #23858 )  
						
						 
						
						
						
						
					 
					
						2025-08-28 22:18:10 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						cb293f6a79 
					 
					
						
						
							
							[V1] Enable prefill optimization for Gemma3n ( #22628 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Yong Hoon Shin <yhshin@meta.com > 
						
						
					 
					
						2025-08-28 14:54:30 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7ffbf27239 
					 
					
						
						
							
							[BugFix][FlashInfer] Fix potential race condition for paged_kv_indptr_cpu ( #23737 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-08-28 14:22:46 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						27e88cee74 
					 
					
						
						
							
							chore: build release image by default ( #23852 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Codex <codex@openai.com > 
						
						
					 
					
						2025-08-28 13:17:15 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						16a45b3a28 
					 
					
						
						
							
							[NVIDIA] Support SiluMul + NVFP4 quant fusion ( #23671 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: jindih <jindih@nvidia.com >
Signed-off-by: elvischenv <219235043+elvischenv@users.noreply.github.com >
Co-authored-by: jindih <jindih@nvidia.com >
Co-authored-by: Michael Goin <mgoin64@gmail.com >
Co-authored-by: Luka Govedic <lgovedic@redhat.com > 
						
						
					 
					
						2025-08-28 19:36:50 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						57d4ede520 
					 
					
						
						
							
							[bugfix] [spec-decoding] fix data race in sample_recovered_tokens_kernel (vLLM v1) ( #23829 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: He-Jingkai <he-jingkai@outlook.com > 
						
						
					 
					
						2025-08-28 19:05:20 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						04d1dd7f4a 
					 
					
						
						
							
							[ROCm][Aiter] Add triton fp8 bmm kernel for mla ( #23264 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Divakar Verma <divakar.verma@amd.com >
Co-authored-by: ShaoChunLee <Shao-Chun.Lee@amd.com > 
						
						
					 
					
						2025-08-28 18:18:08 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f32a5bc505 
					 
					
						
						
							
							Migrate Llama4ImagePatchInputs to TensorSchema ( #22021 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Benji Beck <benjibeck@meta.com > 
						
						
					 
					
						2025-08-28 17:29:37 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8805ad9fa9 
					 
					
						
						
							
							Add scale_config.yml file for Meta autoscalers for GH Actions ( #23840 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jean Schmidt <contato@jschmidt.me > 
						
						
					 
					
						2025-08-28 09:31:20 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0583578f42 
					 
					
						
						
							
							[ci] breaks down V1 Test into 3 groups of approx 30 minutes runtime ( #23757 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jean Schmidt <contato@jschmidt.me > 
						
						
					 
					
						2025-08-28 08:59:19 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						db74d60490 
					 
					
						
						
							
							[Bugfix] Add fake mode around passes ( #23349 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: angelayi <yiangela7@gmail.com > 
						
						
					 
					
						2025-08-28 11:25:56 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						95089607fa 
					 
					
						
						
							
							[Model][gpt-oss] Support DP+EP for GPT-OSS with FlashInfer trtllm-gen MoE ( #23819 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Po-Han Huang <pohanh@nvidia.com > 
						
						
					 
					
						2025-08-28 06:56:20 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1f096f9b95 
					 
					
						
						
							
							[CI] Fix linting error on main ( #23835 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Thomas Parnell <tpa@zurich.ibm.com > 
						
						
					 
					
						2025-08-28 06:52:01 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						66548f6603 
					 
					
						
						
							
							[Bugfix] Fix benchmark_moe.py for blockwise fp8. ( #23823 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: crischeng <420985011@qq.com >
Co-authored-by: cris <grace@guisenbindeMacBook-Pro.local > 
						
						
					 
					
						2025-08-28 21:44:09 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d3da2eea54 
					 
					
						
						
							
							[Doc]: fix typos in Python scripts ( #23828 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Didier Durand <durand.didier@gmail.com > 
						
						
					 
					
						2025-08-28 05:37:38 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						bfab219648 
					 
					
						
						
							
							[Model] [gpt-oss] fix gpt-oss pp support ( #23815 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: zjy0516 <riverclouds.zhu@qq.com > 
						
						
					 
					
						2025-08-28 05:36:55 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a3432f18fd 
					 
					
						
						
							
							[BugFix][Spec Decode] Use float64 for uniform_probs ( #23803 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-08-28 12:26:45 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						67cee40da0 
					 
					
						
						
							
							[CI/Build][Bugfix] Fix Qwen VL tests on CPU ( #23818 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: jiang1.li <jiang1.li@intel.com > 
						
						
					 
					
						2025-08-28 11:57:05 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d99c3a4f7b 
					 
					
						
						
							
							[Doc]: fix typos in .md files (including those of  #23751 ) ( #23825 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Didier Durand <durand.didier@gmail.com > 
						
						
					 
					
						2025-08-28 04:38:19 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3462c1c522 
					 
					
						
						
							
							[FIXBUG] Add return_success parameter to moe_wna16_weight_loader function ( #22797 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: JartX <sagformas@epdcenter.es >
Co-authored-by: Michael Goin <mgoin64@gmail.com > 
						
						
					 
					
						2025-08-28 09:03:22 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c5d004aaaf 
					 
					
						
						
							
							[Model] Add PP support and VLM backbone compatability for GPT-OSS ( #23680 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn > 
						
						
					 
					
						2025-08-28 16:03:28 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						11a7fafaa8 
					 
					
						
						
							
							[New Model]: Support GteNewModelForSequenceClassification ( #23524 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: wang.yuqi <noooop@126.com > 
						
						
					 
					
						2025-08-28 15:36:42 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						186aced5ff 
					 
					
						
						
							
							[Kernel] cuda kernels for upcoming decode context parallel feature ( #23791 )  
						
						 
						
						... 
						
						
						
						Co-authored-by: hongchao <hongchao@msh.team > 
						
						
					 
					
						2025-08-28 15:29:11 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						daa1273b14 
					 
					
						
						
							
							[Bugfix] when set offline model running error ( #23711 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: rongfu.leng <rongfu.leng@daocloud.io > 
						
						
					 
					
						2025-08-28 07:27:45 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c07a73317d 
					 
					
						
						
							
							[CI] enable idefics3 and fuyu-8b test in multimodal test ( #23790 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: zjy0516 <riverclouds.zhu@qq.com > 
						
						
					 
					
						2025-08-28 14:51:24 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						22feac8e95 
					 
					
						
						
							
							[Transform] [Quantization] Add transforms to compressed tensors ( #22486 )  
						
						 
						
						
						
						
					 
					
						2025-08-28 02:43:48 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c8851a4723 
					 
					
						
						
							
							Add deprecation warning for lora_extra_vocab_size ( #23635 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jinheng Li <ahengljh@gmail.com > 
						
						
					 
					
						2025-08-27 22:34:29 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f48a9af892 
					 
					
						
						
							
							[CI] make all multi-gpu weight loading tests run nightly ( #23792 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Alex Yun <alexyun04@gmail.com > 
						
						
					 
					
						2025-08-27 21:27:36 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a11adafdca 
					 
					
						
						
							
							Gracefully handle edge cases in harmony utils ( #23155 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jan Kessler <jakessle@uni-mainz.de >
Co-authored-by: Chen Zhang <zhangch99@outlook.com >
Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-08-27 20:14:00 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a781e84ec2 
					 
					
						
						
							
							[Perf] Tune configs for triton block fp8 gemm H100/H200 ( #23748 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-08-28 11:12:53 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1b7b161a09 
					 
					
						
						
							
							[Feature] models: pass layer prefix to replace_linear_class for per-layer quantization routing. Addresses  #23239  ( #23556 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Shrey Gupta <shreyg1303@gmail.com > 
						
						
					 
					
						2025-08-27 20:12:44 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a69693e38f 
					 
					
						
						
							
							Migrate Qwen inputs to TensorSchema ( #23473 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Benji Beck <benjibeck@meta.com > 
						
						
					 
					
						2025-08-28 10:43:26 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5da4f5d857 
					 
					
						
						
							
							[Bugfix] Fix for V1 priority scheduling crashes at preemption ( #23713 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Hanchenli <lihanc2002@gmail.com > 
						
						
					 
					
						2025-08-28 00:44:52 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						321938e9ac 
					 
					
						
						
							
							[Feature] Add VLLM_DISABLE_PAD_FOR_CUDAGRAPH to Avoid Hang Issue ( #23595 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yewentao256 <zhyanwentao@126.com >
Signed-off-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com >
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> 
						
						
					 
					
						2025-08-27 21:52:24 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f9ca2b40a0 
					 
					
						
						
							
							[Bugfix] Fix Marlin NVFP4 for modelopt ( #23659 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-08-27 17:48:16 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						082cc07ef8 
					 
					
						
						
							
							DP/EP Support for gpt-oss with deepep-ht comm kernel on SM100 ( #23608 )  
						
						 
						
						
						
						
					 
					
						2025-08-27 17:33:21 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						853c371fc3 
					 
					
						
						
							
							[V1][Mamba] - Enable V1 by default for Mamba Models ( #23650 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: asafg <39553475+Josephasafg@users.noreply.github.com > 
						
						
					 
					
						2025-08-27 20:53:30 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8bf6266a17 
					 
					
						
						
							
							[Multimodal] Generate mm_hash based on request metadata when caching is turned off ( #23690 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Roger Wang <hey@rogerw.io > 
						
						
					 
					
						2025-08-27 20:24:31 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0585a9e73c 
					 
					
						
						
							
							Disable torch.compile for dynamic rope models in Transformers backend ( #23738 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-08-27 19:03:05 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3c0ef769ba 
					 
					
						
						
							
							ci: Add arm64 docker build to release pipeline ( #23210 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Eli Uriegas <eliuriegas@meta.com >
Signed-off-by: Eli Uriegas <1700823+seemethere@users.noreply.github.com > 
						
						
					 
					
						2025-08-27 10:41:48 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4e4d017b6f 
					 
					
						
						
							
							[Docs] Fix warnings in mkdocs build (continued) ( #23743 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Zerohertz <ohg3417@gmail.com >
Signed-off-by: Hyogeun Oh (오효근) <ohg3417@gmail.com > 
						
						
					 
					
						2025-08-27 17:17:29 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						dd58932280 
					 
					
						
						
							
							[V1] [Hybrid] Enable compile and piecewise CUDA graph for MiniMax-Text models ( #22589 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Thomas Parnell <tpa@zurich.ibm.com > 
						
						
					 
					
						2025-08-27 10:05:16 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						52883ed084 
					 
					
						
						
							
							[Model] Merge SupportsMultiModalWithRawInput with SupportsMultiModal ( #23749 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-08-27 10:01:50 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4f35be10a9 
					 
					
						
						
							
							[BugFix] Fix topk_softmax assert ( #19764 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Luka Govedic <lgovedic@redhat.com > 
						
						
					 
					
						2025-08-27 09:47:28 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2b61d2e22f 
					 
					
						
						
							
							[Docs] Remove in-tree Gaudi install instructions ( #23628 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-08-27 09:22:21 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3ce8285d6d 
					 
					
						
						
							
							[LogitsProcs] Deduplicate built-in LP implementation logic ( #23362 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-08-27 23:11:33 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						83f555f637 
					 
					
						
						
							
							[Doc]: upgrade version of crate-ci tool for improved typo detection ( #23755 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Didier Durand <durand.didier@gmail.com > 
						
						
					 
					
						2025-08-27 07:59:34 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						841490434a 
					 
					
						
						
							
							[Model] Enable native HF format InternVL support ( #23742 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn > 
						
						
					 
					
						2025-08-27 14:45:17 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3af47c3cc6 
					 
					
						
						
							
							[Feature] Add Hopper DeepGEMM E8M0 for DeepSeekV3.1 scale_fmt ( #23666 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yewentao256 <zhyanwentao@126.com >
Signed-off-by: youkaichao <youkaichao@gmail.com >
Co-authored-by: youkaichao <youkaichao@gmail.com > 
						
						
					 
					
						2025-08-27 14:09:08 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						513c1fe255 
					 
					
						
						
							
							Only run get_attr_docs if generating help text ( #23723 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-08-27 13:55:12 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						fe8d7b6f03 
					 
					
						
						
							
							[Model] Interface to enable batch-level DP support ( #23733 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk >
Signed-off-by: Cyrus Leung <cyrus.tl.leung@gmail.com >
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> 
						
						
					 
					
						2025-08-27 06:41:22 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						16dc4052b0 
					 
					
						
						
							
							Fix pre-commit on main ( #23747 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-08-27 06:39:48 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8dd2baa597 
					 
					
						
						
							
							Add vLLM Korea Meetup in the README.md and meetups.md ( #23746 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: rebel-hongseok <hongseok@rebellions.ai >
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> 
						
						
					 
					
						2025-08-27 06:25:49 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5eeef1b908 
					 
					
						
						
							
							[Model] Explicit default_pooling_type interface ( #23736 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-08-27 13:24:09 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						704432af3c 
					 
					
						
						
							
							[V1] [Hybrid] Disable prefix caching by default for hybrid or mamba-based models  ( #23716 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Thomas Parnell <tpa@zurich.ibm.com > 
						
						
					 
					
						2025-08-27 12:51:54 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a403d0fa41 
					 
					
						
						
							
							[Misc] Remove unnecessary _send_reconfig_message() in core_client.py ( #23127 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-08-27 05:50:47 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8c13820f0b 
					 
					
						
						
							
							[Bugfix] Fix task field initialization when PYTHONOPTIMIZE is enabled ( #23718 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: cndoit18 <cndoit18@outlook.com > 
						
						
					 
					
						2025-08-27 12:42:20 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9d30de4469 
					 
					
						
						
							
							[model] Support MiniCPM-V 4.5 ( #23586 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: tc-mb <caitianchi@modelbest.cn >
Signed-off-by: Xin Yang <xyangx@amazon.com >
Signed-off-by: Abatom <abzhonghua@gmail.com >
Signed-off-by: chzhang <chaojun.zhang@intel.com >
Signed-off-by: Pate Motter <patemotter@google.com >
Signed-off-by: Terrencezzj <terrence@cohere.ai >
Signed-off-by: Woosuk Kwon <woosuk@thinkingmachines.ai >
Signed-off-by: simon-mo <simon.mo@hey.com >
Signed-off-by: mgoin <mgoin64@gmail.com >
Signed-off-by: Siyuan Fu <siyuanf@nvidia.com >
Signed-off-by: siyuanf <siyuanf@nvidia.com >
Signed-off-by: Weiliang Liu <weiliangl@nvidia.com >
Signed-off-by: Michael Goin <mgoin64@gmail.com >
Signed-off-by: yewentao256 <zhyanwentao@126.com >
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk >
Signed-off-by: Luka Govedič <ProExpertProg@users.noreply.github.com >
Signed-off-by: Zijing Liu <liuzijing2014@gmail.com >
Signed-off-by: Zijing Liu <liuzijing2014@users.noreply.github.com >
Signed-off-by: jiabin.00 <jiabin.00@bytedance.com >
Signed-off-by: zjy0516 <riverclouds.zhu@qq.com >
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com >
Signed-off-by: Jee Jee Li <pandaleefree@gmail.com >
Signed-off-by: tc-mb <157115220+tc-mb@users.noreply.github.com >
Signed-off-by: Roger Wang <hey@rogerw.me >
Signed-off-by: Roger Wang <hey@rogerw.io >
Signed-off-by: Huy Do <huydhn@gmail.com >
Signed-off-by: Matúš Námešný <matus.namesny@ameria.com >
Signed-off-by: Guillaume Calmettes <gcalmettes@scaleway.com >
Signed-off-by: Chen Zhang <zhangch99@outlook.com >
Signed-off-by: oye93 <en.ouyang93@outlook.com >
Signed-off-by: Julien Lin <jullin@nvidia.com >
Signed-off-by: Didier Durand <durand.didier@gmail.com >
Signed-off-by: Tianyu Li <tianyu.li@arm.com >
Signed-off-by: Hongxia Yang <hongxia.yang@amd.com >
Signed-off-by: Yuekai Zhang <zhangyuekai@foxmail.com >
Signed-off-by: vllmellm <vllm.ellm@embeddedllm.com >
Signed-off-by: jiang1.li <jiang1.li@intel.com >
Signed-off-by: Zerohertz <ohg3417@gmail.com >
Signed-off-by: Hyogeun Oh (오효근) <ohg3417@gmail.com >
Signed-off-by: Thomas Parnell <tpa@zurich.ibm.com >
Signed-off-by: Russell Bryant <rbryant@redhat.com >
Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn >
Signed-off-by: Huzaifa Sidhpurwala <huzaifas@redhat.com >
Signed-off-by: Federico <65908512+coval3nte@users.noreply.github.com >
Signed-off-by: Zixuan Zhang <zixuanzhang@bytedance.com >
Signed-off-by: wuhang <wuhang6@huawei.com >
Signed-off-by: czhu-cohere <conway.zhu@cohere.com >
Signed-off-by: Wei Wei <wwei6@meta.com >
Signed-off-by: Yiheng Xu <charlesyihengxu@gmail.com >
Signed-off-by: Chenheli Hua <huachenheli@outlook.com >
Signed-off-by: wangyafeng <wangyafeng@baidu.com >
Co-authored-by: Xin Yang <105740670+xyang16@users.noreply.github.com >
Co-authored-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com >
Co-authored-by: Zhonghua Deng <abzhonghua@gmail.com >
Co-authored-by: Chaojun Zhang <chaojun.zhang@intel.com >
Co-authored-by: Pate Motter <p@temotter.com >
Co-authored-by: Terrence Zhao <32208165+Terrencezzj@users.noreply.github.com >
Co-authored-by: Woosuk Kwon <woosuk.kwon@berkeley.edu >
Co-authored-by: Simon Mo <simon.mo@hey.com >
Co-authored-by: Michael Goin <mgoin64@gmail.com >
Co-authored-by: weiliang <weiliangl@nvidia.com >
Co-authored-by: Siyuan Fu <siyuanf@nvidia.com >
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk >
Co-authored-by: Copilot <198982749+Copilot@users.noreply.github.com >
Co-authored-by: ProExpertProg <11367180+ProExpertProg@users.noreply.github.com >
Co-authored-by: Luka Govedič <ProExpertProg@users.noreply.github.com >
Co-authored-by: Zijing Liu <liuzijing2014@users.noreply.github.com >
Co-authored-by: Bin Jia <45593998+FoolPlayer@users.noreply.github.com >
Co-authored-by: Jiangyun Zhu <riverclouds.zhu@qq.com >
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com >
Co-authored-by: Raghavan <oneraghavan@gmail.com >
Co-authored-by: Jee Jee Li <pandaleefree@gmail.com >
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com >
Co-authored-by: Roger Wang <hey@rogerw.io >
Co-authored-by: Roger Wang <hey@rogerw.me >
Co-authored-by: knlnguyen1802 <knlnguyen1802@gmail.com >
Co-authored-by: Huy Do <huydhn@gmail.com >
Co-authored-by: Matúš Námešný <matus@namesny.com >
Co-authored-by: Guillaume Calmettes <gcalmettes@scaleway.com >
Co-authored-by: Chen Zhang <zhangch99@outlook.com >
Co-authored-by: En Ouyang <en.ouyang93@outlook.com >
Co-authored-by: Li, Jiang <jiang1.li@intel.com >
Co-authored-by: nvjullin <jullin@nvidia.com >
Co-authored-by: Didier Durand <2927957+didier-durand@users.noreply.github.com >
Co-authored-by: TianyuLi0 <116711075+TianyuLi0@users.noreply.github.com >
Co-authored-by: Hongxia Yang <62075498+hongxiayang@users.noreply.github.com >
Co-authored-by: Yuekai Zhang <zhangyuekai@foxmail.com >
Co-authored-by: vllmellm <vllm.ellm@embeddedllm.com >
Co-authored-by: Hyogeun Oh (오효근) <ohg3417@gmail.com >
Co-authored-by: Thomas Parnell <tpa@zurich.ibm.com >
Co-authored-by: Russell Bryant <rbryant@redhat.com >
Co-authored-by: Lukas Geiger <lukas.geiger94@gmail.com >
Co-authored-by: Isotr0py <mozf@mail2.sysu.edu.cn >
Co-authored-by: Huzaifa Sidhpurwala <huzaifas@redhat.com >
Co-authored-by: Federico <65908512+coval3nte@users.noreply.github.com >
Co-authored-by: zixuanzhang226 <zixuanzhang@bytedance.com >
Co-authored-by: wuhang <wuhang6@huawei.com >
Co-authored-by: yzds <41983536+youzhedian@users.noreply.github.com >
Co-authored-by: hongchao <hongchao@msh.team >
Co-authored-by: czhu-cohere <conway.zhu@cohere.com >
Co-authored-by: Wei <weiweinpu@gmail.com >
Co-authored-by: Yiheng Xu <charlesyihengxu@gmail.com >
Co-authored-by: Aaron Pham <contact@aarnphm.xyz >
Co-authored-by: Chenheli Hua <huachenheli@outlook.com >
Co-authored-by: CSWYF3634076 <58356743+CSWYF3634076@users.noreply.github.com > 
						
						
					 
					
						2025-08-27 05:38:00 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1f7a9c95e4 
					 
					
						
						
							
							[Docs] Fix a 1-2-3 list and style issues in tpu.md ( #23729 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: windsonsea <haifeng.yao@daocloud.io > 
						
						
					 
					
						2025-08-27 05:37:52 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8f0d7eaea8 
					 
					
						
						
							
							[XPU] Fix OOM issue for data parallel with Ray backend ( #22500 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Fanli Lin <fanli.lin@intel.com >
Signed-off-by: Fanli Lin <fanli0116@gmail.com >
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com > 
						
						
					 
					
						2025-08-27 19:57:38 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e03940762b 
					 
					
						
						
							
							[CI/Build] Reduce LoRA layer test cases ( #23721 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-08-27 10:59:35 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						11eddf02f0 
					 
					
						
						
							
							[FlashInfer] Cache hyper params in metadata builder ( #23732 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-08-27 03:45:04 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						04ff1e43fb 
					 
					
						
						
							
							[Misc] Move CpuGpuBuffer to vllm/v1/utils.py ( #23728 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-08-27 03:25:00 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6578e87365 
					 
					
						
						
							
							Optimize input preparation for FlashInfer [2/N] ( #23174 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-08-27 02:52:45 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5bd9f84158 
					 
					
						
						
							
							[Docs] Fix an admonition important ( #23726 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: windsonsea <haifeng.yao@daocloud.io > 
						
						
					 
					
						2025-08-27 02:50:09 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						91e382c935 
					 
					
						
						
							
							[CI/Build] Remove redundant register in model init tests ( #23715 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-08-27 08:11:15 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6446677839 
					 
					
						
						
							
							[XPU]fix cuda event used in XPU model runner ( #23708 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Kunshang Ji <kunshang.ji@intel.com > 
						
						
					 
					
						2025-08-27 07:27:14 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						69244e67e6 
					 
					
						
						
							
							[Core] Use key-only cache for BaseMultiModalProcessor ( #23018 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-08-27 14:19:13 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8dbf6ed7be 
					 
					
						
						
							
							[Bugfix] fix when config.yaml config value is list parse error ( #23528 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: rongfu.leng <rongfu.leng@daocloud.io > 
						
						
					 
					
						2025-08-27 05:54:39 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9de25c294b 
					 
					
						
						
							
							[CI/Build] Remove redundant LoRA model tests ( #23706 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-08-27 05:51:50 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						fce10dbed5 
					 
					
						
						
							
							[XPU] Add xpu torch.compile support ( #22609 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Kunshang Ji <kunshang.ji@intel.com > 
						
						
					 
					
						2025-08-27 05:33:27 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d272415e57 
					 
					
						
						
							
							[Quantization] Expand compressed-tensors MoE matching logic to support NFP4 + FP8 MoEs ( #22674 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Dipika Sikka <dipikasikka1@gmail.com >
Signed-off-by: Dipika <dipikasikka1@gmail.com > 
						
						
					 
					
						2025-08-27 05:00:21 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						142ac08030 
					 
					
						
						
							
							[Frontend] Optimize beam search performance by limiting concurrency ( #23599 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chen Zhang <zhangch99@outlook.com > 
						
						
					 
					
						2025-08-27 04:59:14 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3210264421 
					 
					
						
						
							
							[Frontend] Add --log-error-stack to print stack trace for error response ( #22960 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chen Zhang <zhangch99@outlook.com > 
						
						
					 
					
						2025-08-27 04:58:59 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						644d57d531 
					 
					
						
						
							
							[Model] Add Ernie4.5 VL Model Support ( #22514 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: wangyafeng <wangyafeng@baidu.com > 
						
						
					 
					
						2025-08-26 21:02:55 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c905684cfe 
					 
					
						
						
							
							[Core] Asynchronous h2d in merge_multimodal_embeddings via pinned memory. ( #23686 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chenheli Hua <huachenheli@outlook.com >
Co-authored-by: Roger Wang <hey@rogerw.io > 
						
						
					 
					
						2025-08-26 20:05:34 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						786835807b 
					 
					
						
						
							
							[Bugfix]: Qwen3 Coder Tool Parser ( #23099 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Yiheng Xu <charlesyihengxu@gmail.com >
Co-authored-by: Aaron Pham <contact@aarnphm.xyz > 
						
						
					 
					
						2025-08-26 19:58:32 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						fecbb7c782 
					 
					
						
						
							
							[Bugfix][gpt-oss] passing the cache config in gpt-oss ( #23613 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Wei Wei <wwei6@meta.com > 
						
						
					 
					
						2025-08-27 02:54:23 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6dab89b8ec 
					 
					
						
						
							
							[Docs] Fix math rendering in docs ( #23676 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-08-26 18:47:08 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						de02b07db4 
					 
					
						
						
							
							[Bugfix] Lazy import gpt_oss_triton_kernels_moe for mxfp4 ( #23678 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-08-27 09:34:57 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						eb1995167e 
					 
					
						
						
							
							[gpt-oss] Enable unit test for response API harmony integration ( #23533 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chen Zhang <zhangch99@outlook.com > 
						
						
					 
					
						2025-08-26 18:23:26 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2c2b140ae8 
					 
					
						
						
							
							[quantization] use channel scales for w4a8 + misc fixes ( #23570 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: czhu-cohere <conway.zhu@cohere.com > 
						
						
					 
					
						2025-08-26 18:23:23 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c7c80af084 
					 
					
						
						
							
							fix pynccl reduce_scatter ( #23648 )  
						
						 
						
						... 
						
						
						
						Co-authored-by: hongchao <hongchao@msh.team > 
						
						
					 
					
						2025-08-26 18:21:11 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6891205b16 
					 
					
						
						
							
							[Feature][Responses API] Support MCP tool in background mode ( #23494 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: wuhang <wuhang6@huawei.com > 
						
						
					 
					
						2025-08-27 01:06:58 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b1625dbe9c 
					 
					
						
						
							
							feat: add triton fused moe config for GLM-4.5-Air-FP8 on B200 ( #23695 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Zixuan Zhang <zixuanzhang@bytedance.com > 
						
						
					 
					
						2025-08-26 18:06:10 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						585e0bde36 
					 
					
						
						
							
							[Bugfix] UnboundLocalError when GptOss reasoning specified ( #23054 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Federico <65908512+coval3nte@users.noreply.github.com > 
						
						
					 
					
						2025-08-27 00:29:52 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						714872f1a9 
					 
					
						
						
							
							[Compile] Fix Cmake Warning ( #23689 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yewentao256 <zhyanwentao@126.com > 
						
						
					 
					
						2025-08-26 23:48:32 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5f1af97f86 
					 
					
						
						
							
							[V1] [Hybrid] Enable Full CUDA graph by default for hybrid models in V1 ( #22594 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Thomas Parnell <tpa@zurich.ibm.com > 
						
						
					 
					
						2025-08-26 23:28:55 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c3b0fd1ee6 
					 
					
						
						
							
							[V1][P/D]P2pNcclConnector supports flashinfer ( #23536 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Abatom <abzhonghua@gmail.com >
Co-authored-by: Simon Mo <simon.mo@hey.com > 
						
						
					 
					
						2025-08-26 22:56:16 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6421b66bf4 
					 
					
						
						
							
							[Docs] Move quant supported hardware table to README ( #23663 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-08-26 22:26:46 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2f13319f47 
					 
					
						
						
							
							Enhance the pre-notification policy ( #23532 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Huzaifa Sidhpurwala <huzaifas@redhat.com > 
						
						
					 
					
						2025-08-26 20:41:36 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d696f86e7b 
					 
					
						
						
							
							[doc] Hybrid KV Cache Manager design doc ( #22688 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chen Zhang <zhangch99@outlook.com >
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com >
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-08-26 20:19:05 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9816b81f5f 
					 
					
						
						
							
							[Model] Enable video support for InternVL3.5 models ( #23658 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn > 
						
						
					 
					
						2025-08-26 19:46:52 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c37c0af990 
					 
					
						
						
							
							[Misc] Fix comments in tests/kernels/quantization ( #23675 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: zjy0516 <riverclouds.zhu@qq.com > 
						
						
					 
					
						2025-08-26 19:31:20 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9715f7bb0f 
					 
					
						
						
							
							[Bugfix] Fix incorrect original shape in hashing ( #23672 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk >
Co-authored-by: Lukas Geiger <lukas.geiger94@gmail.com > 
						
						
					 
					
						2025-08-26 19:01:25 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						98aa16ff41 
					 
					
						
						
							
							[v1] Add cross-attention KV cache support for encoder-decoder models ( #23664 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Russell Bryant <rbryant@redhat.com > 
						
						
					 
					
						2025-08-26 18:49:06 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						227e231b55 
					 
					
						
						
							
							[Docs] [V1] [Hybrid] Update docs to remove FlashInfer constraint for hybrid models ( #23665 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Thomas Parnell <tpa@zurich.ibm.com > 
						
						
					 
					
						2025-08-26 18:33:16 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						730d0ac8b9 
					 
					
						
						
							
							[Docs] Fix warnings in mkdocs build ( #23649 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Zerohertz <ohg3417@gmail.com >
Signed-off-by: Hyogeun Oh (오효근) <ohg3417@gmail.com >
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com >
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-08-26 18:19:23 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9b0187003e 
					 
					
						
						
							
							[Bugfix] Fix cuda event usage with CPU model runner ( #23643 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: jiang1.li <jiang1.li@intel.com > 
						
						
					 
					
						2025-08-26 17:10:42 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						44ac25eae2 
					 
					
						
						
							
							[CI] [Doc]: Add GH Action for auto labeling issues with rocm tag ( #20988 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: vllmellm <vllm.ellm@embeddedllm.com >
Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-08-26 16:20:13 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7ea22e42d5 
					 
					
						
						
							
							[Misc] Add override for allreduce fusion thresholds ( #23639 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Julien Lin <jullin@nvidia.com > 
						
						
					 
					
						2025-08-26 15:53:04 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9d4183dd2e 
					 
					
						
						
							
							[model] support qwen2audio embedding input ( #23625 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Yuekai Zhang <zhangyuekai@foxmail.com >
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> 
						
						
					 
					
						2025-08-26 23:48:08 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						513298f1b4 
					 
					
						
						
							
							[Bugfix] fix bf16 multimodal model hash ( #23623 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Yuekai Zhang <zhangyuekai@foxmail.com >
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Roger Wang <hey@rogerw.io >
Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-08-26 23:47:50 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						379f828fba 
					 
					
						
						
							
							[Docs] Reduce requirements for docs build ( #23651 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-08-26 15:43:28 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1fdc732419 
					 
					
						
						
							
							[ROCm] Starting to add AMD code reviewers for ROCm components ( #23496 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Hongxia Yang <hongxia.yang@amd.com > 
						
						
					 
					
						2025-08-26 07:32:37 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f58675bfb3 
					 
					
						
						
							
							[CPU] add cpu fused moe pytorch native implementation ( #23146 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Tianyu Li <tianyu.li@arm.com >
Co-authored-by: Li, Jiang <jiang1.li@intel.com > 
						
						
					 
					
						2025-08-26 14:09:17 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7c04779afa 
					 
					
						
						
							
							[Doc]: fix various spelling issues in multiple files ( #23636 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Didier Durand <durand.didier@gmail.com > 
						
						
					 
					
						2025-08-26 14:05:29 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f66673a39d 
					 
					
						
						
							
							[Kernel] Added flashinfer fp8 per-tensor gemms ( #22895 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Julien Lin <jullin@nvidia.com >
Co-authored-by: Michael Goin <mgoin64@gmail.com > 
						
						
					 
					
						2025-08-26 06:54:04 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b78bed1bc5 
					 
					
						
						
							
							[Hardware][Mac] Fix the installation fail for Apple Silicon (CPU)  ( #23565 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: oye93 <en.ouyang93@outlook.com >
Co-authored-by: Li, Jiang <jiang1.li@intel.com > 
						
						
					 
					
						2025-08-26 13:04:25 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						164b2273c8 
					 
					
						
						
							
							[Docs] Fix broken links to docs/api/summary.md ( #23637 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-08-26 13:00:18 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2b4fc9bd9b 
					 
					
						
						
							
							Support FlashAttention Backend for Hybrid SSM Models ( #23299 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chen Zhang <zhangch99@outlook.com > 
						
						
					 
					
						2025-08-26 12:41:52 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ebd5a77bb5 
					 
					
						
						
							
							feat: add usage to TranscriptionResponse (text and json response_format) ( #23576 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Guillaume Calmettes <gcalmettes@scaleway.com > 
						
						
					 
					
						2025-08-26 05:26:26 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						384dd1b0a8 
					 
					
						
						
							
							[Bugfix] Add missing enable_log_outputs parameter to init_app_state function ( #23634 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Matúš Námešný <matus.namesny@ameria.com > 
						
						
					 
					
						2025-08-26 12:13:15 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						fdeb3dac13 
					 
					
						
						
							
							[Model] fix DeepSeek e_score_correction_bias dtype to fp32 ( #23640 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-08-26 20:09:47 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d52358c1e0 
					 
					
						
						
							
							[Perf] Remove duplicated NVFP4 blockscales to save memory ( #23379 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-08-26 19:16:33 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6ace2f72b0 
					 
					
						
						
							
							Fix writing benchmark results with tuple keys ( #23633 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Huy Do <huydhn@gmail.com > 
						
						
					 
					
						2025-08-26 19:16:09 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b00e69f8ca 
					 
					
						
						
							
							Fix nits from  #20059  ( #23548 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-08-26 03:27:20 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						50fede6634 
					 
					
						
						
							
							[V1] Enable V1 for compute capability < 8.0 + FP32 ( #23614 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-08-26 03:00:18 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b5d34af328 
					 
					
						
						
							
							[Bugfix] Fix scheduling when repeated images in one request ( #23544 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Roger Wang <hey@rogerw.me >
Signed-off-by: Roger Wang <hey@rogerw.io >
Co-authored-by: Roger Wang <hey@rogerw.me >
Co-authored-by: knlnguyen1802 <knlnguyen1802@gmail.com > 
						
						
					 
					
						2025-08-26 09:46:28 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9b5f64238f 
					 
					
						
						
							
							[Bugfix] Fix Qwen25VL packed_modules_mapping ( #23604 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com >
Co-authored-by: Michael Goin <mgoin64@gmail.com > 
						
						
					 
					
						2025-08-26 01:09:14 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ff77764f86 
					 
					
						
						
							
							Fix CLI parameter documentation inconsistency in pooling_models.md ( #23630 )  
						
						 
						
						
						
						
					 
					
						2025-08-26 01:05:37 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						bfc1edc9f5 
					 
					
						
						
							
							[Docs] Fix titles for multi-file examples that are rendered in the docs ( #23573 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-08-26 00:16:44 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3ecbb14b81 
					 
					
						
						
							
							[Benchmarks] add benchmark for embedding models ( #23000 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: zjy0516 <riverclouds.zhu@qq.com > 
						
						
					 
					
						2025-08-25 23:57:08 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7d67a9d9f9 
					 
					
						
						
							
							[mypy] Fix incorrect type hint for EAGLE3 support ( #23617 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-08-25 23:50:17 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						959783fb99 
					 
					
						
						
							
							[fix] fix seed-oss-parser ( #23560 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: jiabin.00 <jiabin.00@bytedance.com > 
						
						
					 
					
						2025-08-25 23:16:36 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ce0e9dbd43 
					 
					
						
						
							
							[CI/Build] Fix typo in  #23561  ( #23616 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-08-25 23:13:03 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b395b3b0a3 
					 
					
						
						
							
							[Disagg][Perf] Use CUDA event sync instead of blocking tolist to avoid unintentional copy ops blocking across different CUDA streams, improving disagg TTIT/TTFT ( #22760 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Zijing Liu <liuzijing2014@gmail.com >
Signed-off-by: Zijing Liu <liuzijing2014@users.noreply.github.com > 
						
						
					 
					
						2025-08-25 21:06:00 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6fad29b11b 
					 
					
						
						
							
							Remove graph_pool as member of VllmBackend and argument to CUDAGraphWrapper ( #23385 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Luka Govedič <ProExpertProg@users.noreply.github.com >
Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com >
Co-authored-by: ProExpertProg <11367180+ProExpertProg@users.noreply.github.com >
Co-authored-by: Luka Govedič <ProExpertProg@users.noreply.github.com > 
						
						
					 
					
						2025-08-25 19:34:15 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6fd45e7b8a 
					 
					
						
						
							
							[CI/Build] Use vLLM client's user agent to fetch images ( #23561 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-08-25 19:34:12 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						56dcf4e7e9 
					 
					
						
						
							
							[Bug] Fix DeepGEMM Env Control ( #23591 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yewentao256 <zhyanwentao@126.com > 
						
						
					 
					
						2025-08-25 18:41:21 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ae067888d6 
					 
					
						
						
							
							Update Flashinfer to  0.2.14.post1 ( #23537 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Siyuan Fu <siyuanf@nvidia.com >
Signed-off-by: siyuanf <siyuanf@nvidia.com >
Signed-off-by: Weiliang Liu <weiliangl@nvidia.com >
Signed-off-by: Michael Goin <mgoin64@gmail.com >
Co-authored-by: Siyuan Fu <siyuanf@nvidia.com >
Co-authored-by: Michael Goin <mgoin64@gmail.com >
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> 
						
						
					 
					
						2025-08-25 18:30:44 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						906e461ed6 
					 
					
						
						
							
							[CI Fix] Pin deepep and pplx tags in tools/ep_kernels/, gate multigpu tests ( #23568 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-08-25 18:29:00 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2a97ffc33d 
					 
					
						
						
							
							[Misc] Add release note draft to PR template ( #23598 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: simon-mo <simon.mo@hey.com > 
						
						
					 
					
						2025-08-25 16:44:51 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						efc88cf64a 
					 
					
						
						
							
							[Misc] Simplify FlashInfer attention metadata ( #23585 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk@thinkingmachines.ai > 
						
						
					 
					
						2025-08-25 15:42:29 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7b6a837275 
					 
					
						
						
							
							[Docs] Update Documentation of Cohere Command-A Models ( #23584 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Terrencezzj <terrence@cohere.ai >
Signed-off-by: Abatom <abzhonghua@gmail.com >
Co-authored-by: Zhonghua Deng <abzhonghua@gmail.com > 
						
						
					 
					
						2025-08-25 21:53:52 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c34c82b7fe 
					 
					
						
						
							
							[TPU][Bugfix] Fixes prompt_token_ids error in tpu tests. ( #23574 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Pate Motter <patemotter@google.com > 
						
						
					 
					
						2025-08-25 14:29:16 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8a044754bd 
					 
					
						
						
							
							[XPU] Delay BF16 check to worker init for spawn compatibility ( #22979 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: chzhang <chaojun.zhang@intel.com > 
						
						
					 
					
						2025-08-25 13:09:26 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9188ae7cb5 
					 
					
						
						
							
							[Bugfix][V1][P/D]Fix the issue where repeated requests for the same input produce abnormal outputs for P2pNcclConnector ( #23403 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Abatom <abzhonghua@gmail.com > 
						
						
					 
					
						2025-08-25 12:57:08 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8a3cd90af5 
					 
					
						
						
							
							[Kernel] Add fused grouped_topk kernel for MoE ( #23274 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Xin Yang <xyangx@amazon.com >
Co-authored-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com > 
						
						
					 
					
						2025-08-25 11:47:52 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2a167b2eeb 
					 
					
						
						
							
							[test][RL] Add sleep level 2 test and fix reload with sleep mode ( #23521 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: 22quinn <33176974+22quinn@users.noreply.github.com > 
						
						
					 
					
						2025-08-26 00:25:52 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0ff902f3b4 
					 
					
						
						
							
							[Refactor] Refactor persistent buffers with CpuGpuBuffer  ( #23515 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu >
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> 
						
						
					 
					
						2025-08-25 08:44:48 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a9082a4d14 
					 
					
						
						
							
							[Bugfix] Fix Qwen3 MoE GPTQ inference ( #23490 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn > 
						
						
					 
					
						2025-08-25 06:40:20 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e0329ed4b4 
					 
					
						
						
							
							Updates to Flex + VLLm integration ( #21416 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: drisspg <drisspguessous@gmail.com > 
						
						
					 
					
						2025-08-25 09:32:42 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6879cd80ae 
					 
					
						
						
							
							[Refactor] Pass tokenizer explicitly instead of binding to prompt update ( #23542 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-08-25 06:31:57 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e269be2ba2 
					 
					
						
						
							
							[Doc] Add caution for API server scale-out ( #23550 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-08-25 06:14:15 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5c4b6e66fe 
					 
					
						
						
							
							[Attention] Unify mamba and attention backend selection ( #23171 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Ayush Satyam <ayushsatyam146@gmail.com > 
						
						
					 
					
						2025-08-25 09:09:36 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d0a4a3f645 
					 
					
						
						
							
							[misc] add shanghai meetup ( #23535 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: youkaichao <youkaichao@gmail.com > 
						
						
					 
					
						2025-08-25 17:00:03 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ebafb0936d 
					 
					
						
						
							
							[Bugfix] Allow dynamic number of patches for llava_onevision ( #23525 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-08-25 08:34:54 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0cb7b065c3 
					 
					
						
						
							
							Feature/benchmark/random mm data/images ( #23119 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: breno.skuk <breno.skuk@hcompany.ai > 
						
						
					 
					
						2025-08-25 01:28:35 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2da02dd0d8 
					 
					
						
						
							
							[Fix] DeepSeek V3.1 tool parser error message ( #23492 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: zitian.zhao <zitian.zhao@tencentmusic.com > 
						
						
					 
					
						2025-08-25 00:56:39 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d765cf01fe 
					 
					
						
						
							
							[Core][Multimodal] Track encode cache entries by mm_hash and enable embedding sharing between requests ( #22711 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: knlnguyen1802 <knlnguyen1802@gmail.com >
Signed-off-by: Roger Wang <hey@rogerw.io >
Co-authored-by: knlnguyen1802 <knlnguyen1802@gmail.com >
Co-authored-by: Roger Wang <hey@rogerw.io > 
						
						
					 
					
						2025-08-25 00:41:17 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						712d0f88d8 
					 
					
						
						
							
							[Refactor] Dynamic target and content for prompt updates ( #23411 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-08-24 23:39:58 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						49ab23b3cc 
					 
					
						
						
							
							[gpt-oss] use reasoning channel for reasoning text in serving_chat ( #22920 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Yu Guo <yuguo@meta.com > 
						
						
					 
					
						2025-08-25 06:29:34 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c9abb10489 
					 
					
						
						
							
							[Bugfix] Fix Dense module loading for sentence-transformers embedding models (simplified V2) ( #23408 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: FFFfff1FFFfff <yifanli0919@gmail.com > 
						
						
					 
					
						2025-08-25 05:39:24 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						787cdb3829 
					 
					
						
						
							
							Migrate DonutImagePixelInputs to TensorSchema ( #23509 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Benji Beck <benjibeck@meta.com > 
						
						
					 
					
						2025-08-25 05:02:15 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a5203d04df 
					 
					
						
						
							
							Migrate skyworkr1v inputs to TensorSchema ( #23499 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Benji Beck <benjibeck@meta.com > 
						
						
					 
					
						2025-08-25 04:43:21 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						99f8094400 
					 
					
						
						
							
							Migrate tarsier inputs to TensorSchema ( #23500 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Benji Beck <benjibeck@meta.com > 
						
						
					 
					
						2025-08-25 04:42:36 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						170e8ea9ea 
					 
					
						
						
							
							[Misc] Unified linear print info ( #23516 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-08-24 20:13:51 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a71e4765cc 
					 
					
						
						
							
							[Bugfix] Fix Qwen2.5-VL quantized model weights loading ( #23512 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Zifei Tong <zifeitong@gmail.com > 
						
						
					 
					
						2025-08-25 10:40:22 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						39971db3aa 
					 
					
						
						
							
							Frontend: Adding LM Format Enforcer support to V1 engine ( #22564 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Noam Gat <noamgat@gmail.com >
Co-authored-by: Russell Bryant <rbryant@redhat.com >
Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-08-24 19:31:22 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						504d914314 
					 
					
						
						
							
							[Perf] Add Triton config for DeepSeek V3 FP8 EP32 H200 ( #23504 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Ming Yang <minos.future@gmail.com > 
						
						
					 
					
						2025-08-24 18:06:35 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						47455c424f 
					 
					
						
						
							
							[Doc: ]fix various typos in multiple files ( #23487 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Didier Durand <durand.didier@gmail.com >
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com >
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-08-25 00:04:04 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c7fc6b1354 
					 
					
						
						
							
							fix incompatibililty with non cuda platform for nvfp4 ( #23478 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lu Fang <fanglu@fb.com >
Co-authored-by: Lucia (Lu) Fang <fanglu@meta.com > 
						
						
					 
					
						2025-08-24 15:35:41 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ad78868450 
					 
					
						
						
							
							[Misc] Remove unused slot_mapping buffer ( #23502 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-08-24 14:03:36 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e2db1164a1 
					 
					
						
						
							
							[Model] Enable BLOOM on V1 ( #23488 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-08-24 13:30:47 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						416f05929a 
					 
					
						
						
							
							[New Model]Donut model ( #23229 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: 汪志鹏 <wangzhipeng628@gmail.com > 
						
						
					 
					
						2025-08-24 12:52:24 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5e021b4981 
					 
					
						
						
							
							(Misc): add missing test for zero truncation size. ( #23457 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: teekenl <teekenlau@gmail.com > 
						
						
					 
					
						2025-08-24 18:12:47 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1b9b16649c 
					 
					
						
						
							
							[Misc] update dict parse to EPLBConfig from json dumps to dict unpacking ( #23305 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: rongfu.leng <rongfu.leng@daocloud.io > 
						
						
					 
					
						2025-08-24 08:06:34 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e76e233540 
					 
					
						
						
							
							[kernel] Support W4A8 on Hopper ( #23198 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: czhu-cohere <conway.zhu@cohere.com > 
						
						
					 
					
						2025-08-24 06:18:04 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a75277285b 
					 
					
						
						
							
							Migrate Paligemma inputs to TensorSchema ( #23470 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Benji Beck <benjibeck@meta.com > 
						
						
					 
					
						2025-08-24 04:56:56 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9dc30b7068 
					 
					
						
						
							
							[Bugfix] Add strong reference to CUDA pluggable allocator callbacks ( #23477 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: 22quinn <33176974+22quinn@users.noreply.github.com >
Signed-off-by: youkaichao <youkaichao@gmail.com >
Co-authored-by: Eric Marcus <eric.marcus@kaiko.ai >
Co-authored-by: youkaichao <youkaichao@gmail.com > 
						
						
					 
					
						2025-08-24 12:56:17 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						053278a5dc 
					 
					
						
						
							
							Migrate Pixtral inputs to TensorSchema ( #23472 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Benji Beck <benjibeck@meta.com > 
						
						
					 
					
						2025-08-24 04:55:53 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c55c028998 
					 
					
						
						
							
							[gpt-oss] Streaming Output for Python Tool ( #23409 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: zjy0516 <riverclouds.zhu@qq.com > 
						
						
					 
					
						2025-08-24 04:42:38 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						65197a5fb3 
					 
					
						
						
							
							[Misc] Modify CacheConfig import ( #23459 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-08-23 06:05:27 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b8f17f5d98 
					 
					
						
						
							
							Support DeepSeek-V3.1 tool call ( #23454 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Xu Wenqing <xuwq1993@qq.com > 
						
						
					 
					
						2025-08-23 05:50:16 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d9a55204ba 
					 
					
						
						
							
							fix(tests): Correct unreachable assertion in truncation test ( #23425 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: AzizCode92 <azizbenothman76@gmail.com > 
						
						
					 
					
						2025-08-23 05:23:54 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b4e9fd811f 
					 
					
						
						
							
							Revert "[PERF] Use faster way of decode in tokenizer: avoid useless list-to-list conversion ( #20000 )" ( #23396 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-08-23 04:16:48 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						308fa287a8 
					 
					
						
						
							
							Add glm4.5v tp2,4 fp8 config on H100_80GB ( #23443 )  
						
						 
						
						... 
						
						
						
						Co-authored-by: Chenxi Yang <cxyang@meta.com > 
						
						
					 
					
						2025-08-23 02:54:19 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						fa78de9dc3 
					 
					
						
						
							
							Quantization: support FP4 quantized models on AMD CDNA2/CDNA3 GPUs ( #22527 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: feng <fengli1702@gmail.com >
Signed-off-by: Michael Goin <mgoin64@gmail.com >
Co-authored-by: Michael Goin <mgoin64@gmail.com > 
						
						
					 
					
						2025-08-22 20:53:21 -06:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f6818a92cb 
					 
					
						
						
							
							[UX] Move Dockerfile DeepGEMM install to tools/install_deepgemm.sh ( #23360 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-08-22 20:52:50 -06:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						23c939fd30 
					 
					
						
						
							
							[Model] Support DP for ViT on MiniCPM-V-4 ( #23327 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: ycyaw66 <497410282@qq.com >
Co-authored-by: ycyaw66 <497410282@qq.com > 
						
						
					 
					
						2025-08-23 02:14:41 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						add1adfec7 
					 
					
						
						
							
							[BugFix] Fix MinPLogitsProcessor.update_states() ( #23401 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-08-23 08:22:11 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c80c53a30f 
					 
					
						
						
							
							[BugFix] Fix batch updates for pooling models ( #23398 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-08-23 08:20:41 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						24d0c9e6ed 
					 
					
						
						
							
							[NVIDIA][torch.compile] Support Flashinfer TRTLLM FP8-q/kv NVFP4-out Attention Kernel ( #22703 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: elvischenv <219235043+elvischenv@users.noreply.github.com >
Co-authored-by: Luka Govedič <ProExpertProg@users.noreply.github.com > 
						
						
					 
					
						2025-08-22 22:09:05 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						cc7ae5e7ca 
					 
					
						
						
							
							[BugFix][AMD][Quantization] Fix torch.compile issue where wvSplitKQ not being called when it should when using quantized FP8 model ( #22281 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Randall Smith <Randall.Smith@amd.com > 
						
						
					 
					
						2025-08-22 21:47:57 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0313cf854d 
					 
					
						
						
							
							[PERF] PyTorch Symmetric Memory All-Reduce ( #20759 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: ilmarkov <imarkov@redhat.com >
Signed-off-by: ilmarkov <markovilya197@gmail.com >
Signed-off-by: Michael Goin <mgoin64@gmail.com >
Co-authored-by: ilmarkov <imarkov@redhat.com >
Co-authored-by: Michael Goin <mgoin64@gmail.com > 
						
						
					 
					
						2025-08-22 15:39:08 -06:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0483fabc74 
					 
					
						
						
							
							[CI/Build] add EP dependencies to docker ( #21976 )  
						
						 
						
						... 
						
						
						
						Co-authored-by: Simon Mo <simon.mo@hey.com > 
						
						
					 
					
						2025-08-22 13:34:40 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						da65bec309 
					 
					
						
						
							
							add an env var for path to pre-downloaded flashinfer cubin files ( #22675 )  
						
						 
						
						
						
						
					 
					
						2025-08-22 19:25:45 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4645024d3a 
					 
					
						
						
							
							[Quantization] Allow GGUF quantization to skip unquantized layer ( #23188 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn > 
						
						
					 
					
						2025-08-22 13:04:22 -06:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						cd7a3df26f 
					 
					
						
						
							
							[Bugfix] Fix broken Florence-2 model ( #23426 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: 汪志鹏 <wangzhipeng628@gmail.com >
Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn >
Co-authored-by: 汪志鹏 <wangzhipeng628@gmail.com > 
						
						
					 
					
						2025-08-22 17:50:52 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						32d2b4064f 
					 
					
						
						
							
							[Model] Add Ovis2.5 PP support ( #23405 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn > 
						
						
					 
					
						2025-08-22 17:46:34 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						22cf679aad 
					 
					
						
						
							
							[Doc]: fix various typos in multiple files ( #23179 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Didier Durand <durand.didier@gmail.com > 
						
						
					 
					
						2025-08-22 10:38:46 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b6d7d34fc6 
					 
					
						
						
							
							Add unit tests for batched guided and non-guided requests ( #23389 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Yong Hoon Shin <yhshin@meta.com > 
						
						
					 
					
						2025-08-22 10:31:24 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						341923b982 
					 
					
						
						
							
							fix(tests): Ensure reliable CUDA cache clearing in MoE test ( #23416 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: AzizCode92 <azizbenothman76@gmail.com >
Signed-off-by: Michael Goin <mgoin64@gmail.com >
Co-authored-by: Michael Goin <mgoin64@gmail.com >
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> 
						
						
					 
					
						2025-08-22 17:20:59 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						424fb7a5d2 
					 
					
						
						
							
							[BugFix] Fix the issue where image embeddings were incorrectly split.… ( #23366 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: bppps <bpppsaka@gmail.com >
Co-authored-by: zouyu.zzx <zouyu.zzx@alibaba-inc.com >
Co-authored-by: bppps <bpppsaka@gmail.com > 
						
						
					 
					
						2025-08-22 16:56:46 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						88491c1b6b 
					 
					
						
						
							
							[Speculators][Speculative Decoding] Fix Qwen 2 Eagle3 Support ( #23337 )  
						
						 
						
						
						
						
					 
					
						2025-08-22 16:39:19 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						613a23b57f 
					 
					
						
						
							
							[Bugfix]: Installing dev environment due to pydantic incompatible version ( #23353 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Martin Hickey <martin.hickey@ie.ibm.com > 
						
						
					 
					
						2025-08-22 16:22:29 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						51a215300b 
					 
					
						
						
							
							[Fix] Bump triton version in rocm-build requirements ( #21630 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Burkhard Ringlein <ngl@zurich.ibm.com > 
						
						
					 
					
						2025-08-22 15:13:39 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ebe14621e3 
					 
					
						
						
							
							[Bug fix] Dynamically setting the backend variable for genai_perf_tests in the run-nightly-benchmark script ( #23375 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Naman Lalit <nl2688@nyu.edu > 
						
						
					 
					
						2025-08-22 15:12:28 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						325aa3dee9 
					 
					
						
						
							
							[Misc] local import code clean ( #23420 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Andy Xie <andy.xning@gmail.com > 
						
						
					 
					
						2025-08-22 14:01:35 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a073be6d87 
					 
					
						
						
							
							[Doc] Update the doc for log probs + prefix caching ( #23399 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chen Zhang <zhangch99@outlook.com >
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> 
						
						
					 
					
						2025-08-22 13:20:39 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						695e7adcd2 
					 
					
						
						
							
							[misc] Remove outdate comment about runai_model_streamer ( #23421 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: carlory <baofa.fan@daocloud.io > 
						
						
					 
					
						2025-08-22 13:08:53 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						281710ef9a 
					 
					
						
						
							
							[Attention] Allow V1 flash_attn to support cross-attention ( #23297 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Russell Bryant <rbryant@redhat.com > 
						
						
					 
					
						2025-08-22 12:10:16 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						808d2e9aa0 
					 
					
						
						
							
							[Misc] Move M-RoPE init logic to _init_mrope_positions ( #23422 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-08-22 03:07:22 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						285178b3b8 
					 
					
						
						
							
							[V0 Deprecation] Remove V0 LoRA test ( #23418 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-08-22 09:56:51 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						88016c372a 
					 
					
						
						
							
							[Bugfix] Fix pooling models on CPU backend ( #23392 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: jiang1.li <jiang1.li@intel.com > 
						
						
					 
					
						2025-08-22 09:47:17 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						998720859c 
					 
					
						
						
							
							Migrate MiniCPMOAudioInputs to TensorSchema ( #21847 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Benji Beck <benjibeck@meta.com >
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> 
						
						
					 
					
						2025-08-22 16:43:29 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0ba1b54ac6 
					 
					
						
						
							
							[gpt-oss] add input/output usage in responses api when harmony context is leveraged ( #22667 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Guillaume Calmettes <gcalmettes@scaleway.com > 
						
						
					 
					
						2025-08-22 08:32:24 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						53415653ff 
					 
					
						
						
							
							[P/D][Nixl] Make kv cache register compatible with hybrid memory allocator ( #23079 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: sfeng33 <4florafeng@gmail.com > 
						
						
					 
					
						2025-08-21 22:30:48 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						17373dcd93 
					 
					
						
						
							
							[Attention] Refactor AttentionMetadata Preparation for Encoder-only Models ( #23154 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chen Zhang <zhangch99@outlook.com > 
						
						
					 
					
						2025-08-22 05:05:59 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5964069367 
					 
					
						
						
							
							[New Model] Add Seed-Oss model ( #23241 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: jiabin.00 <jiabin.00@bytedance.com >
Signed-off-by: Jee Jee Li <pandaleefree@gmail.com >
Co-authored-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-08-22 04:58:10 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						de9c085e17 
					 
					
						
						
							
							[Misc] Add gemma3 chat template with pythonic-style function calling ( #17149 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Philip Chung <philip.f.chung@gmail.com > 
						
						
					 
					
						2025-08-21 21:06:50 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						111692bb8c 
					 
					
						
						
							
							[CI] Add end-to-end V1 min_tokens test coverage ( #22495 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Arjun Reddy <189282188+arjunbreddy22@users.noreply.github.com >
Co-authored-by: Arjun Reddy <189282188+arjunbreddy22@users.noreply.github.com > 
						
						
					 
					
						2025-08-21 22:04:07 -06:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						394591e343 
					 
					
						
						
							
							[Feature] Enable DeepGEMM Linear on B200; 1.5% E2E throughput improvement ( #23351 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yewentao256 <zhyanwentao@126.com > 
						
						
					 
					
						2025-08-21 21:01:08 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3ac849665d 
					 
					
						
						
							
							[CI/Build] Skip Idefics3 and SmolVLM generation test again ( #23356 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn > 
						
						
					 
					
						2025-08-22 03:39:46 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0b9cc56fac 
					 
					
						
						
							
							Migrate MllamaImagePixelInputs to TensorSchema ( #22020 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Benji Beck <benjibeck@meta.com >
Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-08-22 11:28:49 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8896eb72eb 
					 
					
						
						
							
							[Deprecation] Remove prompt_token_ids arg fallback in LLM.generate and LLM.embed ( #18800 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-08-22 10:56:57 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						19fe1a0510 
					 
					
						
						
							
							[Kernel] Add FP8 support with FlashMLA backend ( #22668 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Matthew Bonanni <mbonanni001@gmail.com > 
						
						
					 
					
						2025-08-22 02:26:32 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						480bdf5a7b 
					 
					
						
						
							
							[Core] Support custom executor qualname ( #23314 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: 22quinn <33176974+22quinn@users.noreply.github.com > 
						
						
					 
					
						2025-08-22 09:40:54 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5368f76855 
					 
					
						
						
							
							[Feature][Responses API] Support logprobs(non-stream) ( #23319 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Kebe <mail@kebe7jun.com > 
						
						
					 
					
						2025-08-21 23:09:16 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8ef6b8a38c 
					 
					
						
						
							
							Always use cache mounts when installing vllm to avoid populating pip cache in the image. Also remove apt cache. ( #23270 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Valentyn Tymofieiev <valentyn@google.com > 
						
						
					 
					
						2025-08-21 18:01:03 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3bbe11cc13 
					 
					
						
						
							
							[Perf] Small optimizations for silu_mul_fp8_quant_deep_gemm ( #23265 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-08-21 17:56:15 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c5041f899f 
					 
					
						
						
							
							[CI] improve pr comments bot ( #23380 )  
						
						 
						
						
						
						
					 
					
						2025-08-21 14:49:03 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8b5fe6eb51 
					 
					
						
						
							
							[CI] Clean up actions: remove helm, publish workflows and improve pr … ( #23377 )  
						
						 
						
						
						
						
					 
					
						2025-08-21 14:29:04 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						800349c2a5 
					 
					
						
						
							
							[Structured Outputs] Refactor bitmask construction into get_grammar_bitmask ( #23361 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-08-21 20:53:33 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						044931f97b 
					 
					
						
						
							
							Make sure that vectorize_with_alignment produced vectorized global loads ( #23182 )  
						
						 
						
						
						
						
					 
					
						2025-08-21 20:06:54 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1d353b6352 
					 
					
						
						
							
							[Core] Always use tensor cores for Flashinfer Decode Wrapper ( #23214 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Pavani Majety <pmajety@nvidia.com > 
						
						
					 
					
						2025-08-21 16:02:11 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3496274663 
					 
					
						
						
							
							[Misc] Convert VLLM_TORCH_PROFILER_DIR path to absolute ( #23191 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Andy Xie <andy.xning@gmail.com > 
						
						
					 
					
						2025-08-21 15:49:09 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8a19303173 
					 
					
						
						
							
							[BugFix][gpt-oss] Fix Chat Completion with Multiple Output Message ( #23318 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chen Zhang <zhangch99@outlook.com > 
						
						
					 
					
						2025-08-21 10:31:11 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						603fbbbce0 
					 
					
						
						
							
							[Misc] Misc code cleanup/simplification ( #23304 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-08-21 17:22:55 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						10f535c086 
					 
					
						
						
							
							[Bugfix] Fix port conflict by obtaining a list of open ports upfront ( #21894 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Ming Yang <minos.future@gmail.com > 
						
						
					 
					
						2025-08-21 10:22:18 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						48bfb0c9b7 
					 
					
						
						
							
							[Bug] Fix R1 Accuracy 0 Bug ( #23294 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yewentao256 <zhyanwentao@126.com >
Signed-off-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com >
Co-authored-by: Michael Goin <mgoin64@gmail.com > 
						
						
					 
					
						2025-08-21 13:11:28 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f8ce022948 
					 
					
						
						
							
							add tg-mxfp4-moe-test ( #22540 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: siyuanf <siyuanf@nvidia.com >
Signed-off-by: Siyuan Fu <siyuanf@nvidia.com >
Co-authored-by: Michael Goin <mgoin64@gmail.com > 
						
						
					 
					
						2025-08-21 17:05:47 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0278f1ac3a 
					 
					
						
						
							
							Fix nvfp4 swizzling ( #23140 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yiliu30 <yi4.liu@intel.com >
Co-authored-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com > 
						
						
					 
					
						2025-08-21 16:54:50 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a482e4e769 
					 
					
						
						
							
							Migrate MolmoImageInputs to TensorSchema ( #22022 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Benji Beck <benjibeck@meta.com > 
						
						
					 
					
						2025-08-21 16:54:08 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e0b056e443 
					 
					
						
						
							
							[ci/build] Fix abi tag for aarch64 ( #23329 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: youkaichao <youkaichao@gmail.com > 
						
						
					 
					
						2025-08-21 23:32:55 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						79f05e4436 
					 
					
						
						
							
							[Multimodal] Always enable hashing mm data ( #23308 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Roger Wang <hey@rogerw.io >
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk >
Co-authored-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-08-21 07:23:28 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f8daddcc4c 
					 
					
						
						
							
							[Bugfix] set system_message in phi4mini chat template ( #23309 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: zhuangqh <zhuangqhc@gmail.com > 
						
						
					 
					
						2025-08-21 14:22:39 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c8e33c72c6 
					 
					
						
						
							
							[V1] Remove unnecessary check for main thread ( #23298 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Robert Shaw <robshaw@redhat.com >
Co-authored-by: Robert Shaw <robshaw@redhat.com > 
						
						
					 
					
						2025-08-21 14:08:35 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d70a16625d 
					 
					
						
						
							
							[Performance] V1 Pooling Models E2E Performance Optimization ( #23162 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: wang.yuqi <noooop@126.com > 
						
						
					 
					
						2025-08-21 13:26:09 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5cc54f7c5b 
					 
					
						
						
							
							[Doc] Fix batch-level DP example ( #23325 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk >
Signed-off-by: Cyrus Leung <cyrus.tl.leung@gmail.com >
Co-authored-by: youkaichao <youkaichao@gmail.com > 
						
						
					 
					
						2025-08-21 06:16:38 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0c6e40bbaa 
					 
					
						
						
							
							[Refactor] Simplify code for MM budget ( #23310 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-08-21 08:00:16 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2e2000f352 
					 
					
						
						
							
							[Model] Add LFM2 architecture ( #22845 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Paul Pak <paulpak58@gmail.com > 
						
						
					 
					
						2025-08-21 09:35:07 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						31282401b6 
					 
					
						
						
							
							[BugFix] Fix Python 3.9 Support ( #23306 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jared O'Connell <46976761+jaredoconnell@users.noreply.github.com >
Signed-off-by: Cyrus Leung <cyrus.tl.leung@gmail.com >
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com > 
						
						
					 
					
						2025-08-20 23:23:56 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0c31e28e95 
					 
					
						
						
							
							[Bugfix] Fix extra whitespace in strings caused by newline ( #23272 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-08-20 22:03:00 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f571ff8eb6 
					 
					
						
						
							
							[Sampler] Support returning final logprobs ( #22387 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: 22quinn <33176974+22quinn@users.noreply.github.com >
Co-authored-by: Nick Hill <nhill@redhat.com >
Co-authored-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-08-20 21:28:32 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f64ee61d9e 
					 
					
						
						
							
							[CI] Block the cu126 wheel build while broken ( #23285 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-08-21 04:21:05 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8993073dc1 
					 
					
						
						
							
							[CI] Delete images older than 24h. ( #23291 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Qiliang Cui <derrhein@gmail.com > 
						
						
					 
					
						2025-08-20 21:15:20 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						655a09f653 
					 
					
						
						
							
							[Model][VLM] Support R-4B Model ( #23246 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yannqi <yannqi@qq.com >
Signed-off-by: 杨奇(yann qi) <51905299+yannqi@users.noreply.github.com >
Signed-off-by: Cyrus Leung <cyrus.tl.leung@gmail.com >
Co-authored-by: yannqiyang <yannqiyang@tencent.com >
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com > 
						
						
					 
					
						2025-08-21 04:08:52 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f94bf9b924 
					 
					
						
						
							
							[Compile] Fix Compile Warning SM100 Cutlass MLA ( #23287 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yewentao256 <zhyanwentao@126.com > 
						
						
					 
					
						2025-08-21 03:09:39 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3663870c72 
					 
					
						
						
							
							[V1][Mamba1] - Full CUDA and Piecewise CUDA Graphs Support ( #23035 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: asafg <asafg@ai21.com >
Signed-off-by: asafg <39553475+Josephasafg@users.noreply.github.com >
Co-authored-by: asafg <asafg@ai21.com > 
						
						
					 
					
						2025-08-20 20:08:51 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2461d9e562 
					 
					
						
						
							
							[CI/Build] Split out mm processor tests ( #23260 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-08-20 20:05:20 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7be5d113d8 
					 
					
						
						
							
							[CPU] Refactor CPU W8A8 scaled_mm ( #23071 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: jiang1.li <jiang1.li@intel.com > 
						
						
					 
					
						2025-08-21 09:34:24 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b029de9902 
					 
					
						
						
							
							[Optimization] Make new_block_ids None if empty ( #23262 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk@thinkingmachines.ai > 
						
						
					 
					
						2025-08-20 18:25:56 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						bbea1cefdd 
					 
					
						
						
							
							[CI Bugfix] Fix CI by fully removing --enable-prompt-adapter ( #23284 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-08-20 17:18:12 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f5aa307d77 
					 
					
						
						
							
							Remove duplicate entry in vllm.attention.__all__ ( #23296 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Russell Bryant <rbryant@redhat.com > 
						
						
					 
					
						2025-08-20 17:14:59 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4b795020ed 
					 
					
						
						
							
							[EP] Add logging for experts map ( #22685 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: 22quinn <33176974+22quinn@users.noreply.github.com >
Co-authored-by: Simon Mo <simon.mo@hey.com > 
						
						
					 
					
						2025-08-20 23:46:06 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c86af22f31 
					 
					
						
						
							
							[Fix] remove is_marlin param in benchmark_moe ( #23286 )  
						
						 
						
						
						
						
					 
					
						2025-08-20 22:04:21 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						10cc12ba66 
					 
					
						
						
							
							Feature/mla tests ( #23195 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Matthew Bonanni <mbonanni001@gmail.com >
Signed-off-by: Matthew Bonanni <mbonanni@redhat.com > 
						
						
					 
					
						2025-08-20 21:46:47 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a4fbb32fab 
					 
					
						
						
							
							Remove chunked_prefill_enabled flag in V1 MLA ( #23183 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Matthew Bonanni <mbonanni001@gmail.com > 
						
						
					 
					
						2025-08-20 21:43:17 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1b125004be 
					 
					
						
						
							
							[misc] fix multiple arch wheels for the nightly index ( #23110 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: youkaichao <youkaichao@gmail.com > 
						
						
					 
					
						2025-08-20 14:15:34 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4fbda0b20c 
					 
					
						
						
							
							[Feature] use --eplb_config to set eplb param ( #20562 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: rongfu.leng <rongfu.leng@daocloud.io >
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com >
Signed-off-by: rongfu.leng <lenronfu@gmail.com >
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-08-20 14:07:28 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4e51fa8cba 
					 
					
						
						
							
							Do not use eval() to convert unknown types ( #23266 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Russell Bryant <rbryant@redhat.com > 
						
						
					 
					
						2025-08-20 13:28:30 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						bf7c99dfc4 
					 
					
						
						
							
							[Perf] Speed up function _convert_tokens_to_string_with_added_encoders by 13.7x ( #20413 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Saurabh Misra <misra.saurabh1@gmail.com >
Signed-off-by: Aseem Saxena <aseem.bits@gmail.com >
Co-authored-by: codeflash-ai[bot] <148906541+codeflash-ai[bot]@users.noreply.github.com>
Co-authored-by: Aseem Saxena <aseem.bits@gmail.com > 
						
						
					 
					
						2025-08-20 13:17:11 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b95697d731 
					 
					
						
						
							
							[Frontend] improve error logging of chat completion ( #22957 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chen Zhang <zhangch99@outlook.com > 
						
						
					 
					
						2025-08-20 13:03:37 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						582bbe6bd7 
					 
					
						
						
							
							[Fix] correct tool_id for kimi-k2 when use tool_choice=required ( #21259 )  
						
						 
						
						... 
						
						
						
						Co-authored-by: wangzhengtao <wangzhengtao@msh.team > 
						
						
					 
					
						2025-08-20 12:59:54 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0cdbf5e61c 
					 
					
						
						
							
							[Kernel/Quant] Remove the original marlin format and qqq ( #23204 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-08-20 15:13:36 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ebe56a0064 
					 
					
						
						
							
							Small fix for Command-A-Vision ( #23268 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: donglu <donglu@cohere.com > 
						
						
					 
					
						2025-08-20 18:15:18 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f77a0802b7 
					 
					
						
						
							
							Limit HTTP header count and size ( #23267 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Taneem Ibrahim <taneem.ibrahim@gmail.com >
Signed-off-by: Russell Bryant <rbryant@redhat.com >
Co-authored-by: Taneem Ibrahim <taneem.ibrahim@gmail.com > 
						
						
					 
					
						2025-08-20 17:57:37 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c4477f55e5 
					 
					
						
						
							
							Migrate Mistral3ImagePixelInputs to TensorSchema ( #21945 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Benji Beck <benjibeck@meta.com >
Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-08-20 17:37:29 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						dfd2382039 
					 
					
						
						
							
							[torch.compile] Support conditional torch.compile per module ( #22269 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Yong Hoon Shin <yhshin@meta.com > 
						
						
					 
					
						2025-08-20 16:52:59 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3b11b26b50 
					 
					
						
						
							
							[FIXBUG ] Allow disabling rocm_aiter_fa backend for ROCm GPUs not compatible with AITER ( #22795 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: JartX <sagformas@epdcenter.es >
Signed-off-by: tjtanaa <tunjian.tan@embeddedllm.com >
Co-authored-by: tjtanaa <tunjian.tan@embeddedllm.com > 
						
						
					 
					
						2025-08-20 09:08:29 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d6d13bd49e 
					 
					
						
						
							
							[Misc] Add max_seq_len to CommonAttentionMetadata  ( #23216 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-08-20 09:05:29 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5efd6905bc 
					 
					
						
						
							
							[CLI][Doc] Formalize --mm-encoder-tp-mode ( #23190 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-08-20 23:42:28 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b17109beea 
					 
					
						
						
							
							[Kernel] CUTLASS MoE FP8: Integrate cuda moe permute/unpermute ( #23045 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Shixian Cui <shixian@amazon.com > 
						
						
					 
					
						2025-08-20 10:35:26 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4449235843 
					 
					
						
						
							
							[Bugfix] Ensure correctness of HCXVision processing ( #23254 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-08-20 14:19:30 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						38217877aa 
					 
					
						
						
							
							[Fix] fix offline env use local mode path ( #22526 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: rongfu.leng <rongfu.leng@daocloud.io > 
						
						
					 
					
						2025-08-20 13:34:49 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c6d80a7a96 
					 
					
						
						
							
							[Model] Improve olmo and olmo2 ( #23228 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-08-20 12:47:05 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7cd17e22d7 
					 
					
						
						
							
							[Model][V1] Support Ernie MTP ( #22169 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: zhouchong <zhouchong03@baidu.com >
Co-authored-by: zhouchong <zhouchong03@baidu.com > 
						
						
					 
					
						2025-08-20 20:41:55 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						50df09fe13 
					 
					
						
						
							
							Update to flashinfer-python==0.2.12 and disable AOT compile for non-release image ( #23129 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-08-20 08:05:54 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						68fcd3fa73 
					 
					
						
						
							
							[Bugfix] Ensure correctness of Cohere2Vision processing ( #23245 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-08-20 11:09:18 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						83e69a09d6 
					 
					
						
						
							
							[Model] Support deepseek with eagle ( #21086 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Xin Yang <xyangx@amazon.com > 
						
						
					 
					
						2025-08-20 19:01:31 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3aa8c10038 
					 
					
						
						
							
							Fix missing quotes ( #23242 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Shiming Zhang <wzshiming@hotmail.com > 
						
						
					 
					
						2025-08-20 10:46:59 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						103f1ec8d3 
					 
					
						
						
							
							[Model] use autoWeightsLoader for gptoss ( #22446 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: calvin chen <wen.chen@dynamia.ai > 
						
						
					 
					
						2025-08-20 10:16:27 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d983769c41 
					 
					
						
						
							
							fix cuda graph ( #22721 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: fsx950223 <fsx950223@outlook.com > 
						
						
					 
					
						2025-08-20 06:24:37 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8fd920924c 
					 
					
						
						
							
							[BugFix] Fix stuck stats/metrics after requests are aborted ( #22995 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-08-20 13:50:29 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						de7b67a023 
					 
					
						
						
							
							[CI/Build] Sync multimodal tests ( #23181 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-08-20 05:06:42 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f729023272 
					 
					
						
						
							
							[CI/Build] Also check DP in benchmarks throughput script ( #23038 )  
						
						 
						
						... 
						
						
						
						Co-authored-by: Simon Mo <simon.mo@hey.com > 
						
						
					 
					
						2025-08-20 04:09:27 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1a3079a15e 
					 
					
						
						
							
							chore: support pytorch format in lora  ( #22790 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: jaeeun.kil <rha3122@naver.com >
Signed-off-by: 길재은 <rha3122@naver.com > 
						
						
					 
					
						2025-08-20 04:02:50 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						941f56858a 
					 
					
						
						
							
							Fix a performance comparison issue in Benchmark Suite ( #23047 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Tsai, Louie <louie.tsai@intel.com >
Signed-off-by: Louie Tsai <louie.tsai@intel.com >
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Li, Jiang <bigpyj64@gmail.com > 
						
						
					 
					
						2025-08-20 03:14:32 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a634733f67 
					 
					
						
						
							
							[Attention] Optimize make_local_attention_virtual_batches for Flash Attention ( #23185 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: linzebing <linzebing1995@gmail.com > 
						
						
					 
					
						2025-08-20 02:57:47 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						64ab3c7253 
					 
					
						
						
							
							[Doc] Update V1 status of various pooling models ( #23189 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-08-20 10:33:41 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e58c5a9768 
					 
					
						
						
							
							[Core] Add torch profiler CPU traces for AsyncLLM. ( #21794 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chenheli Hua <huachenheli@outlook.com > 
						
						
					 
					
						2025-08-20 02:32:47 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d46d417b58 
					 
					
						
						
							
							[CI Perf] Only test bfloat16 for tests/compile/test_fusion_all_reduce.py ( #23132 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-08-19 20:18:52 -06:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0167efe20d 
					 
					
						
						
							
							[Core] Optimize scheduler request removal for single completions ( #21917 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: chiliu <chiliu@paypal.com >
Signed-off-by: chiliu <cliu_whu@yeah.net >
Co-authored-by: chiliu <chiliu@paypal.com > 
						
						
					 
					
						2025-08-19 18:25:59 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c32e6ad1f6 
					 
					
						
						
							
							[Quantization] Bump Compressed Tensors Version ( #23202 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Kyle Sayers <kylesayrs@gmail.com >
Co-authored-by: Dipika Sikka <dipikasikka1@gmail.com >
Co-authored-by: Michael Goin <mgoin64@gmail.com > 
						
						
					 
					
						2025-08-20 00:39:28 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1630cc8d0f 
					 
					
						
						
							
							[Benchmarks] Add video inputs to ShareGPTDataset.  ( #23199 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chenheli Hua <huachenheli@outlook.com > 
						
						
					 
					
						2025-08-19 23:42:31 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						14e2b0730b 
					 
					
						
						
							
							[BugFix] fix CUTLASS MLA full cudagraph  ( #23200 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com > 
						
						
					 
					
						2025-08-19 22:17:08 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0f4f0191d8 
					 
					
						
						
							
							[CI/Build] Replace lm-eval gsm8k tests with faster implementation ( #23002 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-08-19 15:07:30 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a38b8af4c3 
					 
					
						
						
							
							[NVIDIA] Add SM100 Flashinfer Cutlass MoE fp8 backend ( #22357 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Amir Klein <203507526+amirkl94@users.noreply.github.com > 
						
						
					 
					
						2025-08-19 18:01:53 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						21dce80ea9 
					 
					
						
						
							
							[CI/Build] Add support for Python 3.13 ( #13164 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <michael@neuralmagic.com >
Signed-off-by: mgoin <mgoin64@gmail.com >
Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-08-19 13:49:34 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e61bac87ee 
					 
					
						
						
							
							[Misc] Minor refactoring for FlashInfer backend ( #23147 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-08-19 13:11:51 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						80141bbf2f 
					 
					
						
						
							
							fix: use cache_salt for gpt-oss ( #23186 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Marko Rosenmueller <5467316+dr75@users.noreply.github.com > 
						
						
					 
					
						2025-08-19 18:12:25 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b94faf9d50 
					 
					
						
						
							
							[Bugfix] Fix accuracy issue when using flashinfer cutlass moe, TP=1 and modelopt. ( #23125 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Bill Nell <bnell@redhat.com >
Co-authored-by: Michael Goin <mgoin64@gmail.com > 
						
						
					 
					
						2025-08-19 14:00:51 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5b5f350d67 
					 
					
						
						
							
							[Misc] Enable yapf for FlashInfer backend ( #23193 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-08-19 10:33:47 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f7cf5b512e 
					 
					
						
						
							
							[Frontend] Add /collective_rpc API endpoint ( #23075 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: 22quinn <33176974+22quinn@users.noreply.github.com > 
						
						
					 
					
						2025-08-19 17:29:32 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						03d4235fd2 
					 
					
						
						
							
							[Misc] Fix the benchmark's README and improve the error messages for the benchmark's argument checks ( #22654 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: tanruixiang <tanruixiang0104@gmail.com > 
						
						
					 
					
						2025-08-19 10:18:51 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d6a1a20973 
					 
					
						
						
							
							[CI/Build] Update transformers to v4.55.2 ( #23093 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn > 
						
						
					 
					
						2025-08-19 10:06:17 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a70d0bd0a3 
					 
					
						
						
							
							Migrate LlavaOnevisionMultiInputs to TensorSchema ( #21844 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Benji Beck <benjibeck@meta.com > 
						
						
					 
					
						2025-08-19 17:02:02 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						24f4d1a224 
					 
					
						
						
							
							Add return_token_ids parameter to OpenAI API endpoints ( #22587 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Yuge Zhang <scottyugochang@gmail.com >
Co-authored-by: Claude <noreply@anthropic.com >
Co-authored-by: Simon Mo <simon.mo@hey.com > 
						
						
					 
					
						2025-08-19 09:48:31 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4f510bc2a1 
					 
					
						
						
							
							[Model] Removes redundant all-reduce operation in Qwen3MoeSparseMoeBlock ( #23169 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Yizhou Liu <liu_yizhou@outlook.com > 
						
						
					 
					
						2025-08-19 16:18:41 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1298c67795 
					 
					
						
						
							
							[FEAT] [Performance] Enable DP for ViT in Qwen2.5VL ( #22742 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: tjtanaa <tunjian.tan@embeddedllm.com >
Co-authored-by: DarkLight1337 <tlleungac@connect.ust.hk >
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com > 
						
						
					 
					
						2025-08-19 15:25:57 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4d9c61993a 
					 
					
						
						
							
							[Bugfix] Fix benchmark_moe.py  ( #23177 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-08-19 13:39:40 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b87cb97a53 
					 
					
						
						
							
							[Model] support new model ovis2.5 ( #23084 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: myselvess <244285088@qq.com >
Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn >
Co-authored-by: Isotr0py <2037008807@qq.com >
Co-authored-by: Isotr0py <mozf@mail2.sysu.edu.cn > 
						
						
					 
					
						2025-08-19 13:12:59 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f856c33ce9 
					 
					
						
						
							
							[Model] Add multi_label_classification support ( #23173 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: wang.yuqi <noooop@126.com > 
						
						
					 
					
						2025-08-19 12:54:30 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						03752dba8f 
					 
					
						
						
							
							[NVIDIA] Support Flashinfer TRTLLM FP8-q/kv/out Attention Kernel ( #21716 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: elvischenv <219235043+elvischenv@users.noreply.github.com >
Co-authored-by: Michael Goin <mgoin64@gmail.com >
Co-authored-by: Luka Govedič <ProExpertProg@users.noreply.github.com > 
						
						
					 
					
						2025-08-19 08:22:15 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						40f26734b9 
					 
					
						
						
							
							[Misc] Fix seq_lens for graph capture ( #23175 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-08-19 03:58:16 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2c3f557f08 
					 
					
						
						
							
							[Doc] use power of 2 ( #23172 )  
						
						 
						
						
						
						
					 
					
						2025-08-19 03:16:23 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						21bcc8263f 
					 
					
						
						
							
							[Misc] Avoid accessing req_ids inside a loop ( #23159 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-08-19 09:39:38 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5bfe0dea7a 
					 
					
						
						
							
							[bug fix] Fix llama4 spec decoding ( #22691 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: qizixi <qizixi@meta.com >
Co-authored-by: Lu Fang <30275821+houseroad@users.noreply.github.com > 
						
						
					 
					
						2025-08-19 08:53:24 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						31fd3265c8 
					 
					
						
						
							
							[Bugfix] Fix broken Minimax-01-VL model ( #22116 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <2037008807@qq.com >
Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn > 
						
						
					 
					
						2025-08-19 08:49:29 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						31436e8b4f 
					 
					
						
						
							
							[Misc] Add request_id into benchmark_serve.py ( #23065 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yangxia <yangxiast@gmail.com > 
						
						
					 
					
						2025-08-19 08:32:18 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4efd43e9b4 
					 
					
						
						
							
							Fix GLM-4.5V-FP8 numerical issue ( #22949 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: qizixi <qizixi@meta.com >
Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-08-19 07:56:31 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3c8a787247 
					 
					
						
						
							
							[Benchmark] Add flag --served-model-name to benchmark_serving_multi_turn ( #22889 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: daniels <daniels@pliops.com > 
						
						
					 
					
						2025-08-19 07:48:07 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						01a08739e0 
					 
					
						
						
							
							[misc] split engine_model into json file for nsys profile tool ( #23117 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Grace Ho <grho@nvidia.com >
Signed-off-by: Grace Ho <146482179+gracehonv@users.noreply.github.com >
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-08-19 15:44:53 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						fda9537c5e 
					 
					
						
						
							
							[Model] Support Pipeline Parallelism for moonshotai/Kimi-VL-A3B-Thinking-2506 ( #23114 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: zjy0516 <riverclouds.zhu@qq.com >
Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-08-19 14:24:31 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						90bbe0a5ad 
					 
					
						
						
							
							[Log] Warning Once for Cutlass MLA  ( #23137 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yewentao256 <zhyanwentao@126.com > 
						
						
					 
					
						2025-08-18 23:24:16 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e75f342261 
					 
					
						
						
							
							Migrate InternVLImagePixelInputs (in nemotron_vl.py) to TensorSchema ( #22023 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Benji Beck <benjibeck@meta.com >
Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-08-19 13:48:26 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						78dba404ad 
					 
					
						
						
							
							[Hardware][IBM Z]Enable v1 for s390x and s390x dockerfile fixes ( #22725 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nikhil Suryawanshi <suryawanshin74@gmail.com > 
						
						
					 
					
						2025-08-19 04:40:37 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e9d6a3db69 
					 
					
						
						
							
							[TPU] make ptxla not imported when using tpu_commons ( #23081 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chengji Yao <chengjiyao@gmail.com >
Signed-off-by: Chengji Yao <chengjiyao@google.com >
Co-authored-by: Chengji Yao <chengjiyao@gmail.com > 
						
						
					 
					
						2025-08-19 11:46:42 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a4454e9401 
					 
					
						
						
							
							chore: disable enable_cpp_symbolic_shape_guards ( #23048 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Xiao Liu <xiszishu@gmail.com > 
						
						
					 
					
						2025-08-18 23:08:05 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						14006840ea 
					 
					
						
						
							
							[V0 Deprecation] Remove V0 FlashInfer attention backend ( #22776 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-08-18 19:54:16 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6603288736 
					 
					
						
						
							
							[CI][V0 Deprecation] Removed V0 Only Chunked Prefill and Prefix Caching Tests ( #22871 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Robert Shaw <robshaw@redhat.com >
Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu >
Co-authored-by: Robert Shaw <robshaw@redhat.com >
Co-authored-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-08-18 17:39:01 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						95e3095136 
					 
					
						
						
							
							[Misc] Add @tdoublep as a maintainer of hybrid model and Triton-attention related code ( #23122 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Thomas Parnell <tpa@zurich.ibm.com > 
						
						
					 
					
						2025-08-19 08:31:38 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c9b38be8aa 
					 
					
						
						
							
							[Spec Decode] Make propose_draft_token_ids non-blocking for lower TTFT ( #23041 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-08-18 17:20:38 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0dd3f4f5ab 
					 
					
						
						
							
							[Misc] Minor refactoring for prepare_inputs ( #23116 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-08-18 16:58:05 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						498259ccce 
					 
					
						
						
							
							Install tpu_info==0.4.0 to fix core dump for TPU ( #23135 )  
						
						 
						
						
						
						
					 
					
						2025-08-18 16:23:33 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6d25e3fd6e 
					 
					
						
						
							
							Use Blackwell FlashInfer MXFP4 MoE by default if available  ( #23008 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-08-18 15:25:49 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ac6eb49de3 
					 
					
						
						
							
							fix: OpenAI SDK compat (ResponseTextConfig) ( #23126 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: breno.skuk <breno.skuk@hcompany.ai >
Signed-off-by: Breno Baldas Skuk <breno.skuk@hcompany.ai >
Signed-off-by: mgoin <mgoin64@gmail.com >
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Michael Goin <mgoin64@gmail.com > 
						
						
					 
					
						2025-08-18 15:22:59 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						bf756321c7 
					 
					
						
						
							
							[CI Bugfix] Pin openai<1.100 to unblock CI ( #23118 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-08-18 12:14:01 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0e3bb543f0 
					 
					
						
						
							
							[Bugfix] Support compile for Transformers multimodal ( #23095 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: raushan <raushan@huggingface.co > 
						
						
					 
					
						2025-08-18 13:35:48 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						569aefd134 
					 
					
						
						
							
							chore: remove unnecessary patch_padding_side for the chatglm model ( #23090 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: carlory <baofa.fan@daocloud.io >
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk >
Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-08-18 12:32:13 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d3f71f1224 
					 
					
						
						
							
							[Refactor] Get prompt updates earlier ( #23097 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-08-18 12:31:53 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5a30bd10d8 
					 
					
						
						
							
							[Bugfix] fix IntermediateTensors equal method ( #23027 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Andy Xie <andy.xning@gmail.com > 
						
						
					 
					
						2025-08-18 02:58:11 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						27e8d1ea3e 
					 
					
						
						
							
							[Refactor] Define MultiModalKwargsItems separate from MultiModalKwargs ( #23053 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-08-18 09:52:00 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5c79b0d648 
					 
					
						
						
							
							[XPU][CI]add xpu env vars in CI scripts ( #22946 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Kunshang Ji <kunshang.ji@intel.com > 
						
						
					 
					
						2025-08-18 09:47:03 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5f5664b3e4 
					 
					
						
						
							
							[XPU] Fix compile size for xpu ( #23069 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Kunshang Ji <kunshang.ji@intel.com > 
						
						
					 
					
						2025-08-18 00:04:08 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						89657a557c 
					 
					
						
						
							
							[Misc] Fix backward compatibility from  #23030  ( #23070 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Roger Wang <hey@rogerw.me >
Co-authored-by: Roger Wang <hey@rogerw.me > 
						
						
					 
					
						2025-08-17 23:33:29 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						08d5f7113a 
					 
					
						
						
							
							[Misc] refactor function name ( #23029 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Andy Xie <andy.xning@gmail.com > 
						
						
					 
					
						2025-08-17 22:16:21 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b2fd0b81e0 
					 
					
						
						
							
							[Bugfix][CI] Machete kernels: deterministic ordering for more cache hits ( #23055 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Andy Lo <andy@mistral.ai > 
						
						
					 
					
						2025-08-17 22:10:26 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9f1c642254 
					 
					
						
						
							
							[Bugfix] fix Qwen2.5-Omni processor output mapping ( #23058 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: double7 <33449816+DoubleVII@users.noreply.github.com >
Co-authored-by: 杨森 <yangsen.double7@bytedance.com >
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> 
						
						
					 
					
						2025-08-17 22:09:11 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7be3a59d8e 
					 
					
						
						
							
							[Misc] enhance static type hint ( #23059 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Andy Xie <andy.xning@gmail.com > 
						
						
					 
					
						2025-08-17 22:09:08 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8ea0c2753a 
					 
					
						
						
							
							[Misc] Minor code cleanup for _get_prompt_logprobs_dict ( #23064 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-08-17 18:16:03 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0fc8fa751a 
					 
					
						
						
							
							fix: gptq marlin weight loading failure ( #23066 )  
						
						 
						
						
						
						
					 
					
						2025-08-17 15:56:07 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						21e39436c8 
					 
					
						
						
							
							[XPU] fix xpu to set cudagraph batch sizes ( #23044 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: calvin chen <wen.chen@dynamia.ai > 
						
						
					 
					
						2025-08-17 21:45:42 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6d243efeda 
					 
					
						
						
							
							[Misc] Convert use_structured_output property into constant ( #23060 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-08-17 12:41:38 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c55bc1db26 
					 
					
						
						
							
							[Misc] Remove dead return ( #23061 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-08-17 10:36:46 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						292084e72a 
					 
					
						
						
							
							[BugFix] Fix for IMA in FA3 varlen combine ( #22967 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com > 
						
						
					 
					
						2025-08-17 08:52:04 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						16bff144be 
					 
					
						
						
							
							[Misc] fix typo in the multimodal doc ( #23051 )  
						
						 
						
						
						
						
					 
					
						2025-08-17 01:56:20 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						fe0411fc6f 
					 
					
						
						
							
							[Bugfix] should use stack instead of concat ( #22972 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: 947132885 <947132885@qq.com >
Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn >
Co-authored-by: Isotr0py <mozf@mail2.sysu.edu.cn > 
						
						
					 
					
						2025-08-17 08:46:36 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4d4061b6e7 
					 
					
						
						
							
							[Kernel] Add cuda kernel for gpt_oss activation ( #22951 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-08-17 05:03:24 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						87f48623a5 
					 
					
						
						
							
							[Misc] method name typo fix ( #23042 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Andy Xie <andy.xning@gmail.com > 
						
						
					 
					
						2025-08-16 21:49:14 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5c32143b9d 
					 
					
						
						
							
							[Refactor] Defer tensor data construction in MultiModalKwargs ( #23030 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-08-16 21:05:50 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						94096a47c9 
					 
					
						
						
							
							[UX] Separate marlin moe config logic from triton moe ( #23006 )  
						
						 
						
						
						
						
					 
					
						2025-08-16 22:16:42 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a258ad8bcc 
					 
					
						
						
							
							[Bugfix] fix qwen3 moe fp8 accuracy issue ( #23031 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jinzhen Lin <jinzhen.ljz@antgroup.com > 
						
						
					 
					
						2025-08-16 17:41:23 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						bf7f470b22 
					 
					
						
						
							
							[V1] Logits processors extensibility ( #19912 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Andrew Feldman <afeldman@redhat.com >
Signed-off-by: Andrew Feldman <afeld2012@gmail.com >
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com >
Signed-off-by: Nick Hill <nhill@redhat.com >
Co-authored-by: Nick Hill <nhill@redhat.com >
Co-authored-by: Andrew Feldman <afeld2012@gmail.com >
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-08-16 12:59:17 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4fc722eca4 
					 
					
						
						
							
							[Kernel/Quant] Remove AQLM ( #22943 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com >
Co-authored-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com > 
						
						
					 
					
						2025-08-16 19:38:21 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3253ae765e 
					 
					
						
						
							
							[Flaky CI] Increase timeout tolerance for test_mp_crash_detection+test_default_mm_lora_chat_completions ( #23028 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-08-16 18:33:08 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						000cceca8c 
					 
					
						
						
							
							[Bugfix gpt-oss] Fix float32 convert for flashinfer sink support ( #23016 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-08-16 11:16:00 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						68373d3126 
					 
					
						
						
							
							[Frontend] Added support for HermesToolParser for models without special tokens ( #16890 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: minpeter <kali2005611@gmail.com > 
						
						
					 
					
						2025-08-16 17:38:42 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						52ce1420e9 
					 
					
						
						
							
							Fix handling of max_num_batched_tokens for pooling tasks ( #23004 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Max de Bayser <mbayser@br.ibm.com > 
						
						
					 
					
						2025-08-16 17:36:30 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						829bbd7882 
					 
					
						
						
							
							[New Model]mBART model ( #22883 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: 汪志鹏 <wangzhipeng628@gmail.com > 
						
						
					 
					
						2025-08-16 12:16:58 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4dff91c93d 
					 
					
						
						
							
							[Refactor] Allow optional MultiModalKwargsItem in IPC ( #23022 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-08-16 11:30:49 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						de9cb61763 
					 
					
						
						
							
							Add docs for PrefixRepetitionDataset + enable usage with vllm bench throughput ( #23012 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Seiji Eicher <seiji@anyscale.com >
Co-authored-by: Roger Wang <hey@rogerw.me > 
						
						
					 
					
						2025-08-16 10:21:20 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2dbccce8a6 
					 
					
						
						
							
							[CI][Bugfix] Skip Ovis2 generation test because of broken remote code ( #22954 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn > 
						
						
					 
					
						2025-08-16 09:44:19 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						933f45334a 
					 
					
						
						
							
							[Core] Make cudagraph check cuda platform only ( #23005 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chengji Yao <chengjiyao@gmail.com >
Signed-off-by: Chengji Yao <chengjiyao@google.com >
Co-authored-by: Chengji Yao <chengjiyao@gmail.com >
Co-authored-by: Li, Jiang <jiang1.li@intel.com > 
						
						
					 
					
						2025-08-16 07:46:00 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						cc826a202b 
					 
					
						
						
							
							[Multimodal] Update Tensor schema test to cover arbitrary shape mm inputs ( #22867 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn > 
						
						
					 
					
						2025-08-16 00:44:50 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6d3da472bc 
					 
					
						
						
							
							[Misc] Add --save-dir option to benchmark_moe ( #23020 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-08-16 07:26:10 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						78863f8c5c 
					 
					
						
						
							
							[BugFix] Add support for loading prompt embeds tensors serialized on unavailable devices and sparse tensors ( #22962 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Andrew Sansom <andrew@protopia.ai > 
						
						
					 
					
						2025-08-16 06:25:10 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5157827cfc 
					 
					
						
						
							
							[Build] Env var to disable sccache ( #22968 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com > 
						
						
					 
					
						2025-08-16 05:36:27 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7caec10e7b 
					 
					
						
						
							
							[XPU]avoid circular import during XPU init ( #23017 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Kunshang Ji <kunshang.ji@intel.com > 
						
						
					 
					
						2025-08-16 05:16:34 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1f83e7d849 
					 
					
						
						
							
							[misc] nsys profile output kernel classifier and visualizer ( #22971 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Grace Ho <grho@nvidia.com > 
						
						
					 
					
						2025-08-16 02:52:51 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e4e37ded56 
					 
					
						
						
							
							[V1] support min_tokens for detokener ( #22014 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: calvin chen <wen.chen@dynamia.ai >
Co-authored-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-08-16 02:28:10 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f6b5040590 
					 
					
						
						
							
							[Frontend] Avoid list copies in serving_chat.py ( #22947 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-08-16 02:06:30 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						fbd88728b3 
					 
					
						
						
							
							[Bugfix] Fix DeepSeek MTP ( #22934 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Benjamin Chislett <benjamin.chislett@centml.ai > 
						
						
					 
					
						2025-08-16 01:25:06 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						070da660c1 
					 
					
						
						
							
							[Kernel] Simplify get_kv_cache_layout and cache use_trtllm_attention env-dependent bit ( #22735 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: NickLucche <nlucches@redhat.com > 
						
						
					 
					
						2025-08-16 00:14:08 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ad0297d113 
					 
					
						
						
							
							[Misc] Support passing multiple request ids at once to AsyncLLM.abort() ( #22944 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-08-15 17:00:36 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						236b864e4f 
					 
					
						
						
							
							[BugFix] Make run_once thread-safe ( #22978 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: <wenji.yyc@alibaba-inc.com >
Signed-off-by: Yichen Yan <wenji.yyc@alibaba-inc.com > 
						
						
					 
					
						2025-08-15 16:56:17 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3e2f7985a2 
					 
					
						
						
							
							Support multiple attention groups for KV sharing ( #22672 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Yong Hoon Shin <yhshin@meta.com > 
						
						
					 
					
						2025-08-15 16:54:10 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c280066f9d 
					 
					
						
						
							
							[v1] Move block_hashes from KVCacheManager to Request.block_hashes ( #19728 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Or Ozeri <oro@il.ibm.com > 
						
						
					 
					
						2025-08-15 16:52:52 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b9dc9d2607 
					 
					
						
						
							
							[BugFix] Handle case where async utility call is cancelled ( #22996 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com >
Co-authored-by: Yinghai Lu <yinghai@thinkingmachines.ai > 
						
						
					 
					
						2025-08-15 17:38:42 -06:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1fc375dc05 
					 
					
						
						
							
							[Structured Outputs] [Bug] Fix misalignment in apply_grammar_bitmask causing unintended masking and NaN logits ( #22963 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: rishitdholakia13 <rishit+github@cohere.com > 
						
						
					 
					
						2025-08-15 23:25:05 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						76144adf76 
					 
					
						
						
							
							ci: Add CUDA + arm64 release builds ( #21201 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Eli Uriegas <eliuriegas@meta.com > 
						
						
					 
					
						2025-08-15 23:16:23 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f5d412bafb 
					 
					
						
						
							
							[BugFix] Fix regression caused by mamba state dtype PR ( #22998 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Thomas Parnell <tpa@zurich.ibm.com > 
						
						
					 
					
						2025-08-15 22:55:26 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						177e55e3bd 
					 
					
						
						
							
							[Attention] FA3 Attention Sinks Perf Boost ( #22478 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com > 
						
						
					 
					
						2025-08-15 17:41:07 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1723ef1aae 
					 
					
						
						
							
							minor: zero workspace buffer init for flashinfer trtllm-gen attn ( #22603 )  
						
						 
						
						
						
						
					 
					
						2025-08-15 21:38:10 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						00d6cba0cf 
					 
					
						
						
							
							Add PrefixRepetitionRandomDataset to vllm bench serve datasets ( #20638 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Seiji Eicher <seiji@anyscale.com > 
						
						
					 
					
						2025-08-15 14:09:23 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7f89ed248f 
					 
					
						
						
							
							[Fix] enable swap_ab for pplx problem size computation ( #22991 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Shixian Cui <shixian@amazon.com >
Co-authored-by: Shixian Cui <shixian@amazon.com > 
						
						
					 
					
						2025-08-15 14:02:12 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8a87cd27d9 
					 
					
						
						
							
							[CI] Speed up Whisper tests by reusing server ( #22859 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-08-15 16:56:31 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a344a1a7da 
					 
					
						
						
							
							Use regex in convert-results-json-to-markdown.py ( #22989 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Michael Goin <mgoin64@gmail.com > 
						
						
					 
					
						2025-08-15 20:54:20 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						79899b63f6 
					 
					
						
						
							
							[Bugfix] Added more env vars to hash ( #22449 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Julien Lin <jullin@nvidia.com > 
						
						
					 
					
						2025-08-15 20:08:37 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6e670778cd 
					 
					
						
						
							
							[Core] direct indexing on self.block_table_np in compute_slot_mapping ( #22940 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: linzebing <linzebing1995@gmail.com > 
						
						
					 
					
						2025-08-15 12:12:12 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						df5afa82e5 
					 
					
						
						
							
							[Log] Debug Once for Randomizing dummy data for DP Rank ( #22860 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yewentao256 <zhyanwentao@126.com > 
						
						
					 
					
						2025-08-15 11:51:50 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6cd69f51bf 
					 
					
						
						
							
							[Model] Granite-4 support loading quantized checkpoint ( #22925 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chih-Chieh-Yang <7364402+cyang49@users.noreply.github.com > 
						
						
					 
					
						2025-08-15 18:47:56 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8ad7285ea2 
					 
					
						
						
							
							[Kernels] Clean up FusedMoeMethodBase and modular kernel setup.  Remove extra arguments from modular kernel methods. ( #22035 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Bill Nell <bnell@redhat.com >
Co-authored-by: Michael Goin <mgoin64@gmail.com > 
						
						
					 
					
						2025-08-15 14:46:00 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						48b01fd4d4 
					 
					
						
						
							
							[Structured Output] Make the output of structured output example more complete ( #22481 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: shen-shanshan <467638484@qq.com > 
						
						
					 
					
						2025-08-15 18:29:25 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						993d3d122b 
					 
					
						
						
							
							[Benchmarks] Include image data when ShareGPT4V dataset is used. ( #22955 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chenheli Hua <huachenheli@outlook.com > 
						
						
					 
					
						2025-08-15 18:23:06 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						68af77e51c 
					 
					
						
						
							
							[FIXBUG] Correctly Apply Grammar Bitmask in Mixed Batches ( #22896 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: JartX <sagformas@epdcenter.es > 
						
						
					 
					
						2025-08-15 17:42:49 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6b04039a72 
					 
					
						
						
							
							[BugFix] Skip the Q component for QKVParallelLinear in the case of QKVCrossParallelLinear since its width is 0 ( #22369 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: sstamenk <sstamenk@amd.com > 
						
						
					 
					
						2025-08-15 17:17:31 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1c859a1387 
					 
					
						
						
							
							[V0 Deprecation] Remove advance_step ( #22969 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-08-15 08:22:31 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						74f441f4b5 
					 
					
						
						
							
							[Core] Allow full cudagraph with separate attention routines and orthogonal to compilation, add support for FA2 and FlashInfer ( #20059 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: fhl <2410591650@qq.com >
Signed-off-by: fhl2000 <63384265+fhl2000@users.noreply.github.com >
Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com >
Signed-off-by: Lucas Wilkinson <LucasWilkinson@users.noreply.github.com >
Co-authored-by: Luka Govedič <ProExpertProg@users.noreply.github.com >
Co-authored-by: Lucas Wilkinson <lwilkins@redhat.com >
Co-authored-by: Lucas Wilkinson <LucasWilkinson@users.noreply.github.com > 
						
						
					 
					
						2025-08-15 10:01:39 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a0632a3e03 
					 
					
						
						
							
							[Frontend] Expose do_log_stats interval to env ( #22905 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Csrayz <jover@cmbchina.com >
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-08-15 13:00:20 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e8b40c7fa2 
					 
					
						
						
							
							[CI] Remove duplicated docs build from buildkite ( #22924 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-08-15 05:58:06 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						48f4636927 
					 
					
						
						
							
							[Misc] Ignore ep_kernels_workspace ( #22807 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-08-15 05:58:03 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						75531a6c13 
					 
					
						
						
							
							[V1] [Hybrid] Support using float32 for state in Hybrid Models (Mamba2, Mamba1, Minimax) ( #22928 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Daniel Afrimi <danielafrimi8@gmail.com >
Signed-off-by: Thomas Parnell <tpa@zurich.ibm.com >
Signed-off-by: Chen Zhang <zhangch99@outlook.com >
Co-authored-by: Daniel Afrimi <danielafrimi8@gmail.com >
Co-authored-by: Burkhard Ringlein <ngl@zurich.ibm.com >
Co-authored-by: Chen Zhang <zhangch99@outlook.com > 
						
						
					 
					
						2025-08-15 12:57:06 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						22341b996e 
					 
					
						
						
							
							Improve multimodal hasher performance for re-used Image prompts ( #22825 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Staszek Pasko <staszek@gmail.com > 
						
						
					 
					
						2025-08-15 12:32:56 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						49252cf59e 
					 
					
						
						
							
							[MM] Allow skipping memory profiling for multimodal models. ( #22950 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Roger Wang <hey@rogerw.me >
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> 
						
						
					 
					
						2025-08-15 11:41:38 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3e6dd40016 
					 
					
						
						
							
							[Bugfix] fix cuda 12.6 and 11.8 build ( #22952 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jinzhen Lin <jinzhen.ljz@antgroup.com > 
						
						
					 
					
						2025-08-15 10:10:22 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						aa300c438d 
					 
					
						
						
							
							[Bugfix] Unquote file uri before reading image ( #22912 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Sayandip Dutta <sayandip199309@gmail.com >
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com > 
						
						
					 
					
						2025-08-15 09:28:00 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						fe91ce9591 
					 
					
						
						
							
							[V1] - Split Prefill and Decode for Mamba1 models ( #22653 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: amirk <amirk@ai21.com >
Signed-off-by: asafg <asafg@ai21.com >
Co-authored-by: asafg <asafg@ai21.com >
Co-authored-by: Asaf Joseph Gardin <39553475+Josephasafg@users.noreply.github.com > 
						
						
					 
					
						2025-08-15 08:59:52 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5406ebf5c9 
					 
					
						
						
							
							[CI] Pooling models mteb test uses enforce_eager ( #22878 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: wang.yuqi <noooop@126.com > 
						
						
					 
					
						2025-08-15 01:16:15 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b2c06509e5 
					 
					
						
						
							
							[P/D]Provide bucket algorithm rate limiter  for proxy_server ( #22643 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: frankie-ys <yongshengwang@cmbchina.com >
Signed-off-by: frankie <wangyongsheng686@gmail.com >
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com >
Co-authored-by: Kuntai Du <kuntai@uchicago.edu > 
						
						
					 
					
						2025-08-15 07:01:48 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b2f6c247a9 
					 
					
						
						
							
							Revert "[ROCm][AITER] Support AITER Rope ops in RotaryEmbedding Module." ( #22956 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: vllmellm <vllm.ellm@embeddedllm.com >
Co-authored-by: vllmellm <vllm.ellm@embeddedllm.com > 
						
						
					 
					
						2025-08-15 06:39:19 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3d232dbd19 
					 
					
						
						
							
							[Mamba] - refactor: Renamed mamba_attn to mamba2_attn ( #22818 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: asafg <asafg@ai21.com >
Co-authored-by: asafg <asafg@ai21.com > 
						
						
					 
					
						2025-08-15 06:38:05 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5c3fbfe46b 
					 
					
						
						
							
							[Feature] Full Cuda Graph Support for Cutlass MLA and 6% E2E Throughput Improvement ( #22763 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yewentao256 <zhyanwentao@126.com > 
						
						
					 
					
						2025-08-15 06:27:30 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b4cef5e6c7 
					 
					
						
						
							
							refactor: Change scaling factors calculation for flashinfer FusedMoE ( #22812 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Amir Klein <203507526+amirkl94@users.noreply.github.com >
Co-authored-by: Michael Goin <mgoin64@gmail.com > 
						
						
					 
					
						2025-08-15 06:19:31 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0fe85087a9 
					 
					
						
						
							
							[CI Perf] Prune tests in tests/kernels/attention/ ( #22936 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-08-14 21:34:53 -06:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d2b0e97ea6 
					 
					
						
						
							
							[CI Perf] Prune tests in tests/kernels/moe/ ( #22939 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-08-14 21:33:42 -06:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						590bddbfc5 
					 
					
						
						
							
							[CI Perf] Prune tests in tests/kernels/quantization/ ( #22942 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-08-14 21:25:34 -06:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ae05a6d83d 
					 
					
						
						
							
							[BugFix] Fix port lookup in internal DP LB tests ( #22252 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-08-15 11:17:11 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0933f9d518 
					 
					
						
						
							
							[BugFix][KVConn] Fix use of get_required_kvcache_layout ( #22734 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-08-15 01:39:43 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f1f0d2fab8 
					 
					
						
						
							
							Revert "[Kernel]  Add cuda kernel for gpt_oss activation" ( #22948 )  
						
						 
						
						
						
						
					 
					
						2025-08-14 17:38:10 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						81f4b96481 
					 
					
						
						
							
							[Kernel]  Add cuda kernel for gpt_oss activation ( #22538 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-08-14 17:21:29 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						39cd09dc86 
					 
					
						
						
							
							[Bugfix] use flash attn on sm90 ( #22933 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Yongye Zhu <zyy1102000@gmail.com >
Co-authored-by: Michael Goin <mgoin64@gmail.com > 
						
						
					 
					
						2025-08-14 16:37:22 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						919234fe17 
					 
					
						
						
							
							[BugFix] Fix initial DP request load imbalance ( #22910 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-08-14 15:20:28 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ebcce2cd36 
					 
					
						
						
							
							[Core] Return final response for aborted requests from AsyncLLM.generate ( #22283 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-08-14 14:49:02 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4121de512e 
					 
					
						
						
							
							[Quantization]: Support compressed-tensors mixed-precision model loading ( #22468 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Dipika Sikka <dipikasikka1@gmail.com > 
						
						
					 
					
						2025-08-14 17:32:09 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						279a5f31b3 
					 
					
						
						
							
							[Kernel] Add nvfp4 gemm flashinfer backends ( #22346 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Julien Lin <jullin@nvidia.com >
Signed-off-by: mgoin <mgoin64@gmail.com >
Co-authored-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-08-14 16:03:55 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b8ff05361a 
					 
					
						
						
							
							[CI] Temporarily disable flaky test  ( #22930 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com > 
						
						
					 
					
						2025-08-14 19:59:16 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						637093ae26 
					 
					
						
						
							
							docs: update fastsafetensors usage instructions ( #22891 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nir Levy <bhr166@gmail.com > 
						
						
					 
					
						2025-08-14 19:56:54 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						33c63e9547 
					 
					
						
						
							
							[Kernel] [Quantization] Add MXFP4 and bias support for marlin kernel ( #22428 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: rongfu.leng <rongfu.leng@daocloud.io >
Signed-off-by: Jinzhen Lin <linjinzhen@hotmail.com >
Signed-off-by: Huzaifa Sidhpurwala <huzaifas@redhat.com >
Signed-off-by: Varun Sundar Rabindranath <vsundarr@redhat.com >
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com >
Signed-off-by: Jee Jee Li <pandaleefree@gmail.com >
Signed-off-by: mgoin <mgoin64@gmail.com >
Signed-off-by: Animesh Jain <anijain@umich.edu >
Signed-off-by: Rui Qiao <ruisearch42@gmail.com >
Signed-off-by: Xiongfei Wei <isaacwxf23@gmail.com >
Signed-off-by: Nick Hill <nhill@redhat.com >
Signed-off-by: yewentao256 <zhyanwentao@126.com >
Signed-off-by: kf <kuanfu.liu@embeddedllm.com >
Signed-off-by: vllmellm <vllm.ellm@embeddedllm.com >
Signed-off-by: NickLucche <nlucches@redhat.com >
Signed-off-by: Dipika Sikka <dipikasikka1@gmail.com >
Signed-off-by: Sage Moore <sage@neuralmagic.com >
Signed-off-by: tjtanaavllm <tunjian.tan@amd.com >
Signed-off-by: Yong Hoon Shin <yhshin@meta.com >
Signed-off-by: Chih-Chieh-Yang <7364402+cyang49@users.noreply.github.com >
Signed-off-by: Roger Wang <hey@rogerw.me >
Signed-off-by: Vadim Gimpelson <vadim.gimpelson@centml.ai >
Signed-off-by: Isotr0py <2037008807@qq.com >
Signed-off-by: zRzRzRzRzRzRzR <2448370773@qq.com >
Signed-off-by: Chih-Chieh Yang <7364402+cyang49@users.noreply.github.com >
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk >
Signed-off-by: Thomas Parnell <tpa@zurich.ibm.com >
Signed-off-by: yan <yan.ma@intel.com >
Signed-off-by: Yan Ma <yan.ma@intel.com >
Signed-off-by: Xiao Liu <xiszishu@gmail.com >
Signed-off-by: jiahanc <173873397+jiahanc@users.noreply.github.com >
Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn >
Signed-off-by: Ye (Charlotte) Qi <yeq@meta.com >
Signed-off-by: LopezCastroRoberto <roberto.lopez.castro@udc.es >
Signed-off-by: Andy Xie <andy.xning@gmail.com >
Signed-off-by: Haibin Lin <haibin.lin@bytedance.com >
Signed-off-by: David Ben-David <davidb@pliops.com >
Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu >
Signed-off-by: jiang1.li <jiang1.li@intel.com >
Signed-off-by: Seiji Eicher <seiji@anyscale.com >
Signed-off-by: zitian.zhao <zitian.zhao@tencentmusic.com >
Signed-off-by: 22quinn <33176974+22quinn@users.noreply.github.com >
Signed-off-by: Abirdcfly <fp544037857@gmail.com >
Signed-off-by: Giancarlo Delfin <gdelfin@meta.com >
Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com >
Signed-off-by: huangweixiao <huangweixiao@msh.team >
Signed-off-by: alyosha-swamy <raghav@arcee.ai >
Signed-off-by: Eric Hanley <ericehanley@google.com >
Signed-off-by: Abatom <abzhonghua@gmail.com >
Signed-off-by: CLFutureX <775523362@qq.com >
Signed-off-by: Linkun Chen <github@lkchen.net >
Signed-off-by: tjtanaa <tunjian.tan@embeddedllm.com >
Signed-off-by: Gregory Shtrasberg <Gregory.Shtrasberg@amd.com >
Signed-off-by: tlipoca9 <tlipoca9@gmail.com >
Signed-off-by: elvischenv <219235043+elvischenv@users.noreply.github.com >
Signed-off-by: zitian zhao <zitian.zhao@tencentmusic.com >
Signed-off-by: mgoin <michael@neuralmagic.com >
Signed-off-by: wang.yuqi <noooop@126.com >
Signed-off-by: Benji Beck <benjibeck@meta.com >
Signed-off-by: Siyuan Liu <lsiyuan@google.com >
Signed-off-by: Benjamin Chislett <benjamin.chislett@centml.ai >
Signed-off-by: isotr0py <2037008807@qq.com >
Signed-off-by: Chen Zhang <zhangch99@outlook.com >
Signed-off-by: simon-mo <xmo@berkeley.edu >
Signed-off-by: LucasWilkinson <lwilkinson@neuralmagic.com >
Signed-off-by: Zhang Jason <ning.zhang2@amd.com >
Signed-off-by: Yongye Zhu <zyy1102000@gmail.com >
Signed-off-by: asafg <asafg@ai21.com >
Signed-off-by: Siyuan Fu <siyuanf@nvidia.com >
Signed-off-by: Lain <fusiyuan2000@hotmail.com >
Signed-off-by: Max de Bayser <mbayser@br.ibm.com >
Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com >
Signed-off-by: Kunshang Ji <kunshang.ji@intel.com >
Signed-off-by: Tao He <linzhu.ht@alibaba-inc.com >
Signed-off-by: Michael Goin <mgoin64@gmail.com >
Signed-off-by: QscQ <qscqesze@gmail.com >
Signed-off-by: qingjun <qingjun@minimaxi.com >
Signed-off-by: Syed Muhammad Bin Asif <syedmba7@connect.hku.hk >
Signed-off-by: Lionel Villard <villard@us.ibm.com >
Signed-off-by: ycyaw66 <497410282@qq.com >
Signed-off-by: David Chen <530634352@qq.com >
Signed-off-by: Linkun <github@lkchen.net >
Signed-off-by: Moritz Sanft <58110325+msanft@users.noreply.github.com >
Signed-off-by: Ming Yang <minos.future@gmail.com >
Signed-off-by: Adrian Garcia <adrian.garcia@inceptionai.ai >
Signed-off-by: shaojunqi <shaojunqi.sjq@alibaba-inc.com >
Signed-off-by: Ricardo Decal <rdecal@anyscale.com >
Signed-off-by: Andrew Chan <andrewkchan.akc@gmail.com >
Signed-off-by: Felix Marty <Felix.Marty@amd.com >
Signed-off-by: Andrew Sansom <andrew@protopia.ai >
Signed-off-by: Zhiyu Cheng <zhiyuc@nvidia.com >
Signed-off-by: Shu Wang <shuw@nvidia.com >
Signed-off-by: Po-Han Huang <pohanh@nvidia.com >
Signed-off-by: Shu Wang. <shuw@nvidia.com >
Signed-off-by: XIn Li <xinli@nvidia.com >
Signed-off-by: Junhao Li <junhao@ubicloud.com >
Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com >
Signed-off-by: iAmir97 <Amir.balwel@embeddedllm.com >
Signed-off-by: iAmir97 <71513472+iAmir97@users.noreply.github.com >
Signed-off-by: <zyy1102000@gmail.com >
Signed-off-by: Guy Stone <guys@spotify.com >
Signed-off-by: <yyweiss@gmail.com >
Signed-off-by: yyw <yyweiss@gmail.com >
Signed-off-by: Russell Bryant <rbryant@redhat.com >
Signed-off-by: Pradyun Ramadorai <pradyunr@amazon.com >
Signed-off-by: Pradyun92 <142861237+Pradyun92@users.noreply.github.com >
Signed-off-by: Jinzhen Lin <jinzhen.ljz@antgroup.com >
Co-authored-by: rongfu.leng <rongfu.leng@daocloud.io >
Co-authored-by: Huzaifa Sidhpurwala <huzaifas@redhat.com >
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Russell Bryant <rbryant@redhat.com >
Co-authored-by: Varun Sundar Rabindranath <varunsundar08@gmail.com >
Co-authored-by: Varun Sundar Rabindranath <vsundarr@redhat.com >
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com >
Co-authored-by: Jee Jee Li <pandaleefree@gmail.com >
Co-authored-by: Michael Goin <mgoin64@gmail.com >
Co-authored-by: Animesh Jain <jainanimesh2305@yahoo.com >
Co-authored-by: Rui Qiao <161574667+ruisearch42@users.noreply.github.com >
Co-authored-by: XiongfeiWei <isaacwxf23@gmail.com >
Co-authored-by: Nick Hill <nhill@redhat.com >
Co-authored-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com >
Co-authored-by: JartX <sagformas@gmail.com >
Co-authored-by: fhl2000 <63384265+fhl2000@users.noreply.github.com >
Co-authored-by: vllmellm <vllm.ellm@embeddedllm.com >
Co-authored-by: kf <kuanfu.liu@embeddedllm.com >
Co-authored-by: Nicolò Lucchesi <nlucches@redhat.com >
Co-authored-by: Dipika Sikka <dipikasikka1@gmail.com >
Co-authored-by: Sage Moore <sage@neuralmagic.com >
Co-authored-by: tjtanaavllm <tunjian.tan@amd.com >
Co-authored-by: Yong Hoon Shin <48474650+sarckk@users.noreply.github.com >
Co-authored-by: Chih-Chieh Yang <7364402+cyang49@users.noreply.github.com >
Co-authored-by: Roger Wang <hey@rogerw.me >
Co-authored-by: Vadim Gimpelson <156319763+vadiklyutiy@users.noreply.github.com >
Co-authored-by: Yuxuan Zhang <2448370773@qq.com >
Co-authored-by: Isotr0py <2037008807@qq.com >
Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk >
Co-authored-by: Thomas Parnell <tpa@zurich.ibm.com >
Co-authored-by: Yan Ma <yan.ma@intel.com >
Co-authored-by: Xiao <xiszishu@gmail.com >
Co-authored-by: jiahanc <173873397+jiahanc@users.noreply.github.com >
Co-authored-by: Isotr0py <mozf@mail2.sysu.edu.cn >
Co-authored-by: Ye (Charlotte) Qi <yeq@meta.com >
Co-authored-by: Roberto L. Castro <38211239+LopezCastroRoberto@users.noreply.github.com >
Co-authored-by: Ning Xie <andy.xning@gmail.com >
Co-authored-by: H <linhaibin.eric@gmail.com >
Co-authored-by: David Ben-David <sdavidbd@gmail.com >
Co-authored-by: David Ben-David <davidb@pliops.com >
Co-authored-by: Woosuk Kwon <woosuk.kwon@berkeley.edu >
Co-authored-by: Li, Jiang <jiang1.li@intel.com >
Co-authored-by: TankNee <nee@tanknee.cn >
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com >
Co-authored-by: Seiji Eicher <58963096+eicherseiji@users.noreply.github.com >
Co-authored-by: ZiTian.Zhao <zitian.zhao@tencentmusic.com >
Co-authored-by: 22quinn <33176974+22quinn@users.noreply.github.com >
Co-authored-by: Abirdcfly <fp544037857@gmail.com >
Co-authored-by: Giancarlo Delfin <32987265+TheEpicDolphin@users.noreply.github.com >
Co-authored-by: Chenxi Yang <cxyang@cs.utexas.edu >
Co-authored-by: Chenxi Yang <cxyang@meta.com >
Co-authored-by: Tyler Michael Smith <tyler@neuralmagic.com >
Co-authored-by: Weixiao Huang <hwx.simle@gmail.com >
Co-authored-by: Raghav Ravishankar <113712354+alyosha-swamy@users.noreply.github.com >
Co-authored-by: ericehanley <ericehanley@google.com >
Co-authored-by: Zhonghua Deng <abzhonghua@gmail.com >
Co-authored-by: Po-Han Huang (NVIDIA) <53919306+nvpohanh@users.noreply.github.com >
Co-authored-by: PiteXChen <44110731+CLFutureX@users.noreply.github.com >
Co-authored-by: lkchen <github@lkchen.net >
Co-authored-by: TJian <tunjian.tan@embeddedllm.com >
Co-authored-by: Gregory Shtrasberg <156009573+gshtras@users.noreply.github.com >
Co-authored-by: tlipoca9 <160737620+tlipoca9@users.noreply.github.com >
Co-authored-by: elvischenv <219235043+elvischenv@users.noreply.github.com >
Co-authored-by: wang.yuqi <noooop@126.com >
Co-authored-by: Benji Beck <benjibeck@meta.com >
Co-authored-by: youkaichao <youkaichao@gmail.com >
Co-authored-by: Siyuan Liu <lsiyuan@google.com >
Co-authored-by: Benjamin Chislett <chislett.ben@gmail.com >
Co-authored-by: LiuXiaoxuanPKU <lilyliupku@gmail.com >
Co-authored-by: simon-mo <xmo@berkeley.edu >
Co-authored-by: Chen Zhang <zhangch99@outlook.com >
Co-authored-by: Hongxia Yang <62075498+hongxiayang@users.noreply.github.com >
Co-authored-by: Minseok Lee <47620120+minseokl@users.noreply.github.com >
Co-authored-by: Yongye Zhu <zyy1102000@gmail.com >
Co-authored-by: Lucas Wilkinson <LucasWilkinson@users.noreply.github.com >
Co-authored-by: Zhang Jason <ning.zhang2@amd.com >
Co-authored-by: Asaf Joseph Gardin <39553475+Josephasafg@users.noreply.github.com >
Co-authored-by: asafg <asafg@ai21.com >
Co-authored-by: Lain <siyuanf@nvidia.com >
Co-authored-by: tc-mb <157115220+tc-mb@users.noreply.github.com >
Co-authored-by: imning3 <hbning@pku.edu.cn >
Co-authored-by: Maximilien de Bayser <mbayser@br.ibm.com >
Co-authored-by: Kunshang Ji <kunshang.ji@intel.com >
Co-authored-by: Tao He <linzhu.ht@alibaba-inc.com >
Co-authored-by: qscqesze <qingjun@minimaxi.com >
Co-authored-by: Syed Muhammad Bin Asif <92625830+syedmba@users.noreply.github.com >
Co-authored-by: Lionel Villard <villard@us.ibm.com >
Co-authored-by: WeiQing Chen <40507679+david6666666@users.noreply.github.com >
Co-authored-by: ycyaw66 <497410282@qq.com >
Co-authored-by: Moritz Sanft <58110325+msanft@users.noreply.github.com >
Co-authored-by: Ming Yang <minos.future@gmail.com >
Co-authored-by: Adrián García García <adrigarvk8@gmail.com >
Co-authored-by: Michael Goin <mgoin@redhat.com >
Co-authored-by: JaceyShao <65159281+JaceyShao@users.noreply.github.com >
Co-authored-by: shaojunqi <shaojunqi.sjq@alibaba-inc.com >
Co-authored-by: Ricardo Decal <crypdick@users.noreply.github.com >
Co-authored-by: Andrew Chan <andrewkchan.akc@gmail.com >
Co-authored-by: fxmarty-amd <felmarty@amd.com >
Co-authored-by: Andrew Sansom <andrew@protopia.ai >
Co-authored-by: Zhiyu <zhiyuc@nvidia.com >
Co-authored-by: Shu Wang <shuw@nvidia.com >
Co-authored-by: XIn Li <xinli@nvidia.com >
Co-authored-by: Junhao Li <streaver91@gmail.com >
Co-authored-by: Chauncey <chaunceyjiang@gmail.com >
Co-authored-by: iAmir97 <71513472+iAmir97@users.noreply.github.com >
Co-authored-by: iAmir97 <Amir.balwel@embeddedllm.com >
Co-authored-by: Hong Hanh <hanh.usth@gmail.com >
Co-authored-by: Daniel Serebrenik <74646983+pliops-daniels@users.noreply.github.com >
Co-authored-by: yewentao256 <zhyanwentao@126.com >
Co-authored-by: Guy Stone <guys@spotify.com >
Co-authored-by: yyweiss <70619747+yyweiss@users.noreply.github.com >
Co-authored-by: Pradyun92 <142861237+Pradyun92@users.noreply.github.com >
Co-authored-by: Pradyun Ramadorai <pradyunr@amazon.com >
Co-authored-by: Nicolò Lucchesi <nicolo.lucchesi@gmail.com > 
						
						
					 
					
						2025-08-14 11:23:22 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ab9f2cfd19 
					 
					
						
						
							
							[CI] [Hybrid]  Bump min transformers version for Bamba and Jamba ( #22908 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Thomas Parnell <tpa@zurich.ibm.com > 
						
						
					 
					
						2025-08-14 11:01:16 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						dbe298046c 
					 
					
						
						
							
							[Bugfix] Fix parsing of --disable-mm-preprocessor-cache ( #22909 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-08-14 08:09:44 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						625ccd1c4d 
					 
					
						
						
							
							[Bugfix] Replace custom Encoding class with BatchEncoding in MistralTokenizer ( #22786 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: zjy0516 <riverclouds.zhu@qq.com > 
						
						
					 
					
						2025-08-14 08:09:27 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						92ff41abea 
					 
					
						
						
							
							[Model] Modify the gate implementation of glm4_moe ( #22832 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-08-14 05:28:50 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						829b9a62d0 
					 
					
						
						
							
							[Perf] Dont create unnecessary pooling params ( #22876 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com > 
						
						
					 
					
						2025-08-14 05:28:09 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						540d54ca8d 
					 
					
						
						
							
							[CI] Re-enable transcriptions test_long_audio_request ( #22890 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: NickLucche <nlucches@redhat.com > 
						
						
					 
					
						2025-08-14 11:34:34 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0783f13960 
					 
					
						
						
							
							[Doc] fix dead link ( #22898 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Daniele Trifirò <dtrifiro@redhat.com > 
						
						
					 
					
						2025-08-14 04:06:13 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7655dc3e45 
					 
					
						
						
							
							[Bugfix] Add reset prefix cache for online serving ( #22726 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: iAmir97 <Amir.balwel@embeddedllm.com >
Signed-off-by: iAmir97 <71513472+iAmir97@users.noreply.github.com >
Co-authored-by: iAmir97 <Amir.balwel@embeddedllm.com >
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> 
						
						
					 
					
						2025-08-14 04:04:18 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f4efda821d 
					 
					
						
						
							
							Remove Phi 4 Flash configuration workaround ( #22723 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-08-14 04:03:49 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						eb08487b18 
					 
					
						
						
							
							[BugFix] Threadsafe close async zmq sockets ( #22877 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com >
Co-authored-by: Isotr0py <mozf@mail2.sysu.edu.cn > 
						
						
					 
					
						2025-08-14 03:44:29 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7c3a0741c6 
					 
					
						
						
							
							[Bugfix] Fix PixtralHFImagePixelInputs dynamic shape check ( #22827 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn > 
						
						
					 
					
						2025-08-14 02:35:43 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						00e3f9da46 
					 
					
						
						
							
							vLLM Benchmark suite improvement ( #22119 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Tsai, Louie <louie.tsai@intel.com >
Signed-off-by: Louie Tsai <louie.tsai@intel.com >
Co-authored-by: Li, Jiang <bigpyj64@gmail.com > 
						
						
					 
					
						2025-08-14 07:12:17 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a353bd083d 
					 
					
						
						
							
							[CI] remove flaky v0 test ( #22864 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Robert Shaw <robshaw@redhat.com >
Co-authored-by: Robert Shaw <robshaw@redhat.com > 
						
						
					 
					
						2025-08-13 21:41:51 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1d20c34717 
					 
					
						
						
							
							[CI] Fix tests/distributed/test_ca_buffer_sharing.py ( #22849 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: ilmarkov <imarkov@redhat.com >
Co-authored-by: ilmarkov <imarkov@redhat.com >
Co-authored-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com > 
						
						
					 
					
						2025-08-13 20:09:30 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b6af24fba7 
					 
					
						
						
							
							[CI][Entrypoints]: add filter to generation to filter out invalid tool calls ( #22826 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Will Eaton <weaton@redhat.com > 
						
						
					 
					
						2025-08-13 20:09:07 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0ca2393b47 
					 
					
						
						
							
							[CI/Build] Increase pooling tolerance to pass CI ( #22844 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk >
Signed-off-by: mgoin <mgoin64@gmail.com >
Co-authored-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-08-13 18:52:48 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						31a500c86f 
					 
					
						
						
							
							[Core] [N-gram SD Optimization][1/n] Propose tokens with a single KMP ( #22437 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jialin Ouyang <Jialin.Ouyang@gmail.com > 
						
						
					 
					
						2025-08-13 14:44:06 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4e8614e88b 
					 
					
						
						
							
							Move checklist in PR template ( #22852 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Luka Govedic <lgovedic@redhat.com > 
						
						
					 
					
						2025-08-13 21:38:35 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c6cd5ca3d3 
					 
					
						
						
							
							[ROCm][Bugfix] Fix compilation error in topk softmax fused kernel ( #22819 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: kliuae <kuanfu.liu@embeddedllm.com > 
						
						
					 
					
						2025-08-13 13:45:03 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						df0e0f023e 
					 
					
						
						
							
							[CI/Build] Skip gpt_big model test because of broken HF model ( #22848 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn > 
						
						
					 
					
						2025-08-13 20:36:28 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b4b78d6317 
					 
					
						
						
							
							[CI/Build] Fix param mismatch in test_eagle_correctness ( #22847 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-08-13 10:55:25 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						12817a8ac7 
					 
					
						
						
							
							[CI] Fix tests/v1/e2e/test_kv_sharing_fast_prefill.py import on test ( #22815 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: NickLucche <nlucches@redhat.com > 
						
						
					 
					
						2025-08-13 10:35:50 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c9232d41f4 
					 
					
						
						
							
							[CI/Build] Update VLM common tests ( #22841 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-08-13 10:03:05 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9bd9294f0e 
					 
					
						
						
							
							[Bugfix] Fix MiniCPMV Image input inference failed ( #22813 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: HWH <67449739+jio-H@users.noreply.github.com >
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk >
Signed-off-by: Cyrus Leung <cyrus.tl.leung@gmail.com >
Co-authored-by: DarkLight1337 <tlleungac@connect.ust.hk >
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com > 
						
						
					 
					
						2025-08-13 09:41:41 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						da2705198f 
					 
					
						
						
							
							[Misc] clear and separate error messages for input too long and input + max-tokens too long ( #22803 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Roger Wang <hey@rogerw.me > 
						
						
					 
					
						2025-08-13 07:22:56 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						19b927e52d 
					 
					
						
						
							
							[Core] Use individual MM items in P0/P1 cache and model runner ( #22570 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-08-13 07:18:07 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						20d65aa755 
					 
					
						
						
							
							[Frontend] Multithreaded async multimodal load_bytes ( #22710 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Alexandre Milesi <30204471+milesial@users.noreply.github.com >
Co-authored-by: Alexandre Milesi <30204471+milesial@users.noreply.github.com > 
						
						
					 
					
						2025-08-13 06:09:26 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b159c0a67a 
					 
					
						
						
							
							Fix GGUF loader for Qwen3 MoE. ( #22785 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Gh0u1L5 <Gh0u1L5@outlook.com > 
						
						
					 
					
						2025-08-13 06:08:23 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6772bb0f7d 
					 
					
						
						
							
							Remove unnecessary CUDA sync of qwen image and video preprocess ( #22792 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: cyy <cyyever@outlook.com >
Signed-off-by: Yuanyuan Chen <cyyever@outlook.com >
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com > 
						
						
					 
					
						2025-08-13 06:07:28 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						fceafaf582 
					 
					
						
						
							
							[Bugfix][mamba] Fix type annotation of Mamba2Metadata ( #22787 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chen Zhang <zhangch99@outlook.com > 
						
						
					 
					
						2025-08-13 06:07:09 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6b794c756c 
					 
					
						
						
							
							[Nixl][CI] Fix tests ( #22806 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: NickLucche <nlucches@redhat.com > 
						
						
					 
					
						2025-08-13 06:03:53 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						98deac3879 
					 
					
						
						
							
							[FEATURE] support custom vllm tuned config path for fused moe triton kernels ( #22791 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chi Zhang <zhangchi.usc1992@bytedance.com > 
						
						
					 
					
						2025-08-13 20:27:25 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						653124bd46 
					 
					
						
						
							
							[Frontend] Add chunked processing to handle long inputs in embedding models ( #22280 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: x22x22 <wadeking@qq.com >
Signed-off-by: Kdump <rootshellexp@gmail.com >
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk >
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com >
Co-authored-by: Maximilien de Bayser <maxdebayser@gmail.com >
Co-authored-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-08-13 04:14:24 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0b1bdac6af 
					 
					
						
						
							
							[Platform] Custom ops support for FusedMoe ( #22509 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com > 
						
						
					 
					
						2025-08-13 04:12:00 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d94e3026de 
					 
					
						
						
							
							[V1] Add tree drafting tests for eagle spec decoding ( #22705 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Giancarlo Delfin <gdelfin@meta.com > 
						
						
					 
					
						2025-08-13 04:11:28 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3f52738dce 
					 
					
						
						
							
							[Doc] Add max_lora_rank configuration guide ( #22782 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: chiliu <cliu_whu@yeah.net > 
						
						
					 
					
						2025-08-13 04:10:07 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a01e0018b5 
					 
					
						
						
							
							[Bugfix] Fix Nemotron VL image processing ( #22739 )  
						
						 
						
						... 
						
						
						
						Co-authored-by: ducviet00-h2 <viet.d.hoang@h2corporation.jp > 
						
						
					 
					
						2025-08-13 03:11:36 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9e7e5baaa8 
					 
					
						
						
							
							[Model] Add missing prefix to glm4_1v ( #22716 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: zRzRzRzRzRzRzR <2448370773@qq.com > 
						
						
					 
					
						2025-08-13 01:23:33 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d16aa3dae4 
					 
					
						
						
							
							[Model] Add option to run Step3VisionEncoder in DP ( #22697 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: zzh142857 <chaorenzhaozhenghao@gmail.com > 
						
						
					 
					
						2025-08-13 00:09:13 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6807af8f46 
					 
					
						
						
							
							[gpt-oss] upgrade gpt-oss to v0.0.3 and add version check ( #22768 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chen Zhang <zhangch99@outlook.com > 
						
						
					 
					
						2025-08-12 21:37:26 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4c558cf62e 
					 
					
						
						
							
							[Perf] Support topk softmax fused kernel for broader num_experts ( #22211 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Shixian Cui <shixian@amazon.com >
Co-authored-by: Shixian Cui <shixian@amazon.com > 
						
						
					 
					
						2025-08-12 21:34:47 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						77a6bf07ae 
					 
					
						
						
							
							[Bug] Fix Unexpected Keyword Argument 'w1_bias' ( #22757 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yewentao256 <zhyanwentao@126.com > 
						
						
					 
					
						2025-08-12 21:31:47 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4082338a25 
					 
					
						
						
							
							Remove unneeded ROCm platform import when using CUDA ( #22765 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-08-12 21:26:38 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c6b928798e 
					 
					
						
						
							
							Force TRTLLM attention for gpt-oss on SM100 ( #22678 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-08-12 21:22:16 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b1361c7273 
					 
					
						
						
							
							[Bugfix] Fix default enable for CUTLASS MLA on SM100 ( #22738 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-08-12 21:22:05 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4f0f844b16 
					 
					
						
						
							
							Fix cuda illegal mem access with Llama4 TP8 + rms_norm custom op ( #22701 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Po-Han Huang <pohanh@nvidia.com > 
						
						
					 
					
						2025-08-12 21:21:50 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c5830381af 
					 
					
						
						
							
							[V0 Deprecation] Remove args for multi-step scheduling ( #22779 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk@thinkingmachines.ai > 
						
						
					 
					
						2025-08-12 20:38:18 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d31f97cf57 
					 
					
						
						
							
							[Misc] Remove tests/multi_step/__init__.py ( #22778 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk@thinkingmachines.ai > 
						
						
					 
					
						2025-08-12 20:21:18 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						71683ca6f6 
					 
					
						
						
							
							[V0 Deprecation] Remove multi-step scheduling ( #22138 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu >
Signed-off-by: Woosuk Kwon <woosuk@thinkingmachines.ai > 
						
						
					 
					
						2025-08-12 20:18:39 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e18859298d 
					 
					
						
						
							
							Add hardware plugins to installation doc ( #22732 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Michael Goin <mgoin64@gmail.com >
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com >
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-08-12 17:14:46 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						fde0b611a3 
					 
					
						
						
							
							[Model] Decouple glm4v ( #22751 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-08-12 17:13:17 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d0a6301588 
					 
					
						
						
							
							Fix Transformers backend tensor parallel for multimodal models ( #22673 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-08-12 17:12:30 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						45c3936e94 
					 
					
						
						
							
							[Docs] Hide the navigation and toc sidebars on home page ( #22749 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-08-12 17:12:26 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ba81acbdc1 
					 
					
						
						
							
							[Bugfix] Bump DeepGEMM Version to Fix SMXX Layout Issues ( #22606 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: frankwang28 <frank.wbb@hotmail.com > 
						
						
					 
					
						2025-08-12 15:43:06 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						53c730286c 
					 
					
						
						
							
							[Misc] parametrize 'dtype' in test_flash_mla ( #22641 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: RUTHLESS-BOT <wujiafeng@cmbchina.com >
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> 
						
						
					 
					
						2025-08-12 16:31:48 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6534d2fc97 
					 
					
						
						
							
							Fix torch version check for SM100 mxfp4  ( #22535 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Zifei Tong <zifeitong@gmail.com >
Signed-off-by: mgoin <mgoin64@gmail.com >
Co-authored-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-08-12 12:54:42 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						422f22e012 
					 
					
						
						
							
							[CI][Nixl] Check kv cache layout during handshake ( #22745 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: NickLucche <nlucches@redhat.com > 
						
						
					 
					
						2025-08-12 12:53:52 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6bd8ebf026 
					 
					
						
						
							
							[Kernel][AMD] Avoid D2H copy and cumsum kernel ( #22683 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Xiaozhu <mxz297@gmail.com >
Signed-off-by: Michael Goin <mgoin64@gmail.com >
Co-authored-by: Michael Goin <mgoin64@gmail.com >
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> 
						
						
					 
					
						2025-08-12 12:53:36 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						dab4f9f764 
					 
					
						
						
							
							[Chore] Update CODEOWNERS to include @yewentao256 for CUDA kernels, attention backends, quantization, and related tests ( #22741 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yewentao256 <zhyanwentao@126.com > 
						
						
					 
					
						2025-08-13 00:50:31 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c42fe0b63a 
					 
					
						
						
							
							Add more test scenario for tensor schema ( #22733 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: teekenl <teekenlau@gmail.com > 
						
						
					 
					
						2025-08-12 16:34:41 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5a4b4b3729 
					 
					
						
						
							
							Add: SupportsEagle3 interface for explicit EAGLE3 support ( #22642 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Rahul Tuli <rtuli@redhat.com > 
						
						
					 
					
						2025-08-12 09:24:52 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e5d3d63c42 
					 
					
						
						
							
							[Benchmark] Fix terminal colors in benchmark_serving_multi_turn (python 3.12) ( #22730 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: daniels <daniels@pliops.com > 
						
						
					 
					
						2025-08-12 14:41:37 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3d9d40efde 
					 
					
						
						
							
							[Bugfix][CI] Fix test_remote_decode_lifecycle.py::test_short_prompt_lifecycle ( #22727 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: NickLucche <nlucches@redhat.com > 
						
						
					 
					
						2025-08-12 07:30:17 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						67c153b88a 
					 
					
						
						
							
							Fix Llama4 FlashInfer FP4 MoE issues ( #22511 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Po-Han Huang <pohanh@nvidia.com > 
						
						
					 
					
						2025-08-12 05:50:59 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f7ad6a1eb3 
					 
					
						
						
							
							[CI Failure] fix tests/entrypoints/openai/test_skip_tokenizer.py ( #22708 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: wang.yuqi <noooop@126.com > 
						
						
					 
					
						2025-08-12 05:42:58 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						80bb1e8afe 
					 
					
						
						
							
							Officially support SmolLM3 using the Transformers backend ( #22665 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-08-12 05:38:48 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d030b01548 
					 
					
						
						
							
							[BugFix][Nixl][PD] Fix heterogenous TP ( #22663 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: NickLucche <nlucches@redhat.com >
Co-authored-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-08-12 05:37:30 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						767e63b860 
					 
					
						
						
							
							[Docs] Improve docs navigation ( #22720 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-08-12 04:25:55 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						007dd90859 
					 
					
						
						
							
							[gpt-oss] Enable gpt-oss on ampere ( #22714 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Yongye Zhu <zyy1102000@gmail.com > 
						
						
					 
					
						2025-08-12 03:21:44 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b8a9d0e429 
					 
					
						
						
							
							[Misc] remove GH discussions link ( #22722 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-08-12 03:15:33 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						50f2aae1b4 
					 
					
						
						
							
							[LMCache][Example] Align the PYTHONHASHSEED for prefillers and decoders for KV chunks hashing ( #21161 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: zejunchen-zejun <zejun.chen@amd.com > 
						
						
					 
					
						2025-08-12 02:05:14 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						46ae7f6666 
					 
					
						
						
							
							[Bugfix] Mamba2 SSD varlen bug fix initstates decay, improve test, assert chunk pwr 2 ( #21783 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Rishi Astra <40644327+RishiAstra@users.noreply.github.com > 
						
						
					 
					
						2025-08-12 02:04:37 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1ece7f30ba 
					 
					
						
						
							
							Fix: AWQ Marlin get_quant_method does not recognize "modules_to_not_convert" ( #21888 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: JunHowie <JunHowie@aliyun.com >
Co-authored-by: JunHowie <JunHowie@aliyun.com >
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> 
						
						
					 
					
						2025-08-12 02:03:53 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						bc8372efc3 
					 
					
						
						
							
							[Bugfix] Fix erroneous randomly generated cases in bad word testing ( #22170 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: phantomlei <phantomlei3@gmail.com > 
						
						
					 
					
						2025-08-12 02:03:22 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8d17fa633e 
					 
					
						
						
							
							[V0] Correct CUDA Graph capture for encoder-decoder models ( #22630 )  
						
						 
						
						
						
						
					 
					
						2025-08-12 02:01:08 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9f909b8996 
					 
					
						
						
							
							[New Model] Support Command-A-Vision ( #22660 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: donglu <donglu@cohere.com > 
						
						
					 
					
						2025-08-12 01:39:54 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						59f3b93636 
					 
					
						
						
							
							[DOC] update v1_guide with INTEL HW ( #22679 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chendi.Xue <chendi.xue@intel.com > 
						
						
					 
					
						2025-08-12 01:22:49 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						78077d5417 
					 
					
						
						
							
							Move SchedulerConfig from config/__init__.py to config/scheduler.py ( #22626 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-08-12 00:23:49 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6d729c43fb 
					 
					
						
						
							
							[Bugfix] Fix ModernBert load & Enable sliding window attention for bidirectional attention. ( #22637 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: wang.yuqi <noooop@126.com >
Signed-off-by: Max de Bayser <mbayser@br.ibm.com >
Co-authored-by: Max de Bayser <mbayser@br.ibm.com > 
						
						
					 
					
						2025-08-12 00:23:17 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2f4657952b 
					 
					
						
						
							
							[doc] Update x86 CPU-inference installation doc to reflect optionality of AVX512f  ( #22707 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Sooraj S <94284954+sooraj-satheesh@users.noreply.github.com >
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Li, Jiang <bigpyj64@gmail.com > 
						
						
					 
					
						2025-08-12 00:21:08 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3a7e3bbdd2 
					 
					
						
						
							
							[Doc] Added unmentioned required option "method" in the usage of EAGLE-3 based models ( #21737 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Dilute-l <dilu2333@163.com >
Co-authored-by: Dilute-l <dilu2333@163.com > 
						
						
					 
					
						2025-08-12 00:14:51 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4fbd8bb597 
					 
					
						
						
							
							Fix passing SpeculativeConfig from the CLI ( #22652 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-08-11 22:13:32 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ad344ef552 
					 
					
						
						
							
							[gpt-oss] Small bug fixes for frontend ( #22512 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chen Zhang <zhangch99@outlook.com > 
						
						
					 
					
						2025-08-11 22:04:38 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						bbaf9e9cb1 
					 
					
						
						
							
							[gpt-oss] Fix mxfp4 support ( #22700 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chen Zhang <zhangch99@outlook.com > 
						
						
					 
					
						2025-08-11 21:22:26 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4678503476 
					 
					
						
						
							
							Migrate MiniCPMVImageInputs to TensorSchema ( #21939 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Benji Beck <benjibeck@meta.com > 
						
						
					 
					
						2025-08-11 20:43:37 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						93d0652433 
					 
					
						
						
							
							[CI] Increase timeout for test_completion_with_image_embeds ( #22670 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-08-11 20:31:36 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ea1292ad3e 
					 
					
						
						
							
							[CI Failure] Use float32 for tests/entrypoints/openai/test_audio.py ( #22686 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-08-11 20:20:42 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						dc5e4a653c 
					 
					
						
						
							
							Upgrade FlashInfer to v0.2.11 ( #22613 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Po-Han Huang <pohanh@nvidia.com >
Co-authored-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-08-11 19:58:41 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						839ab00349 
					 
					
						
						
							
							Re-enable Xet on TPU tests now that hf_xet has been updated ( #22666 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-08-11 19:54:40 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9b94d6ec8f 
					 
					
						
						
							
							Enable 4bit bnb prequant MOE ( #21548 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com >
Co-authored-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-08-11 19:02:14 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1891a265d3 
					 
					
						
						
							
							[gpt-oss] Add test for response API + harmony (but skipped) ( #22554 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chen Zhang <zhangch99@outlook.com > 
						
						
					 
					
						2025-08-11 17:47:24 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						95a935fc48 
					 
					
						
						
							
							[gpt-oss] Support streaming in response API ( #22431 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chen Zhang <zhangch99@outlook.com > 
						
						
					 
					
						2025-08-11 17:46:59 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						458e74eb90 
					 
					
						
						
							
							Support more parallel styles in Transformers backend TP ( #22651 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-08-11 10:42:48 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						65abe111a3 
					 
					
						
						
							
							[CI] Skip Tree Attn Test in test_max_len.py to unblock CI ( #22664 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: tjtanaa <tunjian.tan@embeddedllm.com > 
						
						
					 
					
						2025-08-11 10:36:05 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						807d21b80d 
					 
					
						
						
							
							[BugFix] [Spec Decode] Remove LlamaForCausalLMEagle3 to fix CI ( #22611 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: 22quinn <33176974+22quinn@users.noreply.github.com > 
						
						
					 
					
						2025-08-11 10:31:36 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c90fb03df5 
					 
					
						
						
							
							[CI/Build] Skip Mllama HF runner tests with Transformers v4.55.0 ( #22659 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <2037008807@qq.com > 
						
						
					 
					
						2025-08-11 10:00:58 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						84cf78acee 
					 
					
						
						
							
							[Model] Pooling models default to using chunked prefill & prefix caching if supported. ( #20930 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: wang.yuqi <noooop@126.com > 
						
						
					 
					
						2025-08-11 09:41:37 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						16fb668b61 
					 
					
						
						
							
							fix: NIXL connector transfers partial block to pass full multi-modal context ( #21074 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: GuanLuo <gluo@nvidia.com > 
						
						
					 
					
						2025-08-11 09:40:55 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f7dcce7a4a 
					 
					
						
						
							
							[Feature] Add VLLM_USE_DEEP_GEMM_E8M0 Env to Control E8M0 Scale ( #21968 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yewentao256 <zhyanwentao@126.com > 
						
						
					 
					
						2025-08-11 09:39:08 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8e13d9fe6d 
					 
					
						
						
							
							[Misc] Further clean up some redundant config definitions ( #22649 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <2037008807@qq.com > 
						
						
					 
					
						2025-08-11 09:22:25 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3fa5b25845 
					 
					
						
						
							
							Document aarch64 CPU support works ( #22646 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Eric Curtin <ecurtin@redhat.com > 
						
						
					 
					
						2025-08-11 07:22:45 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						14a5d903ab 
					 
					
						
						
							
							[Model] NemotronH Support  ( #22349 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Daniel Afrimi <danielafrimi8@gmail.com > 
						
						
					 
					
						2025-08-11 04:09:24 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						951b038298 
					 
					
						
						
							
							[Misc] Move jsontree to utils ( #22622 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-08-11 03:49:32 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ebf7605b0d 
					 
					
						
						
							
							[Misc] Move tensor schema tests ( #22612 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-08-11 00:15:27 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						bc1d02ac85 
					 
					
						
						
							
							[Docs] Add comprehensive CLI reference for all large vllm subcommands ( #22601 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-08-11 00:13:33 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1e55dfa7e5 
					 
					
						
						
							
							[BUGFIX] KeyError 'layers.14.mlp.gate.g_idx' for Qwen3-MoE with GPTQ on ROCm ( #22017 )  
						
						 
						
						
						
						
					 
					
						2025-08-11 00:13:30 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						384a052971 
					 
					
						
						
							
							[Misc] benchmark_moe supports expert parallel ( #22251 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-08-11 00:13:27 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						39052dbca8 
					 
					
						
						
							
							Support token_type_ids in V1 with less code changes ( #21985 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Max de Bayser <mbayser@br.ibm.com > 
						
						
					 
					
						2025-08-10 22:54:59 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9c97a1c349 
					 
					
						
						
							
							[ROCm][AITER] Support AITER Rope ops in RotaryEmbedding Module. ( #22521 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: vllmellm <vllm.ellm@embeddedllm.com > 
						
						
					 
					
						2025-08-10 22:52:34 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f919d4cb8f 
					 
					
						
						
							
							[BugFix] Fix logits repetition penalty cuda check ( #22592 )  
						
						 
						
						
						
						
					 
					
						2025-08-10 22:52:31 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						afa5b7ca0b 
					 
					
						
						
							
							[Misc][gpt-oss] guard import when triton kernel when not up to date  ( #22584 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: zhewenli <zhewenli@meta.com > 
						
						
					 
					
						2025-08-10 21:29:35 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1b99028069 
					 
					
						
						
							
							[Misc][gpt-oss] Add rules to label gpt-oss related PRs ( #22600 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lifan Shen <lifans@meta.com > 
						
						
					 
					
						2025-08-10 19:49:51 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5898b135ab 
					 
					
						
						
							
							[BugFix] Fix KVConnectorOutput TPU breakage ( #22598 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-08-10 19:33:48 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b799f4b9ea 
					 
					
						
						
							
							[CI/Build] Fix tensorizer test for load_format change ( #22583 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: 22quinn <33176974+22quinn@users.noreply.github.com > 
						
						
					 
					
						2025-08-10 19:30:00 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						06da44f0cb 
					 
					
						
						
							
							Migrate LlavaImageInputs to TensorSchema ( #21770 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Benji Beck <benjibeck@meta.com > 
						
						
					 
					
						2025-08-10 19:29:19 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a554991748 
					 
					
						
						
							
							Migrate LlavaNextVideoPixelInputs to TensorSchema ( #21843 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Benji Beck <benjibeck@meta.com > 
						
						
					 
					
						2025-08-10 19:29:16 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d1af8b7be9 
					 
					
						
						
							
							enable Docker-aware precompiled wheel setup ( #22106 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: dougbtv <dosmith@redhat.com > 
						
						
					 
					
						2025-08-10 16:29:02 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						68b254d673 
					 
					
						
						
							
							Fix TensorSchema validation test for symbolic dims ( #22366 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Benji Beck <benjibeck@meta.com > 
						
						
					 
					
						2025-08-10 17:16:44 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8c50d62f5a 
					 
					
						
						
							
							Remove redundant row_indices unsqueeze operation in MiniCPMO ( #22528 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: zitian.zhao <zitian.zhao@tencentmusic.com > 
						
						
					 
					
						2025-08-10 09:20:00 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b4e2916721 
					 
					
						
						
							
							Migrate LlavaNextImageInputs to TensorSchema ( #21774 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Benji Beck <benjibeck@meta.com >
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> 
						
						
					 
					
						2025-08-10 09:05:21 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						65a7917be4 
					 
					
						
						
							
							Fix(benchmarks): allow multiple mm contents in OpenAI Chat Completion Benchmarks ( #22534 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: breno.skuk <breno.skuk@hcompany.ai > 
						
						
					 
					
						2025-08-10 09:03:15 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b76753f0b5 
					 
					
						
						
							
							[Bugfix][Kernel] Support partial rotary embedding for MRoPE triton kernel ( #22593 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn > 
						
						
					 
					
						2025-08-10 09:00:36 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b81fe83b2c 
					 
					
						
						
							
							[doc] add alibaba cloud as sponsor ( #22597 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: youkaichao <youkaichao@gmail.com > 
						
						
					 
					
						2025-08-10 23:13:47 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0757551c96 
					 
					
						
						
							
							[doc] add beijing meetup links ( #22596 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: youkaichao <youkaichao@gmail.com > 
						
						
					 
					
						2025-08-10 22:51:36 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8290d15d2c 
					 
					
						
						
							
							Move CacheConfig from config/__init__.py to config/cache.py ( #22586 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-08-10 07:36:40 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						049c245143 
					 
					
						
						
							
							[Misc] Replace flaky image urls in pixtral test ( #22574 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn >
Signed-off-by: Isotr0py <2037008807@qq.com > 
						
						
					 
					
						2025-08-10 06:18:21 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						00976db0c3 
					 
					
						
						
							
							[Docs] Fix warnings in docs build ( #22588 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-08-10 05:49:51 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d411df0296 
					 
					
						
						
							
							[Misc] Further refine type annotations in parallel state ( #22499 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-08-10 05:49:48 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						010e0e39ea 
					 
					
						
						
							
							[Doc] Fix API doc link in side navigation ( #22585 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: 22quinn <33176974+22quinn@users.noreply.github.com > 
						
						
					 
					
						2025-08-10 01:35:22 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						326976291b 
					 
					
						
						
							
							[Misc] code clean duplicate set_current_vllm_config in _set_vllm_config ( #22566 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Andy Xie <andy.xning@gmail.com > 
						
						
					 
					
						2025-08-10 00:08:48 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7e8d685775 
					 
					
						
						
							
							[Minor] Fix pre-commit error on main ( #22579 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <2037008807@qq.com > 
						
						
					 
					
						2025-08-10 00:08:23 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c49848396d 
					 
					
						
						
							
							Refactor sliding window configuration to Transformers best practice ( #21927 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-08-09 20:50:48 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2a84fb422f 
					 
					
						
						
							
							[TPU] kv cache update kernel doesn't need to be padded slices to multiple of num_slices_per_block ( #22394 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chengji Yao <chengjiyao@gmail.com >
Co-authored-by: Chengji Yao <chengjiyao@gmail.com > 
						
						
					 
					
						2025-08-09 20:49:04 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						534c45b962 
					 
					
						
						
							
							Improve fast_topk function with type hints and documentation ( #22530 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: zitian.zhao <zitian.zhao@tencentmusic.com > 
						
						
					 
					
						2025-08-09 20:25:42 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3d7363e61c 
					 
					
						
						
							
							[Config] add "qwen" as a native eagle3 target supported model ( #22333 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: lechen <lecself@163.com >
Signed-off-by: LeChen <lecself@163.com > 
						
						
					 
					
						2025-08-09 20:21:05 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0c5254b82a 
					 
					
						
						
							
							[oss] Init gpt-oss bf16 support ( #22508 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-08-09 20:19:13 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						61f67d8acd 
					 
					
						
						
							
							[V1] [Hybrid] Enable Full CUDA Graph (decode-only) for Mamba layers ( #21401 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Thomas Parnell <tpa@zurich.ibm.com > 
						
						
					 
					
						2025-08-09 20:16:11 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						42172ad18f 
					 
					
						
						
							
							[FEAT] [Performance] Add triton mrope to replace the torch code path ( #22375 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: tjtanaa <tunjian.tan@embeddedllm.com > 
						
						
					 
					
						2025-08-09 11:50:03 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						fbd8595c5c 
					 
					
						
						
							
							[Bugfix] Fix basic models tests hanging due to mm processor creation ( #22571 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn > 
						
						
					 
					
						2025-08-09 11:42:21 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5a16fa614c 
					 
					
						
						
							
							[Model] Gemma3n MM ( #20495 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: ShriKode <shrikode@gmail.com >
Signed-off-by: NickLucche <nlucches@redhat.com >
Signed-off-by: Roger Wang <hey@rogerw.me >
Co-authored-by: ShriKode <shrikode@gmail.com >
Co-authored-by: Roger Wang <hey@rogerw.me > 
						
						
					 
					
						2025-08-09 09:56:25 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2d18256e47 
					 
					
						
						
							
							Move ParallelConfig from config/__init__.py to config/parallel.py ( #22565 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-08-09 08:33:46 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						56186474f6 
					 
					
						
						
							
							[Docs] Reduce noise in docs and --help from the JSON tip ( #22567 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-08-09 08:31:32 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1bf5e1f25b 
					 
					
						
						
							
							[CI] [Hybrid] Speed up hybrid models test by removing large models  ( #22563 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Thomas Parnell <tpa@zurich.ibm.com > 
						
						
					 
					
						2025-08-09 02:04:42 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a6022e6fbc 
					 
					
						
						
							
							GLM-4.5V with new class name at transformers ( #22520 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: zRzRzRzRzRzRzR <2448370773@qq.com >
Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn >
Co-authored-by: Isotr0py <mozf@mail2.sysu.edu.cn > 
						
						
					 
					
						2025-08-09 00:50:21 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2be07a0db1 
					 
					
						
						
							
							Update docs for Minimax-Text support ( #22562 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Thomas Parnell <tpa@zurich.ibm.com > 
						
						
					 
					
						2025-08-09 00:18:18 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0edc0cd52b 
					 
					
						
						
							
							[Bugfix] Fix CI moe kernel failure ( #22556 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-08-09 00:03:29 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7920e9b1c5 
					 
					
						
						
							
							[Bugfix] Fix failing GPT-OSS initialization test ( #22557 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn > 
						
						
					 
					
						2025-08-09 00:03:26 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b7c0942b65 
					 
					
						
						
							
							[ROCm][Misc] Rename the context_len to seq_len in ROCm custom paged attention kernel ( #22097 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: charlifu <charlifu@amd.com > 
						
						
					 
					
						2025-08-08 23:15:06 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9a0c5ded5a 
					 
					
						
						
							
							[TPU] Add support for online w8a8 quantization ( #22425 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Kyuyeun Kim <kyuyeunk@google.com > 
						
						
					 
					
						2025-08-08 23:12:54 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						10a02535d4 
					 
					
						
						
							
							Fix loading of quantized BigCode models ( #22463 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Eldar Kurtic <eldar@neuralmagic.com > 
						
						
					 
					
						2025-08-08 23:12:12 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						65552b476b 
					 
					
						
						
							
							[Misc] Use config definitions from Transformers library ( #21913 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-08-08 23:10:51 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7ad7adb67f 
					 
					
						
						
							
							v1: Pass KVConnectorOutput to scheduler-side ( #22157 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Or Ozeri <oro@il.ibm.com > 
						
						
					 
					
						2025-08-08 23:09:51 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6ade99eafa 
					 
					
						
						
							
							[V1] [Hybrid] Support Minimax-Text-01 in V1  ( #22151 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Thomas Parnell <tpa@zurich.ibm.com > 
						
						
					 
					
						2025-08-08 23:08:48 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3157aebb63 
					 
					
						
						
							
							[Log] Add Warning for Deprecation of DeepGEMM old version ( #22194 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yewentao256 <zhyanwentao@126.com > 
						
						
					 
					
						2025-08-08 23:07:48 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8a0ffd6285 
					 
					
						
						
							
							Remove mamba_ssm from vLLM requirements; install inside test container using --no-build-isolation ( #22541 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Thomas Parnell <tpa@zurich.ibm.com > 
						
						
					 
					
						2025-08-08 23:05:32 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						23472ff51c 
					 
					
						
						
							
							[Doc] Add usage of implicit text-only mode  ( #22561 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Roger Wang <hey@rogerw.me >
Co-authored-by: Flora Feng <4florafeng@gmail.com > 
						
						
					 
					
						2025-08-08 23:04:19 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						08b751ba74 
					 
					
						
						
							
							Implicit language-model-only mode via limit-mm-per-prompt ( #22299 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Roger Wang <hey@rogerw.me >
Signed-off-by: Andy Xie <andy.xning@gmail.com >
Signed-off-by: tjtanaa <tunjian.tan@embeddedllm.com >
Signed-off-by: Andrew Sansom <andrew@protopia.ai >
Signed-off-by: Zhiyu Cheng <zhiyuc@nvidia.com >
Signed-off-by: Shu Wang <shuw@nvidia.com >
Signed-off-by: Po-Han Huang <pohanh@nvidia.com >
Signed-off-by: Shu Wang. <shuw@nvidia.com >
Signed-off-by: XIn Li <xinli@nvidia.com >
Signed-off-by: Junhao Li <junhao@ubicloud.com >
Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com >
Signed-off-by: zRzRzRzRzRzRzR <2448370773@qq.com >
Signed-off-by: zitian.zhao <zitian.zhao@tencentmusic.com >
Signed-off-by: zitian zhao <zitian.zhao@tencentmusic.com >
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk >
Signed-off-by: iAmir97 <Amir.balwel@embeddedllm.com >
Signed-off-by: iAmir97 <71513472+iAmir97@users.noreply.github.com >
Signed-off-by: Linkun <github@lkchen.net >
Co-authored-by: Ning Xie <andy.xning@gmail.com >
Co-authored-by: TJian <tunjian.tan@embeddedllm.com >
Co-authored-by: Andrew Sansom <andrew@protopia.ai >
Co-authored-by: Zhiyu <zhiyuc@nvidia.com >
Co-authored-by: Shu Wang <shuw@nvidia.com >
Co-authored-by: XIn Li <xinli@nvidia.com >
Co-authored-by: Junhao Li <streaver91@gmail.com >
Co-authored-by: Chauncey <chaunceyjiang@gmail.com >
Co-authored-by: Yuxuan Zhang <2448370773@qq.com >
Co-authored-by: ZiTian Zhao <zitian.zhao@tencentmusic.com >
Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk >
Co-authored-by: Po-Han Huang (NVIDIA) <53919306+nvpohanh@users.noreply.github.com >
Co-authored-by: iAmir97 <71513472+iAmir97@users.noreply.github.com >
Co-authored-by: iAmir97 <Amir.balwel@embeddedllm.com >
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com >
Co-authored-by: Hong Hanh <hanh.usth@gmail.com >
Co-authored-by: youkaichao <youkaichao@gmail.com >
Co-authored-by: lkchen <github@lkchen.net > 
						
						
					 
					
						2025-08-08 22:21:40 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						429e4e2d42 
					 
					
						
						
							
							[Bugfix] Fix ModernBert cuda graph capturing in v1 ( #21901 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn >
Signed-off-by: Isotr0py <2037008807@qq.com > 
						
						
					 
					
						2025-08-08 22:17:22 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						35afe1b30b 
					 
					
						
						
							
							[BugFix] [P/D] Handle lookahead token count edge-case with Eagle Spec Decoding and P/D ( #22317 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Pradyun Ramadorai <pradyunr@amazon.com >
Signed-off-by: Pradyun92 <142861237+Pradyun92@users.noreply.github.com >
Co-authored-by: Pradyun Ramadorai <pradyunr@amazon.com >
Co-authored-by: Nicolò Lucchesi <nicolo.lucchesi@gmail.com > 
						
						
					 
					
						2025-08-08 17:04:15 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						81c57f60a2 
					 
					
						
						
							
							[XPU] upgrade torch 2.8 on for XPU ( #22300 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Kunshang Ji <kunshang.ji@intel.com > 
						
						
					 
					
						2025-08-08 17:03:45 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						311d875614 
					 
					
						
						
							
							Drop flaky test_healthcheck_response_time ( #22539 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Russell Bryant <rbryant@redhat.com > 
						
						
					 
					
						2025-08-08 16:56:47 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e3edc0a7a8 
					 
					
						
						
							
							Extract CompilationConfig from config.py ( #22524 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-08-08 16:34:25 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						baece8c3d2 
					 
					
						
						
							
							[Frontend] Add unix domain socket support ( #18097 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: <yyweiss@gmail.com >
Signed-off-by: yyw <yyweiss@gmail.com > 
						
						
					 
					
						2025-08-08 16:23:44 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2fcf6b27b6 
					 
					
						
						
							
							[Docs] fix broken links in metrics.md ( #22315 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Guy Stone <guys@spotify.com >
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com >
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-08-08 16:22:35 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						41b9655751 
					 
					
						
						
							
							Skip Qwen 1 in CI because remote code is no longer compatible with Transformers ( #22536 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-08-08 16:20:58 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						bd875d2eb7 
					 
					
						
						
							
							[Bugfix] Update FA commit hash ( #22546 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Thomas Parnell <tpa@zurich.ibm.com > 
						
						
					 
					
						2025-08-08 16:10:25 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f703b923f3 
					 
					
						
						
							
							[Misc] DeepGEMM : Avoid JIT generation in the hot-path ( #22215 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Varun Sundar Rabindranath <vsundarr@redhat.com >
Co-authored-by: Varun Sundar Rabindranath <vsundarr@redhat.com > 
						
						
					 
					
						2025-08-08 16:09:59 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						cd9b9de1fb 
					 
					
						
						
							
							[BugFix] Fix IMA FlashMLA full cuda-graph and DP + Update FlashMLA ( #21691 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com >
Co-authored-by: yewentao256 <zhyanwentao@126.com >
Co-authored-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com > 
						
						
					 
					
						2025-08-08 16:09:42 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						fe6d8257a1 
					 
					
						
						
							
							[gpt-oss] Support tool call and implement MCP tool server ( #22427 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chen Zhang <zhangch99@outlook.com > 
						
						
					 
					
						2025-08-08 15:06:37 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e290594072 
					 
					
						
						
							
							[Docs] Rename “Distributed inference and serving” to “Parallelism & Scaling” ( #22466 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Ricardo Decal <rdecal@anyscale.com > 
						
						
					 
					
						2025-08-08 19:26:21 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f756a682d9 
					 
					
						
						
							
							[gpt-oss] guard import when triton kernel is not installed ( #22529 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Yongye Zhu <zyy1102000@gmail.com >
Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu >
Co-authored-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-08-08 11:18:33 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f0964e29cb 
					 
					
						
						
							
							[Benchmark] Add benchmark tool for multi turn conversations ( #20267 )  
						
						 
						
						
						
						
					 
					
						2025-08-08 10:28:50 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e789cad6b8 
					 
					
						
						
							
							[gpt-oss] triton kernel mxfp4 ( #22421 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: <zyy1102000@gmail.com >
Signed-off-by: Yongye Zhu <zyy1102000@gmail.com > 
						
						
					 
					
						2025-08-08 08:24:07 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e5ebeeba53 
					 
					
						
						
							
							Remove exception for Python 3.8 typing from linter ( #22506 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-08-08 03:06:46 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7be7f3824a 
					 
					
						
						
							
							[Docs] Improve API docs (+small tweaks) ( #22459 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-08-08 03:02:51 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ccdae737a0 
					 
					
						
						
							
							[BugFix] Don't cancel asyncio tasks directly from destructors ( #22476 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-08-08 01:13:18 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						904063907c 
					 
					
						
						
							
							[Misc] fix openai version ( #22485 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: rongfu.leng <rongfu.leng@daocloud.io > 
						
						
					 
					
						2025-08-08 01:12:54 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						43c4f3d77c 
					 
					
						
						
							
							[Misc] Begin deprecation of get_tensor_model_*_group ( #22494 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-08-08 01:11:54 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1712543df6 
					 
					
						
						
							
							[CI/Build] Fix multimodal tests ( #22491 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-08-08 00:31:19 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						808a7b69df 
					 
					
						
						
							
							[bench] Fix benchmark/serve.py to ignore unavailable results ( #22382 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Linkun <github@lkchen.net > 
						
						
					 
					
						2025-08-07 23:15:50 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						099c046463 
					 
					
						
						
							
							[Doc] Sleep mode documentation ( #22310 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: iAmir97 <Amir.balwel@embeddedllm.com >
Signed-off-by: iAmir97 <71513472+iAmir97@users.noreply.github.com >
Co-authored-by: iAmir97 <Amir.balwel@embeddedllm.com >
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com >
Co-authored-by: Hong Hanh <hanh.usth@gmail.com >
Co-authored-by: youkaichao <youkaichao@gmail.com > 
						
						
					 
					
						2025-08-08 12:25:18 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						af473f0a85 
					 
					
						
						
							
							[bugfix] Fix Llama3/4 issues caused by FlashInfer 0.2.10 ( #22426 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Po-Han Huang <pohanh@nvidia.com > 
						
						
					 
					
						2025-08-07 20:25:01 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						157f9c1368 
					 
					
						
						
							
							Fix pre-commit ( #22487 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-08-07 20:21:54 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6f287915d8 
					 
					
						
						
							
							Optimize MiniCPMO mask creation with vectorized implementation ( #22464 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: zitian.zhao <zitian.zhao@tencentmusic.com >
Signed-off-by: zitian zhao <zitian.zhao@tencentmusic.com > 
						
						
					 
					
						2025-08-07 20:18:50 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c152e2a8a0 
					 
					
						
						
							
							not tie_word_embeddings for glm-4.5 and glm-4.5v ( #22460 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: zRzRzRzRzRzRzR <2448370773@qq.com > 
						
						
					 
					
						2025-08-07 19:37:23 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						17eaaef595 
					 
					
						
						
							
							[Bugfix] Fix RuntimeError: Index put requires the source and destination dtypes match ( #22065 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com > 
						
						
					 
					
						2025-08-07 19:20:21 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3303f134e0 
					 
					
						
						
							
							[Kernel] Add support for block FP8 on SM120 (NVIDIA 5090 and RTX PRO 6000) ( #22131 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Junhao Li <junhao@ubicloud.com > 
						
						
					 
					
						2025-08-07 19:18:28 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b2c8ce57c6 
					 
					
						
						
							
							Fix Flashinfer CUTLASS MOE Allgather ( #21963 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Shu Wang <shuw@nvidia.com > 
						
						
					 
					
						2025-08-07 19:18:25 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a3b9c17b56 
					 
					
						
						
							
							Support Tensorrt-LLM MoE fp4 for low-latency ( #21331 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Shu Wang <shuw@nvidia.com >
Signed-off-by: Po-Han Huang <pohanh@nvidia.com >
Signed-off-by: Shu Wang. <shuw@nvidia.com >
Signed-off-by: XIn Li <xinli@nvidia.com >
Co-authored-by: XIn Li <xinli@nvidia.com > 
						
						
					 
					
						2025-08-07 19:18:22 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d57dc2364e 
					 
					
						
						
							
							Add ModelOpt Qwen3 nvfp4 support ( #20101 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Zhiyu Cheng <zhiyuc@nvidia.com > 
						
						
					 
					
						2025-08-07 19:18:19 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e2c8f1edec 
					 
					
						
						
							
							[PERF] Use pybase64 to more quickly decode prompt embeddings ( #22469 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Andrew Sansom <andrew@protopia.ai > 
						
						
					 
					
						2025-08-07 19:15:32 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1ee5ead5f8 
					 
					
						
						
							
							[ROCm] [V1] [SpecDec] Enable Speculative Decoding on ROCm V1 Engine ( #21496 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: tjtanaa <tunjian.tan@embeddedllm.com > 
						
						
					 
					
						2025-08-07 19:13:17 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						acf8aeb79e 
					 
					
						
						
							
							[Misc] normalize multiprocessing Queue usage ( #22371 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Andy Xie <andy.xning@gmail.com > 
						
						
					 
					
						2025-08-08 01:57:27 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7e3a8dc906 
					 
					
						
						
							
							Remove from_dict from SpeculativeConfig ( #22451 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-08-07 10:13:04 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						139d155781 
					 
					
						
						
							
							[Frontend] Use engine argument to control MM cache size ( #22441 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-08-07 09:47:10 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8c9da6be22 
					 
					
						
						
							
							[Core] Simplify mm processing cache ( #22457 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-08-07 09:47:07 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						399d2a10e2 
					 
					
						
						
							
							Fix pre-commit error in main ( #22462 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-08-07 08:54:39 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4815b00f54 
					 
					
						
						
							
							[gpt-oss] Generate ResponseOutputItem from Harmony Message ( #22410 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chen Zhang <zhangch99@outlook.com > 
						
						
					 
					
						2025-08-07 08:33:25 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4da8bf20d0 
					 
					
						
						
							
							[Tool] Fix auto tool call ( #22434 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chen Zhang <zhangch99@outlook.com > 
						
						
					 
					
						2025-08-07 07:03:38 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7e0b121812 
					 
					
						
						
							
							[Bugfix] Add missing packed_modules_mapping to DeepseekV2ForCausalLM ( #22352 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Felix Marty <Felix.Marty@amd.com > 
						
						
					 
					
						2025-08-07 06:30:48 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						766bc8162c 
					 
					
						
						
							
							[Core] Store only the keys for multi-modal data in P0 ( #22198 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-08-07 01:45:04 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						289b18e670 
					 
					
						
						
							
							[Docs] Update features/disagg_prefill, add v1 examples and development ( #22165 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: David Chen <530634352@qq.com > 
						
						
					 
					
						2025-08-07 00:59:23 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						35171b1172 
					 
					
						
						
							
							[Doc] update docs for nightly benchmarks ( #12022 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Andrew Chan <andrewkchan.akc@gmail.com > 
						
						
					 
					
						2025-08-07 00:29:45 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a2c6696bfe 
					 
					
						
						
							
							[Docs] Factor out troubleshooting to its own guide; add section for Ray Observability ( #21578 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Ricardo Decal <rdecal@anyscale.com > 
						
						
					 
					
						2025-08-07 00:29:13 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5e8398805e 
					 
					
						
						
							
							[Doc] Fix link to prefix caching design ( #22384 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Yong Hoon Shin <yhshin@meta.com > 
						
						
					 
					
						2025-08-07 00:28:15 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						136825de75 
					 
					
						
						
							
							[Misc] Enhance code formatting in mxfp4.py  ( #22423 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-08-07 00:26:24 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c2dba2dba8 
					 
					
						
						
							
							Add H20-3e fused MoE kernel tuning configs for GLM-4.5 ( #22433 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: shaojunqi <shaojunqi.sjq@alibaba-inc.com >
Co-authored-by: shaojunqi <shaojunqi.sjq@alibaba-inc.com > 
						
						
					 
					
						2025-08-07 00:24:47 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						434d2f3f7a 
					 
					
						
						
							
							[Docs] Add missing dependency for docs build ( #22435 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-08-07 00:22:07 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8e8e0b6af1 
					 
					
						
						
							
							feat: Add --enable-log-outputs flag for logging model generations ( #20707 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Adrian Garcia <adrian.garcia@inceptionai.ai > 
						
						
					 
					
						2025-08-06 23:10:13 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						82216dc21f 
					 
					
						
						
							
							[Misc] Support routing logic simulation ( #21990 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Ming Yang <minos.future@gmail.com >
Co-authored-by: Tyler Michael Smith <tyler@neuralmagic.com >
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> 
						
						
					 
					
						2025-08-06 23:06:20 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						370661856b 
					 
					
						
						
							
							[Frontend] Update OpenAI error response to upstream format ( #22099 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Moritz Sanft <58110325+msanft@users.noreply.github.com > 
						
						
					 
					
						2025-08-06 23:06:00 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						cbc8457b26 
					 
					
						
						
							
							[Model] Switch to Fused RMS norm in Qwen2.5_VL model. ( #22184 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: kf <kuanfu.liu@embeddedllm.com >
Signed-off-by: tjtanaavllm <tunjian.tan@amd.com >
Signed-off-by: vllmellm <vllm.ellm@embeddedllm.com >
Co-authored-by: kf <kuanfu.liu@embeddedllm.com > 
						
						
					 
					
						2025-08-06 23:05:24 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4d4297e8fe 
					 
					
						
						
							
							[Bench] Split serve.py:main into async/async versions ( #22405 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Linkun <github@lkchen.net > 
						
						
					 
					
						2025-08-06 23:05:07 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2a4c825523 
					 
					
						
						
							
							[CI] Skip the pooling models that do not support transformers v4.55 ( #22411 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: wang.yuqi <noooop@126.com > 
						
						
					 
					
						2025-08-06 23:05:03 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4be02a3776 
					 
					
						
						
							
							[Bugfix] EPLB load statistics problem ( #22167 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: ycyaw66 <497410282@qq.com >
Signed-off-by: David Chen <530634352@qq.com >
Co-authored-by: ycyaw66 <497410282@qq.com > 
						
						
					 
					
						2025-08-07 04:07:54 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f6278b6243 
					 
					
						
						
							
							[gpt-oss] Convert user input to harmony format ( #22402 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chen Zhang <zhangch99@outlook.com >
Co-authored-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-08-06 20:56:02 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ad6c655dde 
					 
					
						
						
							
							preload heavy modules when mp method is forkserver ( #22214 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lionel Villard <villard@us.ibm.com > 
						
						
					 
					
						2025-08-06 20:33:24 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						14bcf93a6a 
					 
					
						
						
							
							Optimize logger init performance by using module-level constants ( #22373 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: zitian.zhao <zitian.zhao@tencentmusic.com > 
						
						
					 
					
						2025-08-06 20:32:19 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ecbea55ca2 
					 
					
						
						
							
							Update hf_xet pin to resolve hangs ( #22356 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-08-06 20:31:41 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						609b533cb6 
					 
					
						
						
							
							[Bugfix] Add proper comparison for package versions ( #22314 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Syed Muhammad Bin Asif <syedmba7@connect.hku.hk > 
						
						
					 
					
						2025-08-06 20:31:03 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5e9455ae8f 
					 
					
						
						
							
							[Bugfix]: Fix the streaming output for function calls in the minimax ( #22015 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: QscQ <qscqesze@gmail.com >
Signed-off-by: qingjun <qingjun@minimaxi.com > 
						
						
					 
					
						2025-08-06 20:30:27 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a00d8b236f 
					 
					
						
						
							
							Use float32 for test_completion.py ( #22385 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Michael Goin <mgoin64@gmail.com > 
						
						
					 
					
						2025-08-07 11:07:47 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						04cf435d95 
					 
					
						
						
							
							[Bugfix] Fix wrong method name in Intern-S1 image processor ( #22417 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-08-06 20:05:20 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7377131a2c 
					 
					
						
						
							
							[Qwen3] Enable dual-chunk-attention support for Qwen3 models. ( #21924 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Tao He <linzhu.ht@alibaba-inc.com > 
						
						
					 
					
						2025-08-06 19:58:08 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6b47ef24de 
					 
					
						
						
							
							[XPU]Fix flash_attn_varlen_func interface on xpu ( #22350 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Kunshang Ji <kunshang.ji@intel.com > 
						
						
					 
					
						2025-08-06 19:28:11 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1dc8a70b6d 
					 
					
						
						
							
							[Attention] Support multiple attention metadata builders per kv_cache_spec  + proper local attention no hybrid kv cache fix ( #21588 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com > 
						
						
					 
					
						2025-08-06 18:40:52 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f825c6bd22 
					 
					
						
						
							
							Support encoder_only attention for FlexAttention ( #22273 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Max de Bayser <mbayser@br.ibm.com > 
						
						
					 
					
						2025-08-06 18:37:14 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						41b67f4263 
					 
					
						
						
							
							[model] Support MiniCPM-V 4.0 ( #22166 )  
						
						 
						
						... 
						
						
						
						Co-authored-by: imning3 <hbning@pku.edu.cn > 
						
						
					 
					
						2025-08-06 18:35:46 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e8961e963a 
					 
					
						
						
							
							Update flashinfer-python==0.2.10 ( #22389 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-08-06 18:10:24 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9a3835aaa9 
					 
					
						
						
							
							Fix trtllm-gen attention env and add attention sink ( #22378 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Siyuan Fu <siyuanf@nvidia.com >
Signed-off-by: Lain <fusiyuan2000@hotmail.com >
Signed-off-by: Yongye Zhu <zyy1102000@gmail.com >
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Michael Goin <mgoin64@gmail.com >
Co-authored-by: Yongye Zhu <zyy1102000@gmail.com > 
						
						
					 
					
						2025-08-06 18:07:41 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5c7cc33f4d 
					 
					
						
						
							
							[gpt-oss] fix model config with hf_config ( #22401 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Yongye Zhu <zyy1102000@gmail.com > 
						
						
					 
					
						2025-08-06 18:04:04 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						19c9365aa4 
					 
					
						
						
							
							[gpt-oss] add demo tool server ( #22393 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chen Zhang <zhangch99@outlook.com > 
						
						
					 
					
						2025-08-06 17:47:14 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						eec890c1c1 
					 
					
						
						
							
							[Bug] Fix B200 DeepGEMM E8M0 Accuracy Issue ( #22399 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yewentao256 <zhyanwentao@126.com > 
						
						
					 
					
						2025-08-06 17:03:53 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						46a13949d5 
					 
					
						
						
							
							[v1] - Mamba1 Attention Metadata ( #21249 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: asafg <asafg@ai21.com >
Co-authored-by: asafg <asafg@ai21.com > 
						
						
					 
					
						2025-08-06 17:03:42 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						31f09c615f 
					 
					
						
						
							
							[gpt-oss] flashinfer mxfp4 ( #22339 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: simon-mo <xmo@berkeley.edu >
Signed-off-by: Yongye Zhu <zyy1102000@gmail.com >
Co-authored-by: simon-mo <xmo@berkeley.edu > 
						
						
					 
					
						2025-08-06 12:37:27 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						31f5dc5b2a 
					 
					
						
						
							
							[gpt-oss] Enhance error msg on attention sink init ( #22335 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: simon-mo <xmo@berkeley.edu >
Signed-off-by: Yongye Zhu <zyy1102000@gmail.com >
Co-authored-by: simon-mo <xmo@berkeley.edu > 
						
						
					 
					
						2025-08-06 11:41:42 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ec7cb19224 
					 
					
						
						
							
							[gpt-oss] Add loop for built-in tool call ( #22374 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu >
Co-authored-by: LiuXiaoxuanPKU <lilyliupku@gmail.com >
Co-authored-by: simon-mo <xmo@berkeley.edu >
Co-authored-by: Chen Zhang <zhangch99@outlook.com >
Co-authored-by: Hongxia Yang <62075498+hongxiayang@users.noreply.github.com >
Co-authored-by: Minseok Lee <47620120+minseokl@users.noreply.github.com >
Co-authored-by: Yongye Zhu <zyy1102000@gmail.com > 
						
						
					 
					
						2025-08-06 10:32:21 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2435ea7ed5 
					 
					
						
						
							
							[Bugfix] Make condition in triton kernel constexpr ( #22370 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Gregory Shtrasberg <Gregory.Shtrasberg@amd.com > 
						
						
					 
					
						2025-08-06 10:00:58 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4a6b72c2ab 
					 
					
						
						
							
							[BugFix] Fix triton compile error in kernel_unified_attention_2/3d caused by attention sinks ( #22368 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: LucasWilkinson <lwilkinson@neuralmagic.com > 
						
						
					 
					
						2025-08-06 09:47:38 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b4b9813b5e 
					 
					
						
						
							
							add the codes to check AMD Instinct GPU number ( #22367 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Zhang Jason <ning.zhang2@amd.com > 
						
						
					 
					
						2025-08-06 08:58:38 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2cb6ef8996 
					 
					
						
						
							
							[BugFix] Fix FA2 RuntimeError when sinks is provided ( #22365 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: LucasWilkinson <lwilkinson@neuralmagic.com > 
						
						
					 
					
						2025-08-06 08:03:03 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9edd1db02b 
					 
					
						
						
							
							[Minor] Fix type  ( #22347 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-08-06 02:22:03 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f263a4b53f 
					 
					
						
						
							
							[gpt-oss] Support chat completion api ( #22342 )  
						
						 
						
						
						
						
					 
					
						2025-08-06 01:57:39 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						54991c548a 
					 
					
						
						
							
							[gpt-oss] add model to supported models doc ( #22336 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Roger Wang <hey@rogerw.me > 
						
						
					 
					
						2025-08-06 01:49:44 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						178d03fbd6 
					 
					
						
						
							
							[gpt-oss] Add Tool/ConversationContext classes and harmony_utils ( #22340 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu >
Co-authored-by: LiuXiaoxuanPKU <lilyliupku@gmail.com >
Co-authored-by: simon-mo <xmo@berkeley.edu >
Co-authored-by: Chen Zhang <zhangch99@outlook.com >
Co-authored-by: Hongxia Yang <62075498+hongxiayang@users.noreply.github.com >
Co-authored-by: Minseok Lee <47620120+minseokl@users.noreply.github.com >
Co-authored-by: Yongye Zhu <zyy1102000@gmail.com > 
						
						
					 
					
						2025-08-06 01:08:49 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						fa00c5d75b 
					 
					
						
						
							
							[Misc] Clean up duplicated hf overrides ( #22311 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <2037008807@qq.com > 
						
						
					 
					
						2025-08-06 07:50:25 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						134a8ee8fd 
					 
					
						
						
							
							[gpt-oss] Add openai-harmony as default dependency ( #22332 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu >
Co-authored-by: LiuXiaoxuanPKU <lilyliupku@gmail.com >
Co-authored-by: simon-mo <xmo@berkeley.edu >
Co-authored-by: Chen Zhang <zhangch99@outlook.com >
Co-authored-by: Hongxia Yang <62075498+hongxiayang@users.noreply.github.com >
Co-authored-by: Minseok Lee <47620120+minseokl@users.noreply.github.com >
Co-authored-by: Yongye Zhu <zyy1102000@gmail.com > 
						
						
					 
					
						2025-08-06 00:10:14 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						90ec006937 
					 
					
						
						
							
							[gpt-oss] flashinfer attention sink init ( #22330 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: simon-mo <xmo@berkeley.edu >
Co-authored-by: LiuXiaoxuanPKU <lilyliupku@gmail.com >
Co-authored-by: simon-mo <xmo@berkeley.edu >
Co-authored-by: Chen Zhang <zhangch99@outlook.com >
Co-authored-by: Woosuk Kwon <woosuk.kwon@berkeley.edu >
Co-authored-by: Hongxia Yang <62075498+hongxiayang@users.noreply.github.com >
Co-authored-by: Minseok Lee <47620120+minseokl@users.noreply.github.com > 
						
						
					 
					
						2025-08-05 23:48:19 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a47e6ffe93 
					 
					
						
						
							
							[GptOss] Add GptOss reasoning parser to support structure output ( #22322 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chen Zhang <zhangch99@outlook.com >
Co-authored-by: LiuXiaoxuanPKU <lilyliupku@gmail.com >
Co-authored-by: simon-mo <xmo@berkeley.edu >
Co-authored-by: Woosuk Kwon <woosuk.kwon@berkeley.edu >
Co-authored-by: Hongxia Yang <62075498+hongxiayang@users.noreply.github.com >
Co-authored-by: Minseok Lee <47620120+minseokl@users.noreply.github.com >
Co-authored-by: Yongye Zhu <zyy1102000@gmail.com > 
						
						
					 
					
						2025-08-05 23:39:13 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						98a3a81024 
					 
					
						
						
							
							[ROCm] Add attention sink to use_rocm_custom_paged_attention ( #22329 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu >
Co-authored-by: LiuXiaoxuanPKU <lilyliupku@gmail.com >
Co-authored-by: simon-mo <xmo@berkeley.edu >
Co-authored-by: Chen Zhang <zhangch99@outlook.com >
Co-authored-by: Hongxia Yang <62075498+hongxiayang@users.noreply.github.com >
Co-authored-by: Minseok Lee <47620120+minseokl@users.noreply.github.com >
Co-authored-by: Yongye Zhu <zyy1102000@gmail.com > 
						
						
					 
					
						2025-08-05 23:30:38 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						de98252f49 
					 
					
						
						
							
							Add GPT-OSS model code and config [1/N] ( #22327 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-08-05 23:26:00 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						796bae07c5 
					 
					
						
						
							
							Update transformers to v4.55 ( #21931 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com >
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk >
Signed-off-by: Isotr0py <2037008807@qq.com >
Signed-off-by: isotr0py <2037008807@qq.com >
Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn >
Co-authored-by: DarkLight1337 <tlleungac@connect.ust.hk >
Co-authored-by: Isotr0py <2037008807@qq.com >
Co-authored-by: Isotr0py <mozf@mail2.sysu.edu.cn >
Co-authored-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-08-05 22:56:14 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6e20924350 
					 
					
						
						
							
							Add attention sink in attention backends ( #22320 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu >
Co-authored-by: LiuXiaoxuanPKU <lilyliupku@gmail.com >
Co-authored-by: simon-mo <xmo@berkeley.edu >
Co-authored-by: Chen Zhang <zhangch99@outlook.com >
Co-authored-by: Hongxia Yang <62075498+hongxiayang@users.noreply.github.com >
Co-authored-by: Minseok Lee <47620120+minseokl@users.noreply.github.com >
Co-authored-by: Yongye Zhu <zyy1102000@gmail.com > 
						
						
					 
					
						2025-08-05 22:37:21 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						dd16bdc798 
					 
					
						
						
							
							Increase openai-python version ( #22316 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-08-05 21:43:21 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e3c876dca3 
					 
					
						
						
							
							Upgrade FA3 for attention sink ( #22313 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-08-05 21:36:21 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5d5d419ca6 
					 
					
						
						
							
							[Bugfix][CI/Build][ROCm] Make sure to use the headers from the build folder on ROCm ( #22264 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Gregory Shtrasberg <Gregory.Shtrasberg@amd.com > 
						
						
					 
					
						2025-08-05 20:39:32 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						302962e806 
					 
					
						
						
							
							[Bugfix] Skip dead and non-GPU nodes for Ray DP engine allocation ( #22275 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Rui Qiao <ruisearch42@gmail.com > 
						
						
					 
					
						2025-08-05 20:35:32 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7e6544c797 
					 
					
						
						
							
							[Perf] Parallelize fill_bitmask to accelerate high-throughput guided decoding ( #21862 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Benjamin Chislett <benjamin.chislett@centml.ai > 
						
						
					 
					
						2025-08-05 19:57:49 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8e6c7e873f 
					 
					
						
						
							
							[Bugfix] Fix MoE BNB version ( #22260 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-08-05 19:56:22 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6a51530437 
					 
					
						
						
							
							[Bugfix] Fix 3D input passed into cutlass_scaled_mm ( #22278 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-08-06 10:35:20 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						35509fc5be 
					 
					
						
						
							
							[Bugfix] Remove faulty test for oot attention backend ( #22286 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-08-06 00:05:40 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4b29d2784b 
					 
					
						
						
							
							[CI][TPU] Fix docker clean up ( #22271 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Siyuan Liu <lsiyuan@google.com > 
						
						
					 
					
						2025-08-05 23:54:56 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						59a0b8554b 
					 
					
						
						
							
							[bugfix] fix blackwell deepep installation ( #22255 )  
						
						 
						
						
						
						
					 
					
						2025-08-06 01:26:09 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						469b3ffaaa 
					 
					
						
						
							
							[V1] port xformers backend to v1 ( #21342 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Giancarlo Delfin <gdelfin@meta.com > 
						
						
					 
					
						2025-08-05 10:04:46 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ae87ddd040 
					 
					
						
						
							
							[Refactor] Remove Unused Environment Variable VLLM_NO_DEPRECATION_WARNING ( #22199 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yewentao256 <zhyanwentao@126.com > 
						
						
					 
					
						2025-08-05 09:40:23 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a7cb6101ca 
					 
					
						
						
							
							[CI/Build] Update flashinfer to 0.2.9 ( #22233 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-08-05 09:39:38 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c494f96fbc 
					 
					
						
						
							
							Use UV_LINK_MODE=copy in Dockerfile to avoid hardlink fail ( #22128 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-08-05 06:57:10 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0c275ad5ad 
					 
					
						
						
							
							[V0 Deprecation][TPU] Remove V1 flag check from tests ( #22248 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: NickLucche <nlucches@redhat.com > 
						
						
					 
					
						2025-08-05 06:53:23 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						74333ae2f6 
					 
					
						
						
							
							[Misc] correct static type check for GroupCoordinator ( #21946 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Andy Xie <andy.xning@gmail.com > 
						
						
					 
					
						2025-08-05 03:17:46 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						83156c7b89 
					 
					
						
						
							
							[NVIDIA] Support Flashinfer TRT-LLM Prefill Attention Kernel ( #22095 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: elvischenv <219235043+elvischenv@users.noreply.github.com > 
						
						
					 
					
						2025-08-05 02:45:34 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4771df7b2b 
					 
					
						
						
							
							[Feature] Non-contiguous Support for FP8 Quantization ( #21961 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yewentao256 <zhyanwentao@126.com >
Co-authored-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-08-05 02:36:43 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						05fae02175 
					 
					
						
						
							
							Migrate KimiVLImagePixelInputs to TensorSchema ( #21769 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Benji Beck <benjibeck@meta.com >
Co-authored-by: Isotr0py <2037008807@qq.com > 
						
						
					 
					
						2025-08-05 02:36:18 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d1bf1b9711 
					 
					
						
						
							
							[Docs][TPU] Highlight TPU Software version selection ( #22242 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: NickLucche <nlucches@redhat.com > 
						
						
					 
					
						2025-08-05 02:33:46 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						586f286789 
					 
					
						
						
							
							[Model] Pooling model activation supports per request control by PoolingParams ( #20538 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: wang.yuqi <noooop@126.com > 
						
						
					 
					
						2025-08-05 00:37:00 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						811ac13d03 
					 
					
						
						
							
							[Core] Factor out common logic for MM budget calculation ( #22228 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-08-04 23:54:55 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e79a12fc3a 
					 
					
						
						
							
							[UX] Fail if an invalid attention backend is specified ( #22217 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <michael@neuralmagic.com > 
						
						
					 
					
						2025-08-04 23:54:52 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						cdfd6871a5 
					 
					
						
						
							
							[Bugfix] Misaligned params in TreeAttentionImpl ( #22226 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-08-04 22:40:09 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4b3e4474d7 
					 
					
						
						
							
							Optimize configuration access with LRU cache in custom ops ( #22204 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: zitian zhao <zitian.zhao@tencentmusic.com > 
						
						
					 
					
						2025-08-04 21:43:24 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						bd3db7f469 
					 
					
						
						
							
							[Misc] log more detailed message for ensure_model_parallel_initialized ( #22144 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Andy Xie <andy.xning@gmail.com > 
						
						
					 
					
						2025-08-04 19:36:55 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						29b97c0995 
					 
					
						
						
							
							[Doc] add backend to doc string of initialize_model_parallel ( #22142 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Andy Xie <andy.xning@gmail.com > 
						
						
					 
					
						2025-08-04 19:36:20 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7b455cf1c0 
					 
					
						
						
							
							[Misc] Remove pass_config from CompilationConfig dump_json excluded ( #21911 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: elvischenv <219235043+elvischenv@users.noreply.github.com > 
						
						
					 
					
						2025-08-04 19:17:18 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8a6e108e76 
					 
					
						
						
							
							fix: kimi_k2 return empty tool call list ( #22149 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: tlipoca9 <tlipoca9@gmail.com > 
						
						
					 
					
						2025-08-04 19:15:31 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d7b28f3415 
					 
					
						
						
							
							[Log] DeepGEMM Update Log for Unaligned Problem Size ( #22208 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yewentao256 <zhyanwentao@126.com > 
						
						
					 
					
						2025-08-04 19:13:19 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6fa41e0c32 
					 
					
						
						
							
							self.gate dtype update for GLM-4.5 ( #22203 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: zRzRzRzRzRzRzR <2448370773@qq.com > 
						
						
					 
					
						2025-08-04 19:12:38 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						031ca762d7 
					 
					
						
						
							
							[ROCm][Bugfix] Compilation passes fix ( #22202 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Gregory Shtrasberg <Gregory.Shtrasberg@amd.com > 
						
						
					 
					
						2025-08-04 19:12:28 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6ad6b8e115 
					 
					
						
						
							
							[FEAT] Refactor ROPE into module ( #22192 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: tjtanaa <tunjian.tan@embeddedllm.com > 
						
						
					 
					
						2025-08-04 19:12:16 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f4f4e7ef27 
					 
					
						
						
							
							[V0 deprecation][P/D] Deprecate v0 KVConnectorBase code (1/2) ( #21785 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Linkun Chen <github@lkchen.net > 
						
						
					 
					
						2025-08-04 19:11:33 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5ea71ff46f 
					 
					
						
						
							
							[V1] reduce block size for tree attention correctness test to fix 'ou… ( #22207 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Giancarlo Delfin <gdelfin@meta.com > 
						
						
					 
					
						2025-08-04 19:11:06 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7175817637 
					 
					
						
						
							
							Revert "[Bugfix] V1 Fix the cursor leakage issue during request scheduling." ( #22223 )  
						
						 
						
						
						
						
					 
					
						2025-08-04 18:37:06 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2dffac464c 
					 
					
						
						
							
							[Bugfix] V1 Fix the cursor leakage issue during request scheduling. ( #21173 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: CLFutureX <775523362@qq.com > 
						
						
					 
					
						2025-08-04 18:34:10 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						bdcb42e45d 
					 
					
						
						
							
							[NVIDIA] Auto detect modelopt quant and fix DSR1-FP4 weight loading ( #22073 )  
						
						 
						
						
						
						
					 
					
						2025-08-04 21:02:55 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c09efff976 
					 
					
						
						
							
							[Bugfix][V1][P/D]Fix the uneven polling issue in the toy proxy for P2pNcclConnector ( #21819 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Abatom <abzhonghua@gmail.com > 
						
						
					 
					
						2025-08-04 20:17:05 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						309c1bb822 
					 
					
						
						
							
							[Bug] Update auto_tune.sh to separate benchmarking and profiling. ( #21629 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Eric Hanley <ericehanley@google.com > 
						
						
					 
					
						2025-08-04 15:12:06 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9af654cc38 
					 
					
						
						
							
							[Responses API] Ignore store=True and process the request by default ( #22185 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-08-04 05:12:48 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a5fff3bd49 
					 
					
						
						
							
							Fix Arcee model weight loading: Add custom load_weights ( #21725 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: alyosha-swamy <raghav@arcee.ai > 
						
						
					 
					
						2025-08-04 04:09:56 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1539ced93a 
					 
					
						
						
							
							[Doc] Update pooling model docs ( #22186 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-08-04 03:37:06 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						54de71d0df 
					 
					
						
						
							
							[Sampler] Support returning all logprobs or logits ( #21792 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: 22quinn <33176974+22quinn@users.noreply.github.com > 
						
						
					 
					
						2025-08-04 03:04:12 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						fed5849d3f 
					 
					
						
						
							
							[Bugfix] Fix failing GGUF models test ( #22174 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn > 
						
						
					 
					
						2025-08-04 01:27:02 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c1b4eb048a 
					 
					
						
						
							
							[feat] move WEIGHT_SCALE_SUPPORTED into raise block to accelerate RLHF weight loading ( #21164 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: huangweixiao <huangweixiao@msh.team > 
						
						
					 
					
						2025-08-04 15:43:06 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a7b8788d2c 
					 
					
						
						
							
							[Misc] Modify the organization of GLM series  ( #22171 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-08-03 23:51:20 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8ecb3e9e93 
					 
					
						
						
							
							[CI Bugfix] Fix wNa16 kernel not found for test_shared_storage_connector_hashes ( #22163 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com > 
						
						
					 
					
						2025-08-03 22:19:04 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e5949e5ae0 
					 
					
						
						
							
							Remove index_put from MM embeddings merging ( #22105 )  
						
						 
						
						... 
						
						
						
						Co-authored-by: Chenxi Yang <cxyang@meta.com > 
						
						
					 
					
						2025-08-03 22:15:14 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						49bcd893e7 
					 
					
						
						
							
							[refactor] improve ConstantList exception specificity ( #22156 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: zitian.zhao <zitian.zhao@tencentmusic.com > 
						
						
					 
					
						2025-08-03 22:14:49 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						aa7012eb6d 
					 
					
						
						
							
							Add tree attention backend for v1 (part 1) ( #20401 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Giancarlo Delfin <gdelfin@meta.com > 
						
						
					 
					
						2025-08-03 22:13:26 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c2e75b3c11 
					 
					
						
						
							
							remove duplicate code within cleanup_dist_env_and_memory ( #22147 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Andy Xie <andy.xning@gmail.com > 
						
						
					 
					
						2025-08-03 20:03:58 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0d7db16a92 
					 
					
						
						
							
							[PD] add test for chat completions endpoint ( #21925 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Abirdcfly <fp544037857@gmail.com > 
						
						
					 
					
						2025-08-03 19:57:03 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						845420ac2c 
					 
					
						
						
							
							[RLHF] Fix torch.dtype not serializable in example ( #22158 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: 22quinn <33176974+22quinn@users.noreply.github.com > 
						
						
					 
					
						2025-08-04 02:43:33 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e27d25a0dc 
					 
					
						
						
							
							[fix] fix correct assertion syntax error in attention utils. ( #22154 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: zitian.zhao <zitian.zhao@tencentmusic.com > 
						
						
					 
					
						2025-08-03 19:24:02 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6f5478298d 
					 
					
						
						
							
							Use aiohttp connection pool for benchmarking ( #21981 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Seiji Eicher <seiji@anyscale.com > 
						
						
					 
					
						2025-08-03 19:23:32 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6a39ba85fe 
					 
					
						
						
							
							[Bugfix] Fix failing multimodal standard test ( #22153 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn > 
						
						
					 
					
						2025-08-03 19:04:38 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d3c18c9cb0 
					 
					
						
						
							
							fuse fp32 for GLM-4.5 e_score_correction_bias ( #22143 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: zRzRzRzRzRzRzR <2448370773@qq.com > 
						
						
					 
					
						2025-08-03 09:04:54 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						83f7bbb318 
					 
					
						
						
							
							Add chat doc in quick start ( #21213 )  
						
						 
						
						... 
						
						
						
						Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com > 
						
						
					 
					
						2025-08-03 07:47:55 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b5dfb94fa0 
					 
					
						
						
							
							[CI/Build][Bugfix] Fix Qwen2.5 tests in CPU CI via fallback silu_and_mul to torch native implementation ( #22145 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: jiang1.li <jiang1.li@intel.com > 
						
						
					 
					
						2025-08-03 05:34:04 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6d98843b31 
					 
					
						
						
							
							[Responses API] Disable response store by default ( #22137 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-08-03 04:04:21 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						aefeea0fde 
					 
					
						
						
							
							[V1] [P/D] Refactor KV Connector Path ( #21980 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: David Ben-David <davidb@pliops.com >
Co-authored-by: David Ben-David <davidb@pliops.com > 
						
						
					 
					
						2025-08-03 04:03:40 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						24d1dffbeb 
					 
					
						
						
							
							[executor] feat: add supports_pp attr to executors ( #21786 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Haibin Lin <haibin.lin@bytedance.com > 
						
						
					 
					
						2025-08-03 18:04:45 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7de45db9a5 
					 
					
						
						
							
							[Misc] update doc comment for send ( #22026 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Andy Xie <andy.xning@gmail.com > 
						
						
					 
					
						2025-08-03 00:55:20 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						789562c28c 
					 
					
						
						
							
							Support CUTLASS NVFP4 (w4a4) for Blackwell Geforce GPUs (SM120) ( #21309 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: LopezCastroRoberto <roberto.lopez.castro@udc.es > 
						
						
					 
					
						2025-08-03 00:54:22 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3f36c325fa 
					 
					
						
						
							
							[Benchmark] Support ready check timeout in vllm bench serve ( #21696 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Ye (Charlotte) Qi <yeq@meta.com >
Co-authored-by: Roger Wang <hey@rogerw.me > 
						
						
					 
					
						2025-08-03 00:52:38 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3dddbf1f25 
					 
					
						
						
							
							[Misc] Add tensor schema test coverage for multimodal models ( #21754 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn >
Signed-off-by: Isotr0py <2037008807@qq.com > 
						
						
					 
					
						2025-08-03 00:52:14 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						337eb23bcc 
					 
					
						
						
							
							[Fix] Fix llama4 modelopt weight loading error ( #22107 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: jiahanc <173873397+jiahanc@users.noreply.github.com >
Co-authored-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-08-03 00:50:34 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2ff46b8826 
					 
					
						
						
							
							[Misc] Bump ray to 2.48.0 ( #22123 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Rui Qiao <ruisearch42@gmail.com > 
						
						
					 
					
						2025-08-02 19:42:00 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						554df8a6a2 
					 
					
						
						
							
							Revert "[compile][startup] Disable C++ compilation of symbolic shapes" ( #22122 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Xiao Liu <xiszishu@gmail.com > 
						
						
					 
					
						2025-08-02 09:03:30 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						73e1b9b1d4 
					 
					
						
						
							
							[xpu]support moe models on XPU platform ( #21643 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yan <yan.ma@intel.com >
Signed-off-by: Yan Ma <yan.ma@intel.com > 
						
						
					 
					
						2025-08-02 07:49:08 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4abfd8796f 
					 
					
						
						
							
							[V1] [Hybrid] Validate compatibility of attention backend batch reordering at init time ( #21557 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Thomas Parnell <tpa@zurich.ibm.com > 
						
						
					 
					
						2025-08-02 05:29:40 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f5d0f4784f 
					 
					
						
						
							
							[Frontend] Improve error message for too many mm items ( #22114 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-08-02 02:20:38 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b690e34824 
					 
					
						
						
							
							[Model] Mamba2 preallocate SSM output tensor to avoid d2d copy overhead ( #21075 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chih-Chieh Yang <7364402+cyang49@users.noreply.github.com >
Signed-off-by: Chih-Chieh-Yang <7364402+cyang49@users.noreply.github.com > 
						
						
					 
					
						2025-08-02 01:59:34 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						25373b6c6c 
					 
					
						
						
							
							for glm-4.1V update ( #22000 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <2037008807@qq.com >
Signed-off-by: zRzRzRzRzRzRzR <2448370773@qq.com >
Co-authored-by: Isotr0py <2037008807@qq.com > 
						
						
					 
					
						2025-08-02 01:46:57 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						58eee5f2e0 
					 
					
						
						
							
							[PERF] Use faster way of decode in tokenizer: avoid useless list-to-list conversion ( #20000 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Vadim Gimpelson <vadim.gimpelson@centml.ai > 
						
						
					 
					
						2025-08-02 01:43:52 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						067c34a155 
					 
					
						
						
							
							docs: remove deprecated disable-log-requests flag ( #22113 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Roger Wang <hey@rogerw.me > 
						
						
					 
					
						2025-08-02 00:19:48 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c64861d63c 
					 
					
						
						
							
							[Bugfix] Mamba2 remove bugged initial state condition in chunk scan ( #22034 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chih-Chieh-Yang <7364402+cyang49@users.noreply.github.com > 
						
						
					 
					
						2025-08-01 23:55:57 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8564dc9448 
					 
					
						
						
							
							Fix test_kv_sharing_fast_prefill flakiness ( #22038 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Yong Hoon Shin <yhshin@meta.com > 
						
						
					 
					
						2025-08-01 23:55:34 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4ac8437352 
					 
					
						
						
							
							[Misc] Getting and passing ray runtime_env to workers ( #22040 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Rui Qiao <ruisearch42@gmail.com > 
						
						
					 
					
						2025-08-01 23:54:40 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d3a6f2120b 
					 
					
						
						
							
							[FEAT][ROCm] Enable running Flash Attention as ViT attn backend for Qwen-VL models on ROCm platform. ( #22069 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: tjtanaavllm <tunjian.tan@amd.com >
Signed-off-by: vllmellm <vllm.ellm@embeddedllm.com >
Co-authored-by: tjtanaavllm <tunjian.tan@amd.com > 
						
						
					 
					
						2025-08-01 23:53:18 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0edaf752d7 
					 
					
						
						
							
							[Attention][DBO] Add support for "splitting" the CommonAttentionMetadata ( #21153 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Sage Moore <sage@neuralmagic.com > 
						
						
					 
					
						2025-08-01 19:47:53 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6e8d8c4afb 
					 
					
						
						
							
							[Test] Add Unit Test for Batched DeepGEMM ( #21559 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yewentao256 <zhyanwentao@126.com > 
						
						
					 
					
						2025-08-02 10:45:46 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8d524ce79f 
					 
					
						
						
							
							[BugFix] Improve internal DP load balancing ( #21617 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-08-01 19:45:27 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9f9c38c392 
					 
					
						
						
							
							[Speculators][Speculative Decoding] Add Qwen Eagle3 Support ( #21835 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Dipika Sikka <dipikasikka1@gmail.com > 
						
						
					 
					
						2025-08-01 19:43:37 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a65f46be5e 
					 
					
						
						
							
							[Misc] DeepGemmExperts : Avoid JIT generation in the hot-path ( #21955 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Varun Sundar Rabindranath <vsundarr@redhat.com >
Co-authored-by: Varun Sundar Rabindranath <vsundarr@redhat.com > 
						
						
					 
					
						2025-08-01 19:42:03 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						57393715e8 
					 
					
						
						
							
							[Misc] VLLM_TARGET_DEVICE.lower() ( #22101 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: NickLucche <nlucches@redhat.com > 
						
						
					 
					
						2025-08-01 19:41:40 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ee2eb6ecd8 
					 
					
						
						
							
							[Model] Qwen2.5 VL SiLU-and-Mul ( #22066 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: kf <kuanfu.liu@embeddedllm.com >
Signed-off-by: vllmellm <vllm.ellm@embeddedllm.com >
Co-authored-by: kf <kuanfu.liu@embeddedllm.com > 
						
						
					 
					
						2025-08-01 19:34:37 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						23322431c8 
					 
					
						
						
							
							[V1][CUDA] Full cudagraph support for FlashInfer ( #21367 )  
						
						 
						
						
						
						
					 
					
						2025-08-01 21:49:34 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3654847db5 
					 
					
						
						
							
							feat: Add Support GPTQ Quantization MOE on ROCM vllm serve ( #21733 )  
						
						 
						
						
						
						
					 
					
						2025-08-01 21:12:19 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						eefbf4a68b 
					 
					
						
						
							
							[Perf] Optimize reshape_and_cache_flash CUDA Kernel ( #22036 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yewentao256 <zhyanwentao@126.com > 
						
						
					 
					
						2025-08-01 19:18:51 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						88faa466d7 
					 
					
						
						
							
							[CI] Initial tests for SM100 Blackwell runner ( #21877 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-08-01 16:18:38 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						881e1af43a 
					 
					
						
						
							
							[BugFix] Harden distributed DP startup ( #21538 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-08-01 21:40:45 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d84b97a3e3 
					 
					
						
						
							
							Add lora test for tp>1 case for TPU. ( #21970 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Xiongfei Wei <isaacwxf23@gmail.com > 
						
						
					 
					
						2025-08-01 18:56:08 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d331759488 
					 
					
						
						
							
							Introduce RayPPCommunicator for ray-based PP ( #21660 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Rui Qiao <ruisearch42@gmail.com > 
						
						
					 
					
						2025-08-01 11:50:58 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9659bc7f27 
					 
					
						
						
							
							[compile][startup] Disable C++ compilation of symbolic shapes ( #20836 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Animesh Jain <anijain@umich.edu > 
						
						
					 
					
						2025-08-01 10:38:52 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3277e8f9e1 
					 
					
						
						
							
							Fix pre-commit failure for SECURTIY.md ( #22102 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-08-01 10:36:07 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8d705996df 
					 
					
						
						
							
							[Misc] Minor enhancement of benchmark_moe ( #22068 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-08-02 01:35:30 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						38c8bce8b6 
					 
					
						
						
							
							Enable headless models for pooling in the Transformers backend ( #21767 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-08-01 10:31:29 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ac45c44d98 
					 
					
						
						
							
							[Bugfix] [Performance] DeepEPHighThroughput + DeepSeek : Quant before Dispatch ( #21837 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Varun Sundar Rabindranath <vsundarr@redhat.com >
Co-authored-by: Varun Sundar Rabindranath <vsundarr@redhat.com > 
						
						
					 
					
						2025-08-01 10:14:38 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d6664664b4 
					 
					
						
						
							
							security policy: take 1 ( #21119 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Huzaifa Sidhpurwala <huzaifas@redhat.com >
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Russell Bryant <rbryant@redhat.com > 
						
						
					 
					
						2025-08-01 10:09:49 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b879ecd6e2 
					 
					
						
						
							
							[Bugfix] fix when skip tokenizer init ( #21922 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: rongfu.leng <rongfu.leng@daocloud.io > 
						
						
					 
					
						2025-08-01 10:09:36 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3f8e952179 
					 
					
						
						
							
							[Bugfix] Fix glm4.1v video inference issue ( #22067 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <2037008807@qq.com > 
						
						
					 
					
						2025-08-01 09:33:30 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						326a1b001d 
					 
					
						
						
							
							Improve documentation of ModelConfig.try_get_generation_config to prevent future confusion ( #21526 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-08-01 09:32:27 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2d7b09b998 
					 
					
						
						
							
							Deprecate --disable-log-requests and replace with --enable-log-requests ( #21739 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-08-01 17:16:37 +01:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						97608dc276 
					 
					
						
						
							
							[Docs] use uv in CPU installation docs ( #22089 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: David Xia <david@davidxia.com > 
						
						
					 
					
						2025-08-01 07:55:55 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3146519add 
					 
					
						
						
							
							[BugFix] Don't change title of top-level process ( #22032 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-08-01 07:37:55 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8026a335a1 
					 
					
						
						
							
							[BugFix] Update AttnFusionPass cache key ( #21947 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Richard Zou <zou3519@gmail.com > 
						
						
					 
					
						2025-08-01 07:11:29 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a59cd9d9f7 
					 
					
						
						
							
							[Refactor] Fix Compile Warning #1444-D ( #21462 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yewentao256 <zhyanwentao@126.com > 
						
						
					 
					
						2025-08-01 06:10:30 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5c54d9759d 
					 
					
						
						
							
							[Bugfix][PD] set max_completion_tokens=1 if req has this value ( #21841 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Abirdcfly <fp544037857@gmail.com > 
						
						
					 
					
						2025-08-01 06:08:45 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0a6d305e0f 
					 
					
						
						
							
							feat(multimodal): Add customizable background color for RGBA to RGB conversion ( #22052 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jinheng Li <ahengljh@gmail.com >
Co-authored-by: Jinheng Li <ahengljh@gmail.com > 
						
						
					 
					
						2025-08-01 06:07:33 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f81c1bb055 
					 
					
						
						
							
							[Bugfix] Check NVIDIA artifactory is accessible before using flashinfer cubin kernels ( #21893 )  
						
						 
						
						
						
						
					 
					
						2025-08-01 08:28:45 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						fb0e0d46fc 
					 
					
						
						
							
							Fix get_kwargs for case where type hint is list[Union[str, type]] ( #22016 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-08-01 05:26:42 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						26b5f7bd2a 
					 
					
						
						
							
							[BUG] [ROCm] Fix import bug on ROCm ( #22083 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: tjtanaa <tunjian.tan@embeddedllm.com > 
						
						
					 
					
						2025-08-01 05:25:20 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						dfbc1f8880 
					 
					
						
						
							
							[Speculative Decoding] Add speculators config support ( #21345 )  
						
						 
						
						
						
						
					 
					
						2025-08-01 08:25:18 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						87c94bc879 
					 
					
						
						
							
							Revert "Update sampling_metadata.py ( #21937 )" ( #22088 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-08-01 05:24:46 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						28b18cc741 
					 
					
						
						
							
							[Quantization] Enable BNB support for InternS1 ( #21953 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-08-01 11:09:54 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4931486988 
					 
					
						
						
							
							[Doc] Added warning of speculating with draft model ( #22047 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Dilute-l <dilu2333@163.com >
Co-authored-by: Dilute-l <dilu2333@163.com > 
						
						
					 
					
						2025-08-01 02:11:56 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0f81b310db 
					 
					
						
						
							
							[Misc] Remove upper bound in openai package version ( #22060 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-08-01 02:11:40 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e6680f9e25 
					 
					
						
						
							
							[Bugfix] Add log prefix in non-dp mode engine core ( #21889 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: wuhang <wuhang6@huawei.com > 
						
						
					 
					
						2025-08-01 09:04:16 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						27a145e893 
					 
					
						
						
							
							[Doc] Add example for Step3-VL ( #22061 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Roger Wang <hey@rogerw.me > 
						
						
					 
					
						2025-08-01 08:35:49 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						da31f6ad3d 
					 
					
						
						
							
							Revert precompile wheel changes ( #22055 )  
						
						 
						
						
						
						
					 
					
						2025-08-01 08:26:24 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						98df153abf 
					 
					
						
						
							
							[Frontend] Align tool_choice="required" behavior with OpenAI when tools is empty ( #21052 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Sungyoon Jeong <sungyoon.jeong@furiosa.ai > 
						
						
					 
					
						2025-08-01 07:54:17 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e0f63e4a35 
					 
					
						
						
							
							[Core] Avoid repeated len(block_token_ids) check in hash_request_tokens ( #21781 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: linzebing <linzebing1995@gmail.com > 
						
						
					 
					
						2025-08-01 00:23:29 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b4e081cb15 
					 
					
						
						
							
							[Bugfix] Disable multi-modal preprocessor cache for DP ( #21896 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-08-01 08:03:56 +01:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						79731a79f0 
					 
					
						
						
							
							[Doc] Fix a syntax error of example code in structured_outputs.md ( #22045 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: wangzi <3220100013@zju.edu.cn >
Co-authored-by: wangzi <3220100013@zju.edu.cn > 
						
						
					 
					
						2025-08-01 00:01:22 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						53d7c39271 
					 
					
						
						
							
							Update sampling_metadata.py ( #21937 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Aviad Rossmann <aviadr@neureality.ai > 
						
						
					 
					
						2025-07-31 23:23:18 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						61dcc280fa 
					 
					
						
						
							
							[Doc] Add Voxtral to Supported Models page ( #22059 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-07-31 23:10:56 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0f46a780d4 
					 
					
						
						
							
							[Model] [Quantization] Support quantization for Gemma3n ( #21974 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Kyle Sayers <kylesayrs@gmail.com > 
						
						
					 
					
						2025-07-31 22:45:15 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e1a7fe4af5 
					 
					
						
						
							
							[BugFix] fix: aot passes kvcache dtype information ( #19750 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Mickael Seznec <mickael@mistral.ai > 
						
						
					 
					
						2025-08-01 05:45:02 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						82de9b9d46 
					 
					
						
						
							
							[Misc] Automatically resolve HF processor init kwargs ( #22005 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-07-31 22:44:10 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ad57f23f6a 
					 
					
						
						
							
							[Bugfix] Fix: Fix multi loras with tp >=2 and LRU cache ( #20873 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: charent <19562666+charent@users.noreply.github.com > 
						
						
					 
					
						2025-07-31 19:48:13 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3700642013 
					 
					
						
						
							
							[Refactor] Remove Duplicate per_block_cast_to_fp8, Remove Dependencies of DeepGEMM ( #21787 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yewentao256 <zhyanwentao@126.com > 
						
						
					 
					
						2025-08-01 01:13:27 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0bd409cf01 
					 
					
						
						
							
							Move flashinfer-python to optional extra vllm[flashinfer] ( #21959 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-07-31 18:02:11 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e360316ab9 
					 
					
						
						
							
							Add DeepGEMM to Dockerfile in vllm-base image ( #21533 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Matthew Bonanni <mbonanni001@gmail.com >
Signed-off-by: mgoin <mgoin64@gmail.com >
Co-authored-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-07-31 18:01:55 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c3e0e9337e 
					 
					
						
						
							
							[Feature] Add Flashinfer MoE Support for Compressed Tensor NVFP4 ( #21639 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yewentao256 <zhyanwentao@126.com > 
						
						
					 
					
						2025-07-31 15:26:11 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6e672daf62 
					 
					
						
						
							
							Add FlashInfer allreduce RMSNorm Quant fusion ( #21069 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: ilmarkov <imarkov@redhat.com >
Signed-off-by: ilmarkov <markovilya197@gmail.com >
Co-authored-by: ilmarkov <imarkov@redhat.com > 
						
						
					 
					
						2025-07-31 13:58:38 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2dff2e21d9 
					 
					
						
						
							
							[Bugfix] Fix MTP weight loading  ( #21941 )  
						
						 
						
						
						
						
					 
					
						2025-07-31 16:33:53 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						71470bc4af 
					 
					
						
						
							
							[Misc] Add unit tests for chunked local attention ( #21692 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Yong Hoon Shin <yhshin@meta.com > 
						
						
					 
					
						2025-07-31 11:39:16 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9e0726e5bf 
					 
					
						
						
							
							[Meta] Official Eagle mm support, first enablement on llama4 ( #20788 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: morgendave <morgendave@gmail.com >
Co-authored-by: Roger Wang <hey@rogerw.me > 
						
						
					 
					
						2025-07-31 10:35:07 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						53c21e492e 
					 
					
						
						
							
							Update torch_xla pin to 20250730 ( #21956 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Xiongfei Wei <isaacwxf23@gmail.com > 
						
						
					 
					
						2025-07-31 17:26:43 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0780bb5783 
					 
					
						
						
							
							Removing amdproduction Tests ( #22027 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Alexei V. Ivanov <alexei.ivanov@amd.com > 
						
						
					 
					
						2025-07-31 09:53:27 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						58bb902186 
					 
					
						
						
							
							fix(setup): improve precompiled wheel setup for Docker builds ( #22025 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: dougbtv <dosmith@redhat.com > 
						
						
					 
					
						2025-07-31 09:52:48 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7349d5268b 
					 
					
						
						
							
							[ez] Remove a trailing space from compilation/decorators.py ( #22028 )  
						
						 
						
						
						
						
					 
					
						2025-07-31 09:46:07 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9484641616 
					 
					
						
						
							
							[Model] Add step3 vl ( #21998 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: oliveryuan <yuansong@step.ai >
Co-authored-by: oliveryuan <yuansong@step.ai > 
						
						
					 
					
						2025-07-31 23:19:06 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						207b750e19 
					 
					
						
						
							
							[NVIDIA] Add SM100 Flashinfer MoE per tensor scale fp8 backend ( #21458 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Amir Klein <203507526+amirkl94@users.noreply.github.com >
Signed-off-by: mgoin <mgoin64@gmail.com >
Co-authored-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-07-31 06:00:01 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5daffe7cf6 
					 
					
						
						
							
							[BugFix] Fix case where collective_rpc returns None ( #22006 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-07-31 12:51:37 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2836dd73f1 
					 
					
						
						
							
							[Model][CI] Let more pooling models support v1 ( #21747 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: wang.yuqi <noooop@126.com > 
						
						
					 
					
						2025-07-31 01:51:15 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d2aab336ad 
					 
					
						
						
							
							[CI/Build] get rid of unused VLLM_FA_CMAKE_GPU_ARCHES ( #21599 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Daniele Trifirò <dtrifiro@redhat.com > 
						
						
					 
					
						2025-07-31 15:00:08 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9532a6d563 
					 
					
						
						
							
							[Deprecation] Remove deprecated args and methods ( #21907 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-07-30 23:46:38 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3e36fcbee6 
					 
					
						
						
							
							[Bugfix]: fix metadata file copy in test_sharded_state_loader ( #21830 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Andy Xie <andy.xning@gmail.com > 
						
						
					 
					
						2025-07-31 06:22:11 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						055bd3978e 
					 
					
						
						
							
							[CI Bugfix] Fix CI OOM for test_shared_storage_connector_hashes ( #21973 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-07-31 11:45:29 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0f7919fca0 
					 
					
						
						
							
							[Misc] Expand SUPPORTED_HIDDEN_SIZES  for DeepEP low-latency kernels ( #21818 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-07-30 20:41:12 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						61445453df 
					 
					
						
						
							
							[UX] Rename CUTLASS_MLA_VLLM_V1 to CUTLASS_MLA ( #21966 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-07-30 20:40:34 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ec02e536df 
					 
					
						
						
							
							[Bugfix] Relax lang pin for voxtral ( #21833 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Sanchit Gandhi <sgandhi3141@gmail.com >
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> 
						
						
					 
					
						2025-07-30 20:38:52 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9cb497bfa3 
					 
					
						
						
							
							[Example] Add async_llm_streaming.py example for AsyncLLM streaming in python ( #21763 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-07-30 18:39:46 -06:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ca9e2be3ed 
					 
					
						
						
							
							[Core] Move EngineCoreRequest to Request conversion out of EngineCore ( #21627 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: linzebing <linzebing1995@gmail.com > 
						
						
					 
					
						2025-07-30 15:00:54 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						601f856d56 
					 
					
						
						
							
							[Bugfix] Fix None value handling in trace span creation for cancelled requests ( #20272 )  
						
						 
						
						
						
						
					 
					
						2025-07-30 14:44:02 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						287f527f54 
					 
					
						
						
							
							[Feature] Add async tensor parallelism for scaled mm ( #20155 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: cascade812 <cascade812@outlook.com > 
						
						
					 
					
						2025-07-30 17:23:41 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f12d9256b3 
					 
					
						
						
							
							[Misc] Use dracut on CentOS and skip clone if repo exists for EP kernel installation ( #21635 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Ming Yang <minos.future@gmail.com > 
						
						
					 
					
						2025-07-30 13:15:06 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b9b753e7a7 
					 
					
						
						
							
							For VLLM_USE_PRECOMPILED, only compiled .so files should be extracted ( #21964 )  
						
						 
						
						
						
						
					 
					
						2025-07-30 13:04:40 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						56bd537dde 
					 
					
						
						
							
							[Misc] Support more collective_rpc return types ( #21845 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-07-30 10:20:20 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8f0d516715 
					 
					
						
						
							
							[TPU] Support Pathways in vLLM ( #21417 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: wenxindongwork <wenxindong@google.com > 
						
						
					 
					
						2025-07-30 10:02:12 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f4135232b9 
					 
					
						
						
							
							feat(distributed): add get_required_kvcache_layout class method to kv connector api ( #20433 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: wxsm <wxsms@foxmail.com > 
						
						
					 
					
						2025-07-30 16:41:51 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4904e53c32 
					 
					
						
						
							
							[Bugfix] SharedStorage Connector for V1 PD multimodal ( #21611 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: fake0fan <645327136@qq.com >
Signed-off-by: herotai214 <herotai214@gmail.com >
Co-authored-by: herotai214 <herotai214@gmail.com > 
						
						
					 
					
						2025-07-30 09:18:37 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						004203e953 
					 
					
						
						
							
							[CI/Build] Fix registry tests ( #21934 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-07-30 09:10:41 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5c765aec65 
					 
					
						
						
							
							[Bugfix] Fix TypeError in scheduler when comparing mixed request_id types ( #21816 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: chiliu <chiliu@paypal.com >
Co-authored-by: chiliu <chiliu@paypal.com > 
						
						
					 
					
						2025-07-30 08:54:44 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ad510309ee 
					 
					
						
						
							
							Override attention metadata for fast prefill in some KV sharing setups ( #21590 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Yong Hoon Shin <yhshin@meta.com > 
						
						
					 
					
						2025-07-30 08:54:15 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						366f6b3a4d 
					 
					
						
						
							
							[Bugfix] Fix multi-api server not working for text models ( #21933 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-07-30 08:42:05 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6e599eebe8 
					 
					
						
						
							
							[Bugfix] Fix OOM tests in initialization test ( #21921 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <2037008807@qq.com > 
						
						
					 
					
						2025-07-30 07:35:47 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						88edf5994c 
					 
					
						
						
							
							[Docs] Reduce the size of the built docs ( #21920 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-07-30 07:35:08 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ff08e51940 
					 
					
						
						
							
							[NVIDIA] Fix Llama4 Scout FP4 functionality issues ( #21499 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Po-Han Huang <pohanh@nvidia.com > 
						
						
					 
					
						2025-07-30 07:33:40 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8f4a1c9a04 
					 
					
						
						
							
							[Misc] Improve code readability of KVCacheManager ( #21673 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: tanruixiang <tanruixiang0104@gmail.com >
Signed-off-by: Ruixiang Tan <819464715@qq.com >
Signed-off-by: GitHub <noreply@github.com > 
						
						
					 
					
						2025-07-30 07:20:43 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						36ede45989 
					 
					
						
						
							
							Reduce time wasted in GitHub Actions using concurrency ( #21919 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-07-30 07:18:02 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0e40b26073 
					 
					
						
						
							
							[CI/Build] Only run markdownlint in CI ( #21892 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk >
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com >
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-07-30 07:17:14 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0271c2ff2f 
					 
					
						
						
							
							[Test] Add Benchmark and Unit Test for per_token_group_quant ( #21860 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yewentao256 <zhyanwentao@126.com > 
						
						
					 
					
						2025-07-30 07:15:02 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e91d3c9cda 
					 
					
						
						
							
							[misc] skip p2p check by default ( #21904 )  
						
						 
						
						
						
						
					 
					
						2025-07-30 22:05:04 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						bf668b5bf5 
					 
					
						
						
							
							[Feature] Support multiple api keys in server ( #18548 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Yan Pashkovsky <yanp.bugz@gmail.com > 
						
						
					 
					
						2025-07-30 07:03:23 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						da3e0bd6e5 
					 
					
						
						
							
							[Bugfix] we should use metavar is not choices ( #21902 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: rongfu.leng <rongfu.leng@daocloud.io > 
						
						
					 
					
						2025-07-30 06:51:58 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						fcfd1eb9c5 
					 
					
						
						
							
							[Doc] Remove vLLM prefix and add citation for PagedAttention ( #21910 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-07-30 06:36:34 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d979dd6beb 
					 
					
						
						
							
							[Feature][EPLB] Add eplb support for Qwen3 ( #20815 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: aladerran <aladerran@gmail.com > 
						
						
					 
					
						2025-07-30 06:27:57 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b876860c62 
					 
					
						
						
							
							[Hardware][CPU] Build fix for ARM without BF16 ( #21848 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Eric Curtin <ecurtin@redhat.com > 
						
						
					 
					
						2025-07-30 06:22:00 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						13986365a9 
					 
					
						
						
							
							Add @patrickvonplaten as maintainer of mistral's related files. ( #21928 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Patrick von Platen <patrick.v.platen@gmail.com > 
						
						
					 
					
						2025-07-30 20:42:51 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5c8fe389d6 
					 
					
						
						
							
							[Docs] Fix the example code of streaming chat completions in reasoning ( #21825 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: wangzi <3220100013@zju.edu.cn >
Co-authored-by: wangzi <3220100013@zju.edu.cn >
Co-authored-by: Zi Wang <66560864+BruceW-07@users.noreply.github.com > 
						
						
					 
					
						2025-07-30 12:11:58 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5bbaf492a6 
					 
					
						
						
							
							[Doc] Update partial support ( #21916 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-07-30 01:32:39 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						533db0935d 
					 
					
						
						
							
							[benchmark] add max-concurrency in result table ( #21095 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Peter Pan <Peter.Pan@daocloud.io > 
						
						
					 
					
						2025-07-30 01:15:43 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						fc91da5499 
					 
					
						
						
							
							[Model] Remove DSV2 unused code ( #21903 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-07-30 00:55:03 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						547795232d 
					 
					
						
						
							
							[Tests] Fixing bug inside MultiModalProfiler. ( #21842 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Varun Shenoy <varun.vinayak.shenoy@oracle.com > 
						
						
					 
					
						2025-07-30 00:44:15 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						30ef30ed5a 
					 
					
						
						
							
							[CI] rollback lint-and-deploy pipeline using amd machine ( #21912 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Kebe <mail@kebe7jun.com > 
						
						
					 
					
						2025-07-30 00:37:59 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						02f82fe438 
					 
					
						
						
							
							[Doc] Update Intern-S1 info  ( #21908 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-07-29 23:58:57 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2ca5f82c2a 
					 
					
						
						
							
							[Misc] Remove redundant config definitions ( #21891 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-07-29 23:54:18 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6f8d261882 
					 
					
						
						
							
							Update vLLM Benchmark Suite for Xeon based on 0.9.2 release  ( #21486 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Tsai, Louie <louie.tsai@intel.com > 
						
						
					 
					
						2025-07-30 05:57:03 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4cd7fe6cea 
					 
					
						
						
							
							[Docs] Expand introduction to Ray in Multi-node deployment section ( #21584 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Ricardo Decal <rdecal@anyscale.com > 
						
						
					 
					
						2025-07-29 22:07:28 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						16f3250527 
					 
					
						
						
							
							[CI/Build] Fix pre-commit failure in docs ( #21897 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-07-29 21:53:08 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e3bc17ceea 
					 
					
						
						
							
							Add @sighingnow as maintainer of qwen's related files. ( #21895 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Tao He <linzhu.ht@alibaba-inc.com > 
						
						
					 
					
						2025-07-29 21:30:44 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						05cbbe20c5 
					 
					
						
						
							
							[XPU] use ZE_AFFINITY_MASK for device select on xpu ( #21815 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Kunshang Ji <kunshang.ji@intel.com > 
						
						
					 
					
						2025-07-30 03:56:14 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						65f311ce59 
					 
					
						
						
							
							[Frontend] Add LLM.reward specific to reward models ( #21720 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: wang.yuqi <noooop@126.com > 
						
						
					 
					
						2025-07-29 20:56:03 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1b0a155534 
					 
					
						
						
							
							[Perf] Using __nv_fp8_e4m3 instead of c10::e4m3 for per_token_group_quant ( #21867 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yewentao256 <zhyanwentao@126.com > 
						
						
					 
					
						2025-07-29 21:50:46 -06:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						44bc46da60 
					 
					
						
						
							
							[Bugfix] Actually disable processing cache when API server is scaled out ( #21839 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-07-29 20:36:04 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b7b23da4d2 
					 
					
						
						
							
							[Bugfix] Fix comment typo of get_num_common_prefix_blocks() ( #21827 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: MingzhenHan <hanmingzhen2002@outlook.com > 
						
						
					 
					
						2025-07-29 20:35:33 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						fdde18229e 
					 
					
						
						
							
							[Bugfix] Fix shape mismatch assertion error when loading Gemma3n model with BitsAndBytes quantization ( #21808 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: sydarb <areebsyed237@gmail.com > 
						
						
					 
					
						2025-07-30 11:35:21 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b917da442b 
					 
					
						
						
							
							Expose PyTorch profiler configuration to environment variables ( #21803 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Csrayz <33659823+Csrayz@users.noreply.github.com > 
						
						
					 
					
						2025-07-29 19:46:31 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						fb58e3a651 
					 
					
						
						
							
							[Docs] Update docker.md with HF_TOKEN, new model, and podman fix ( #21856 )  
						
						 
						
						
						
						
					 
					
						2025-07-29 19:45:41 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						76080cff79 
					 
					
						
						
							
							[DOC] Fix path of v1 related figures ( #21868 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chen Zhang <zhangch99@outlook.com >
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> 
						
						
					 
					
						2025-07-29 19:45:18 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ba5c5e5404 
					 
					
						
						
							
							[Docs] Switch to better markdown linting pre-commit hook ( #21851 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-07-29 19:45:08 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						555e7225bc 
					 
					
						
						
							
							[v1][attention] Support Hybrid Allocator + FlashInfer ( #21412 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chen Zhang <zhangch99@outlook.com > 
						
						
					 
					
						2025-07-30 01:45:29 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0e36abf993 
					 
					
						
						
							
							[Bugfix] Correct max tokens for non-contiguous embeds ( #21798 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Alexandre Milesi <30204471+milesial@users.noreply.github.com >
Co-authored-by: Alexandre Milesi <30204471+milesial@users.noreply.github.com > 
						
						
					 
					
						2025-07-30 01:16:25 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						452b2a3180 
					 
					
						
						
							
							[ci] mark blackwell test optional for now ( #21878 )  
						
						 
						
						
						
						
					 
					
						2025-07-29 18:03:27 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0d0cc9e150 
					 
					
						
						
							
							[ci] add b200 test placeholder ( #21866 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: simon-mo <simon.mo@hey.com > 
						
						
					 
					
						2025-07-29 17:11:50 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9266d98048 
					 
					
						
						
							
							[BugFix] Fix interleaved sliding window not set for Gemma3n ( #21863 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Yong Hoon Shin <yhshin@meta.com > 
						
						
					 
					
						2025-07-29 16:34:19 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						176bbce1db 
					 
					
						
						
							
							Revert "[AMD][CI/Build] Fix the AMD issue caused by inappropriate of symbol exposure ( #21647 )" ( #21850 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Gregory Shtrasberg <Gregory.Shtrasberg@amd.com > 
						
						
					 
					
						2025-07-29 21:56:29 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a1873db23d 
					 
					
						
						
							
							docker: docker-aware precompiled wheel support ( #21127 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: dougbtv <dosmith@redhat.com > 
						
						
					 
					
						2025-07-29 14:45:19 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a33ea28b1b 
					 
					
						
						
							
							Add flashinfer_python to CUDA wheel requirements ( #21389 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-07-29 12:51:58 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7b49cb1c6b 
					 
					
						
						
							
							[Doc] update Contributing page's testing section ( #18272 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: David Xia <david@davidxia.com > 
						
						
					 
					
						2025-07-29 10:32:46 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f03e9cf2bb 
					 
					
						
						
							
							[Doc] Add FusedMoE Modular Kernel Documentation ( #21623 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Varun Sundar Rabindranath <vsundarr@redhat.com >
Co-authored-by: Varun Sundar Rabindranath <vsundarr@redhat.com > 
						
						
					 
					
						2025-07-29 10:32:30 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						37f86d9048 
					 
					
						
						
							
							[Docs] use uv in GPU installation docs ( #20277 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: David Xia <david@davidxia.com > 
						
						
					 
					
						2025-07-29 10:32:06 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						58b11b24a6 
					 
					
						
						
							
							[Bugfix] Fix workspace buffer None issue for Flashinfer TRTLLM Backend ( #21525 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: elvischenv <219235043+elvischenv@users.noreply.github.com > 
						
						
					 
					
						2025-07-29 10:34:00 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ad341c5194 
					 
					
						
						
							
							[Bugfix]fix mixed bits and visual language model quantization in AutoRound ( #21802 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Wenhua Cheng <wenhua.cheng@intel.com > 
						
						
					 
					
						2025-07-29 07:26:31 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						759b87ef3e 
					 
					
						
						
							
							[TPU] Add an optimization doc on TPU ( #21155 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com >
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-07-29 07:23:19 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f693b067a2 
					 
					
						
						
							
							[Docs] Merge design docs for a V1 only future ( #21832 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-07-29 07:22:50 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						04e38500ee 
					 
					
						
						
							
							[Bugfix] VLLM_V1 supports passing other compilation levels ( #19340 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Richard Zou <zou3519@gmail.com > 
						
						
					 
					
						2025-07-29 09:35:58 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ab714131e4 
					 
					
						
						
							
							[Doc] Update compatibility matrix for pooling and multimodal models ( #21831 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-07-29 06:29:51 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						755fa8b657 
					 
					
						
						
							
							[KVCache] Make KVCacheSpec hashable ( #21791 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chen Zhang <zhangch99@outlook.com > 
						
						
					 
					
						2025-07-29 19:58:29 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2470419119 
					 
					
						
						
							
							[Docs] Fix the outdated URL for installing from vLLM binaries ( #21523 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Kay Yan <kay.yan@daocloud.io >
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-07-29 04:56:27 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						61a6905ab0 
					 
					
						
						
							
							[Model] Refactor JambaForCausalLM ( #21394 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-07-29 18:25:07 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						37efc63b64 
					 
					
						
						
							
							[V0 deprecation] Guided decoding ( #21347 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Reza Barazesh <rezabarazesh@meta.com >
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com >
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-07-29 03:15:30 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a4528f0cac 
					 
					
						
						
							
							[Model]: Fused MoE for nomic-embed-text-v2-moe ( #18321 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: isotr0py <2037008807@qq.com >
Signed-off-by: Isotr0py <2037008807@qq.com >
Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn > 
						
						
					 
					
						2025-07-29 03:13:27 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a2480251ec 
					 
					
						
						
							
							[Doc] Link to RFC for pooling optimizations ( #21806 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-07-28 23:53:18 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7234fe2685 
					 
					
						
						
							
							[Misc] Rework process titles ( #21780 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-07-29 05:14:47 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f1e2c095ec 
					 
					
						
						
							
							Migrate InternVLImageInputs and InternVLVideoInputs to TensorSchema ( #21684 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Benji Beck <benjibeck@meta.com > 
						
						
					 
					
						2025-07-28 22:09:45 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						12a223ef9b 
					 
					
						
						
							
							[AMD][CI/Build][Bugfix] Guarding CUDA specific functions by ifndef ROCM ( #21766 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Gregory Shtrasberg <Gregory.Shtrasberg@amd.com > 
						
						
					 
					
						2025-07-29 03:35:37 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e18f085103 
					 
					
						
						
							
							skip fusedmoe layer for start_load_kv ( #21378 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: calvin chen <wen.chen@dynamia.ai > 
						
						
					 
					
						2025-07-28 18:59:44 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						afa2607596 
					 
					
						
						
							
							[CI] Parallelize Kernels MoE Test ( #21764 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-07-28 18:56:24 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						48b763d6b5 
					 
					
						
						
							
							[Refactor] Merge Compressed Tensor FP8 CompressedTensorsW8A8Fp8MoEMethod and CompressedTensorsW8A8Fp8MoECutlassMethod ( #21775 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yewentao256 <zhyanwentao@126.com > 
						
						
					 
					
						2025-07-28 19:47:21 -06:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						947e982ede 
					 
					
						
						
							
							[Docs] Minimize spacing for supported_hardware.md table ( #21779 )  
						
						 
						
						
						
						
					 
					
						2025-07-28 18:46:39 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c6c9122d50 
					 
					
						
						
							
							[Kernel] SM90 CUTLASS FP8 GEMM: add support for swap AB + kernel tuning ( #20396 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Faqin Zhong <faqin.zhong@gmail.com >
Co-authored-by: Duncan Moss <djm.moss@gmail.com > 
						
						
					 
					
						2025-07-28 23:13:58 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8aa1485fcf 
					 
					
						
						
							
							[Perf] Disable chunked local attention by default with llama4 ( #21761 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com > 
						
						
					 
					
						2025-07-28 18:49:04 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						89ac266b26 
					 
					
						
						
							
							[Feat]: Add support for Dynamic Quant 4 bit CPU kleidiai kernels ( #17112 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nikhil Gupta <nikhil.gupta2@arm.com >
Co-authored-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-07-28 20:55:15 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c6f36cfa26 
					 
					
						
						
							
							[Bugfix] DeepGEMM is not enabled on B200 due to _lazy_init() ( #21472 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Clayton Coleman <smarterclayton@gmail.com >
Co-authored-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-07-28 20:51:22 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b18b417fbf 
					 
					
						
						
							
							Revert "[V1] Exception Handling when Loading KV Cache from Remote Store" ( #21778 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: KuntaiDu <kuntai@uchicago.edu > 
						
						
					 
					
						2025-07-28 20:15:18 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9ba1c88a93 
					 
					
						
						
							
							[AMD][CI/Build] Fix the AMD issue caused by inappropriate of symbol exposure ( #21647 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lu Fang <lufang@fb.com > 
						
						
					 
					
						2025-07-28 20:11:16 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e0e58f9729 
					 
					
						
						
							
							[Bug] Enforce contiguous input for dynamic_scaled_fp8_quant and static_scaled_fp8_quant ( #21773 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yewentao256 <zhyanwentao@126.com > 
						
						
					 
					
						2025-07-28 19:55:48 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b361f14e39 
					 
					
						
						
							
							[AMD][BugFix] Fix omission  of wvSplitK kernel for small batch sizes (1-4) due to torch.compile ( #21350 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Randall Smith <Randall.Smith@amd.com > 
						
						
					 
					
						2025-07-28 15:38:20 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						01c753ed98 
					 
					
						
						
							
							update flashinfer to v0.2.9rc2 ( #21701 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Weiliang Liu <weiliangl@nvidia.com > 
						
						
					 
					
						2025-07-28 19:31:47 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						94b71ae106 
					 
					
						
						
							
							Use metavar to list the choices for a CLI arg when custom values are also accepted ( #21760 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-07-28 19:31:10 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7d44c691b0 
					 
					
						
						
							
							[P/D] Log warnings related to prefill KV expiry ( #21753 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-07-28 18:40:53 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e17a4d3bf9 
					 
					
						
						
							
							[Bugfix] Fix granite speech shape validation ( #21762 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-07-28 14:19:21 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ec261b0291 
					 
					
						
						
							
							[XPU] IPEX-optimized Punica Wrapper on XPU ( #21703 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: chzhang <chaojun.zhang@intel.com >
Co-authored-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-07-28 16:43:37 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						04fe61aa3d 
					 
					
						
						
							
							[CI/Build] Fix plugin tests ( #21758 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-07-28 15:08:05 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						25708d317a 
					 
					
						
						
							
							[Bugfix] Mistral crashes on tool with no description ( #21167 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: HugoMichard <hugo@harfanglab.fr > 
						
						
					 
					
						2025-07-28 08:03:35 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0e18a5d058 
					 
					
						
						
							
							[Misc] Reduce logs for model resolution ( #21765 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-07-28 07:59:56 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						34a20c49b3 
					 
					
						
						
							
							[Logs] Change flashinfer sampler logs to once ( #21759 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-07-28 06:59:51 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						31084b3b1f 
					 
					
						
						
							
							[Bugfix][CI/Build] Update peft version in test requirement ( #21729 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn > 
						
						
					 
					
						2025-07-28 06:17:43 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						bccc43c033 
					 
					
						
						
							
							[Bugfix]check health for engine core process exiting unexpectedly ( #21728 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: wuhang <wuhang6@huawei.com > 
						
						
					 
					
						2025-07-28 06:17:31 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1395dd9c28 
					 
					
						
						
							
							[Docs] Add revision date to rendered docs ( #21752 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-07-28 06:12:46 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9ace2eaf35 
					 
					
						
						
							
							[Bugfix] Improve JSON extraction in LlamaToolParser ( #19024 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: keru <keyang.ru@oracle.com >
Co-authored-by: keru <keyang.ru@oracle.com > 
						
						
					 
					
						2025-07-28 12:36:58 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						656c24f1b5 
					 
					
						
						
							
							[Ernie 4.5] Name Change for Base 0.3B Model ( #21735 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: vasqu <antonprogamer@gmail.com >
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk >
Co-authored-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-07-28 12:22:32 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						63fe3a700f 
					 
					
						
						
							
							[PD] let p2p nccl toy proxy handle /chat/completions ( #21734 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com > 
						
						
					 
					
						2025-07-28 11:45:50 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0ae970ed15 
					 
					
						
						
							
							[Bugfix] Fix glm4.1v video_grid_thw tensor shape scheme ( #21744 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn > 
						
						
					 
					
						2025-07-28 04:26:49 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						65e8466c37 
					 
					
						
						
							
							[Bugfix] Fix environment variable setting in CPU Dockerfile ( #21730 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: jiang1.li <jiang1.li@intel.com > 
						
						
					 
					
						2025-07-28 11:02:39 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1b769dccf3 
					 
					
						
						
							
							[Bugfix] Fix Ernie4_5_MoeForCausalLM shared experts ( #21717 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-07-28 11:02:25 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2cc571199b 
					 
					
						
						
							
							[feature] add log non default args in LLM ( #21680 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: rongfu.leng <rongfu.leng@daocloud.io > 
						
						
					 
					
						2025-07-28 02:21:22 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a4ed731546 
					 
					
						
						
							
							[Model] Prioritize Transformers fallback over suffix matching ( #21719 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-07-28 02:15:31 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d128d0d554 
					 
					
						
						
							
							Migrate KeyeImageInputs and KeyeVideoInputs to TensorSchema ( #21686 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Benji Beck <benjibeck@meta.com > 
						
						
					 
					
						2025-07-28 01:16:35 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a6c050286a 
					 
					
						
						
							
							[v1][mamba] Added mamba_type into MambaSpec ( #21715 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: asafg <asafg@ai21.com >
Co-authored-by: asafg <asafg@ai21.com > 
						
						
					 
					
						2025-07-28 08:15:55 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						139a7f07bd 
					 
					
						
						
							
							[BugFix] Fix ChunkedLocalAttention when the hybrid kv-cache is disabled ( #21707 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com > 
						
						
					 
					
						2025-07-28 07:18:47 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						150d9e6337 
					 
					
						
						
							
							[Bugfix] fix max-file-size type from str to int ( #21675 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Andy Xie <andy.xning@gmail.com > 
						
						
					 
					
						2025-07-28 00:06:52 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						139a97ec56 
					 
					
						
						
							
							[Bugfix] Fix shape checking for Fuyu ( #21709 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-07-28 00:05:56 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						18cc33dd60 
					 
					
						
						
							
							[bugfix] fix profile impact benchmark results ( #21507 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: rongfu.leng <rongfu.leng@daocloud.io > 
						
						
					 
					
						2025-07-27 22:44:24 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7656cf4cf3 
					 
					
						
						
							
							[Bugfix] [issue-21565] Fix the incompatibility issue with stream and named function calling when Thinking is disabled ( #21573 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: wangzi <3220100013@zju.edu.cn >
Co-authored-by: wangzi <3220100013@zju.edu.cn > 
						
						
					 
					
						2025-07-27 22:43:50 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3ea57a56d9 
					 
					
						
						
							
							Migrate Idefics3ImagePixelInputs and Idefics3ImageEmbeddingInputs to … ( #21683 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Benji Beck <benjibeck@meta.com > 
						
						
					 
					
						2025-07-27 22:37:23 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						75856bc2cb 
					 
					
						
						
							
							Migrate GraniteSpeechAudioInputs to TensorSchema ( #21682 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Benji Beck <benjibeck@meta.com >
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk >
Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-07-27 22:37:20 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						304dcdf575 
					 
					
						
						
							
							Migrate GLMVImagePixelInputs to TensorSchema ( #21679 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Benji Beck <benjibeck@meta.com > 
						
						
					 
					
						2025-07-27 22:36:11 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						88e46c7c8d 
					 
					
						
						
							
							Migrate Glm4vImageInputs, Glm4vVideoInputs to TensorSchema ( #21678 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Benji Beck <benjibeck@meta.com >
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk >
Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-07-27 22:36:08 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d8937de4c8 
					 
					
						
						
							
							Migrate Gemma3ImagePixelInputs to TensorSchema ( #21676 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Benji Beck <benjibeck@meta.com > 
						
						
					 
					
						2025-07-27 22:36:05 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e626d286f5 
					 
					
						
						
							
							[FEAT] [ROCm] [AITER]: Add AITER HIP block quant kernel ( #21242 )  
						
						 
						
						
						
						
					 
					
						2025-07-28 05:07:06 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c7ffe93d9c 
					 
					
						
						
							
							[Model] Support TP/PP/mamba2 kernel for PLaMo2 ( #19674 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Shinichi Hemmi <shemmi@preferred.jp >
Signed-off-by: Shinichi Hemmi <50256998+Alnusjaponica@users.noreply.github.com >
Co-authored-by: Calvin Metzger <metzger@preferred.jp >
Co-authored-by: Sixue Wang <cecilwang@preferred.jp > 
						
						
					 
					
						2025-07-28 05:00:47 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						15a72ac478 
					 
					
						
						
							
							[V1] Exception Handling when Loading KV Cache from Remote Store ( #21534 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: liuyumoye <adeline_ly2023@outlook.com >
Co-authored-by: liuyumoye <adeline_ly2023@outlook.com > 
						
						
					 
					
						2025-07-27 20:34:17 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						04ff4be310 
					 
					
						
						
							
							[Misc]  Add fused_moe configs for Qwen3-Coder-480B-A35B-Instruct-FP8 ( #21700 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-07-27 20:12:18 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						93269bb43e 
					 
					
						
						
							
							Fix GLM tool parser ( #21668 )  
						
						 
						
						... 
						
						
						
						Co-authored-by: Chenhui Zhang <zhang.chenhui@outlook.com > 
						
						
					 
					
						2025-07-28 10:46:38 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						82acf2184d 
					 
					
						
						
							
							Fix typo for limit-mm-per-prompt in docs ( #21697 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Joachim Studnia <joachim@mistral.ai > 
						
						
					 
					
						2025-07-27 19:45:37 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						86ae693f20 
					 
					
						
						
							
							[Deprecation][2/N] Replace --task with --runner and --convert ( #21470 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk >
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com >
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-07-27 19:42:40 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8f605ee309 
					 
					
						
						
							
							[Attention] Make CutlassMLA the default backend for SM100 (blackwell) ( #21626 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Alexander Matveev <amatveev@redhat.com >
Signed-off-by: mgoin <mgoin64@gmail.com >
Co-authored-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-07-27 20:13:00 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a9b2a1d704 
					 
					
						
						
							
							[Misc] Refactor vllm config str ( #21666 )  
						
						 
						
						
						
						
					 
					
						2025-07-27 09:51:44 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						57c22e57f9 
					 
					
						
						
							
							Fix CUDA permute/unpermute for use with DeepGemm Moe ( #17934 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Caleb_Du <Caleb_Du@zju.edu.cn > 
						
						
					 
					
						2025-07-27 07:08:00 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						bda9d0535f 
					 
					
						
						
							
							[Refactor] Refactor MOE NVFP4 Code Base: ModelOpt + Compressed Tensor ( #21631 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yewentao256 <zhyanwentao@126.com > 
						
						
					 
					
						2025-07-27 05:25:21 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3d847a3125 
					 
					
						
						
							
							[VLM] Add video support for Intern-S1 ( #21671 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn > 
						
						
					 
					
						2025-07-27 11:49:43 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5f8c9a425e 
					 
					
						
						
							
							Migrate Florence2ImagePixelInputs to TensorSchema ( #21663 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Benji Beck <benjibeck@meta.com >
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> 
						
						
					 
					
						2025-07-27 02:43:02 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1cbf951ba2 
					 
					
						
						
							
							[Misc] add default value for file pattern arg ( #21659 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Andy Xie <andy.xning@gmail.com > 
						
						
					 
					
						2025-07-27 05:14:51 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a8936e5193 
					 
					
						
						
							
							Refactor: Remove numpy dependency from LoggingStatLogger ( #20529 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: zitian.zhao <zitian.zhao@tencentmusic.com > 
						
						
					 
					
						2025-07-27 04:06:21 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						01a395e9e7 
					 
					
						
						
							
							[CI/Build][Doc] Clean up more docs that point to old bench scripts ( #21667 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Ye (Charlotte) Qi <yeq@meta.com > 
						
						
					 
					
						2025-07-27 04:02:12 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						971948b846 
					 
					
						
						
							
							Handle non-serializable objects in vllm bench ( #21665 )  
						
						 
						
						
						
						
					 
					
						2025-07-27 03:35:22 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						eed2f463b2 
					 
					
						
						
							
							[VLM] Support HF format Phi-4-MM model ( #17121 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <2037008807@qq.com > 
						
						
					 
					
						2025-07-26 20:07:57 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						20950b29fb 
					 
					
						
						
							
							Migrate ChameleonImagePixelInputs to TensorSchema ( #21657 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Benji Beck <benjibeck@meta.com > 
						
						
					 
					
						2025-07-26 19:34:25 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3339cba3ff 
					 
					
						
						
							
							Migrate FuyuImagePatchInputs to TensorSchema ( #21662 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Benji Beck <benjibeck@meta.com > 
						
						
					 
					
						2025-07-26 19:34:14 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0b8caf9095 
					 
					
						
						
							
							Migrate DeepseekVL2ImageInputs to TensorSchema ( #21658 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Benji Beck <benjibeck@meta.com > 
						
						
					 
					
						2025-07-26 19:34:11 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ccf27cc4d4 
					 
					
						
						
							
							Migrate Blip2ImagePixelInputs and Blip2ImageEmbeddingInputs to TensorSchema ( #21656 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Benji Beck <benjibeck@meta.com > 
						
						
					 
					
						2025-07-27 10:33:52 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c657369841 
					 
					
						
						
							
							support torch.compile for bailing moe ( #21664 )  
						
						 
						
						
						
						
					 
					
						2025-07-26 23:54:32 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6c66f28fa5 
					 
					
						
						
							
							Remove xformers requirement for Mistral-format Pixtral and Mistral3 ( #21154 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Wenchen Lo <charles761013@gmail.com > 
						
						
					 
					
						2025-07-26 17:20:29 -06:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						de509ae8eb 
					 
					
						
						
							
							[NVIDIA] Explicitly disable shuffled weights for flashinfer blockscale moe fp8 kernels ( #21411 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: kaixih <kaixih@nvidia.com > 
						
						
					 
					
						2025-07-26 07:10:36 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e7c4f9ee86 
					 
					
						
						
							
							[CI/Build][Doc] Move existing benchmark scripts in CI/document/example to vllm bench CLI ( #21355 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Ye (Charlotte) Qi <yeq@meta.com > 
						
						
					 
					
						2025-07-26 07:10:14 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9094d11c5d 
					 
					
						
						
							
							[Bugfix][Apple Silicon] fix missing symbols when build from source on Mac with Apple Silicon ( #21380 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Yeju Zhou <yejuzhou@outlook.com > 
						
						
					 
					
						2025-07-26 07:09:57 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						56e544f24b 
					 
					
						
						
							
							[Refactor] Remove moe_align_block_size_triton ( #21335 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yewentao256 <zhyanwentao@126.com > 
						
						
					 
					
						2025-07-26 07:08:29 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						97d6c30cc9 
					 
					
						
						
							
							[BugFix] Fix shared storage connector load kv only load attention layer ( #21428 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: David Chen <530634352@qq.com > 
						
						
					 
					
						2025-07-26 07:07:40 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a40a8506df 
					 
					
						
						
							
							[Misc] Improve memory profiling debug message ( #21429 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Ye (Charlotte) Qi <yeq@meta.com > 
						
						
					 
					
						2025-07-26 07:07:21 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c215f5c877 
					 
					
						
						
							
							[Bug] Fix has_flashinfer_moe Import Error when it is not installed ( #21634 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yewentao256 <zhyanwentao@126.com > 
						
						
					 
					
						2025-07-26 07:06:14 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1cd6eaba54 
					 
					
						
						
							
							Support encoder-only models without KV-Cache ( #21270 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Max de Bayser <maxdebayser@gmail.com >
Signed-off-by: Max de Bayser <mbayser@br.ibm.com >
Co-authored-by: Russell Bryant <rbryant@redhat.com > 
						
						
					 
					
						2025-07-26 21:09:52 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f27fdfc3ed 
					 
					
						
						
							
							[Bugfix] Investigate Qwen2-VL failing test ( #21527 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn > 
						
						
					 
					
						2025-07-26 06:09:29 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						de10ff0b7c 
					 
					
						
						
							
							Migrate AyaVisionImagePixelInputs to TensorSchema for shape validation ( #21622 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Benji Beck <benjibeck@meta.com > 
						
						
					 
					
						2025-07-26 06:08:18 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9d197280fa 
					 
					
						
						
							
							Migrate AriaImagePixelInputs to TensorSchema for shape validation ( #21620 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Benji Beck <benjibeck@meta.com > 
						
						
					 
					
						2025-07-26 06:08:15 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e98def439c 
					 
					
						
						
							
							[Take 2] Correctly kill vLLM processes after benchmarks ( #21646 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Huy Do <huydhn@gmail.com > 
						
						
					 
					
						2025-07-26 06:06:05 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						05c1126f29 
					 
					
						
						
							
							[Misc] remove unused try-except in pooling config check ( #21618 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-07-26 12:20:03 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						875af38e01 
					 
					
						
						
							
							Support Intern-S1 ( #21628 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Roger Wang <hey@rogerw.me >
Signed-off-by: Isotr0py <2037008807@qq.com >
Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn >
Co-authored-by: Your Name <you@example.com >
Co-authored-by: Roger Wang <hey@rogerw.me >
Co-authored-by: Isotr0py <2037008807@qq.com >
Co-authored-by: Isotr0py <mozf@mail2.sysu.edu.cn > 
						
						
					 
					
						2025-07-26 19:14:04 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7728dd77bb 
					 
					
						
						
							
							[TPU][Test] Divide TPU v1 Test into 2 parts. ( #21431 )  
						
						 
						
						
						
						
					 
					
						2025-07-26 06:20:30 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2f6e6b33fb 
					 
					
						
						
							
							[Bugfix] Fix isinstance check for tensor types in _load_prompt_embeds to use dtype comparison ( #21612 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Alexandre Juan <a.juan@netheos.net > 
						
						
					 
					
						2025-07-25 20:11:10 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a55c95096b 
					 
					
						
						
							
							Correctly kill vLLM processes after finishing serving benchmarks ( #21641 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Huy Do <huydhn@gmail.com > 
						
						
					 
					
						2025-07-25 19:06:21 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						97349fe2bc 
					 
					
						
						
							
							[Docs] add offline serving multi-modal video input expamle Qwen2.5-VL ( #21530 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: David Chen <530634352@qq.com > 
						
						
					 
					
						2025-07-25 18:37:32 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						62965de5fe 
					 
					
						
						
							
							[Model] Ultravox: Support Llama 4 and Gemma 3 backends ( #17818 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Farzad Abdolhosseini <farzad@fixie.ai >
Signed-off-by: Patrick Li <patrick8289@gmail.com >
Co-authored-by: Patrick Li <patrick8289@gmail.com > 
						
						
					 
					
						2025-07-25 18:12:31 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7ae75fa6d0 
					 
					
						
						
							
							[Feature] Add support for MoE models in the calibration-free RTN-based quantization ( #20766 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Alex Kogan <alex.kogan@oracle.com > 
						
						
					 
					
						2025-07-25 18:09:34 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f1b286b2fb 
					 
					
						
						
							
							[TPU] Update ptxla nightly version to 20250724 ( #21555 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chengji Yao <chengjiyao@google.com > 
						
						
					 
					
						2025-07-25 17:09:00 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c7742d6113 
					 
					
						
						
							
							[Bugfix] Always set RAY_ADDRESS for Ray actor before spawn ( #21540 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Rui Qiao <ruisearch42@gmail.com > 
						
						
					 
					
						2025-07-25 17:08:30 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						cea96a0156 
					 
					
						
						
							
							[Bugfix] Fix sync_and_slice_intermediate_tensors ( #21537 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Rui Qiao <ruisearch42@gmail.com > 
						
						
					 
					
						2025-07-25 17:07:58 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2eddd437ba 
					 
					
						
						
							
							Add interleaved RoPE test for Llama4 (Maverick) ( #21478 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Yong Hoon Shin <yhshin@meta.com > 
						
						
					 
					
						2025-07-25 17:07:26 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						75d29cf4e1 
					 
					
						
						
							
							[Perf] Cuda Kernel for Int8 Per Token Group Quant ( #21476 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yewentao256 <zhyanwentao@126.com > 
						
						
					 
					
						2025-07-25 17:07:07 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						41d3082c41 
					 
					
						
						
							
							Add Unsloth to RLHF.md ( #21636 )  
						
						 
						
						
						
						
					 
					
						2025-07-25 17:06:48 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7cfea0df39 
					 
					
						
						
							
							[TPU][Test] Rollback PR-21550. ( #21619 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Qiliang Cui <derrhein@gmail.com > 
						
						
					 
					
						2025-07-25 13:22:01 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5ac3168ee3 
					 
					
						
						
							
							[Docs] add auto-round quantization readme  ( #21600 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Wenhua Cheng <wenhua.cheng@intel.com >
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-07-25 08:52:42 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						396ee94180 
					 
					
						
						
							
							[CI] Unifying Dockerfiles for ARM and X86 Builds ( #21343 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Kebe <mail@kebe7jun.com > 
						
						
					 
					
						2025-07-25 07:33:56 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e189b50f53 
					 
					
						
						
							
							Add support for Prithvi in Online serving mode ( #21518 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Michele Gazzetti <michele.gazzetti1@ibm.com >
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com > 
						
						
					 
					
						2025-07-25 07:01:27 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						136d750f5f 
					 
					
						
						
							
							[Kernel] Improve machete memory bound perf ( #21556 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: czhu-cohere <conway.zhu@cohere.com > 
						
						
					 
					
						2025-07-25 06:53:21 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b3caeb82e7 
					 
					
						
						
							
							[ROCm][AITER] Enable fp8 kv cache on rocm aiter backend. ( #20295 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: fsx950223 <fsx950223@outlook.com >
Signed-off-by: amd-ruitang3 <Rui.Tang2@amd.com >
Co-authored-by: amd-ruitang3 <Rui.Tang2@amd.com > 
						
						
					 
					
						2025-07-25 06:50:21 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						eab2f3980c 
					 
					
						
						
							
							[Model] Replace Mamba2 RMSNorm Gated with Fused Triton Kernel ( #20839 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chih-Chieh-Yang <7364402+cyang49@users.noreply.github.com >
Signed-off-by: Yu Chin Fabian Lim <fabian.lim@gmail.com >
Signed-off-by: Chih-Chieh Yang <7364402+cyang49@users.noreply.github.com >
Co-authored-by: Yu Chin Fabian Lim <fabian.lim@gmail.com > 
						
						
					 
					
						2025-07-25 06:49:36 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9fe98d4250 
					 
					
						
						
							
							[Frontend] Add request_id to the Request object so they can be controlled better via external load balancers ( #21009 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Kourosh Hakhamaneshi <kourosh@anyscale.com > 
						
						
					 
					
						2025-07-25 06:49:11 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						29c6fbe58c 
					 
					
						
						
							
							[MODEL] New model support for naver-hyperclovax/HyperCLOVAX-SEED-Vision-Instruct-3B ( #20931 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: bigshanedogg <bigshane319@gmail.com > 
						
						
					 
					
						2025-07-25 06:05:42 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c72f049cb4 
					 
					
						
						
							
							[Model] Fix Ernie4.5MoE e_score_correction_bias parameter ( #21586 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: zhouchong <zhouchong03@baidu.com >
Co-authored-by: zhouchong <zhouchong03@baidu.com > 
						
						
					 
					
						2025-07-25 06:02:53 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f3a683b7c9 
					 
					
						
						
							
							[Bugfix][Logprobs] Fix logprobs op to support more backend ( #21591 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: MengqingCao <cmq0113@163.com > 
						
						
					 
					
						2025-07-25 05:53:07 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						46d81d6951 
					 
					
						
						
							
							[V1] Get supported tasks from model runner instead of model config ( #21585 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-07-25 05:36:45 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5c3f2628d5 
					 
					
						
						
							
							[Quantization] Enable BNB support for more MoE models ( #21370 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-07-25 03:57:34 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7311f74468 
					 
					
						
						
							
							[Bugfix] GGUF: fix AttributeError: 'PosixPath' object has no attribute 'startswith' ( #21579 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Kebe <mail@kebe7jun.com > 
						
						
					 
					
						2025-07-25 03:42:23 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8ed01e32f7 
					 
					
						
						
							
							Add H20-3e fused MoE kernel tuning configs for Qwen3-Coder-480B-A35B-Instruct ( #21598 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: 许文卿 <xwq391974@alibaba-inc.com > 
						
						
					 
					
						2025-07-25 02:36:55 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e38e96a3c0 
					 
					
						
						
							
							[Tests] Harden DP tests ( #21508 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-07-25 02:27:24 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						40d86ee412 
					 
					
						
						
							
							[TPU][Bugfix] fix OOM issue in CI test ( #21550 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chengji Yao <chengjiyao@google.com > 
						
						
					 
					
						2025-07-24 23:01:53 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						85d051f026 
					 
					
						
						
							
							[Misc] Removed undefined cmake variables MOE_PERMUTE_ARCHS ( #21262 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Yang Chen <yangche@fb.com > 
						
						
					 
					
						2025-07-24 22:54:23 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5140f54b89 
					 
					
						
						
							
							[CI/Build] fix cpu_extension for apple silicon ( #21195 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: ignaciosica <mignacio.sica@gmail.com > 
						
						
					 
					
						2025-07-24 22:53:59 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						947edd099e 
					 
					
						
						
							
							[Misc][Tools] make max-model-len a parameter in auto_tune script ( #21321 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chengji Yao <chengjiyao@google.com >
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> 
						
						
					 
					
						2025-07-24 22:46:43 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						fde60ee775 
					 
					
						
						
							
							[Model] Fix a check for None but the return value was empty list in Gemma3 MM vision_embeddings ( #21479 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Hongmin Fan <fanhongmin@google.com > 
						
						
					 
					
						2025-07-25 13:46:06 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b38bc652ac 
					 
					
						
						
							
							[Model] Support tensor parallel for timm ViT in Deepseek_vl2 ( #21494 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: wzqd <1057337859@qq.com > 
						
						
					 
					
						2025-07-24 22:45:16 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						adaf2c6d4f 
					 
					
						
						
							
							[Bugfix] fix modelscope snapshot_download serialization ( #21536 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Andy Xie <andy.xning@gmail.com > 
						
						
					 
					
						2025-07-24 22:44:38 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						42343f1f89 
					 
					
						
						
							
							[CI] Update CODEOWNERS for CPU and Intel GPU ( #21582 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: jiang1.li <jiang1.li@intel.com > 
						
						
					 
					
						2025-07-24 21:58:03 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						965bc71b04 
					 
					
						
						
							
							Integrate TensorSchema with shape validation for Phi3VImagePixelInputs ( #21232 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Benji Beck <benjibeck@meta.com > 
						
						
					 
					
						2025-07-24 21:43:52 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						807a328bb6 
					 
					
						
						
							
							[Docs] Add requirements/common.txt to run unit tests ( #21572 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Zhou Fang <fang.github@gmail.com > 
						
						
					 
					
						2025-07-24 20:51:15 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e0be2c4d09 
					 
					
						
						
							
							[TPU][Test] Temporarily suspend this MoE model in test_basic.py. ( #21560 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Qiliang Cui <derrhein@gmail.com > 
						
						
					 
					
						2025-07-24 20:44:50 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9c8b2c2a8a 
					 
					
						
						
							
							[DP] Support api-server-count > 0 in hybrid DP LB mode ( #21510 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-07-24 20:18:16 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2212cd6cfb 
					 
					
						
						
							
							[Bugfix] DeepGemm utils : Fix hardcoded type-cast ( #21517 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Varun Sundar Rabindranath <vsundarr@redhat.com >
Co-authored-by: Varun Sundar Rabindranath <vsundarr@redhat.com > 
						
						
					 
					
						2025-07-24 20:17:29 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ce3a9b1378 
					 
					
						
						
							
							[Kernel] adding fused_moe configs for upcoming granite4 ( #21332 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Burkhard Ringlein <ngl@zurich.ibm.com >
Co-authored-by: Thomas Parnell <tpa@zurich.ibm.com > 
						
						
					 
					
						2025-07-24 20:16:59 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2ce90e5b01 
					 
					
						
						
							
							Fix GLM-4 PP Missing Layer When using with PP. ( #21531 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: zRzRzRzRzRzRzR <2448370773@qq.com > 
						
						
					 
					
						2025-07-24 20:07:38 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						633f6e804b 
					 
					
						
						
							
							[Bug] Fix DeepGemm Init Error ( #21554 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yewentao256 <zhyanwentao@126.com > 
						
						
					 
					
						2025-07-24 20:07:22 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b57296bb9a 
					 
					
						
						
							
							[Docs] Fix site_url for RunLLM ( #21564 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-07-24 20:05:58 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						34ddcf9ff4 
					 
					
						
						
							
							[Frontend] run-batch supports V1 ( #21541 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-07-24 20:05:55 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						fe56180c7f 
					 
					
						
						
							
							[MoE] More balanced expert sharding ( #21497 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk@thinkingmachines.ai > 
						
						
					 
					
						2025-07-24 15:56:08 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						07d80d7b0e 
					 
					
						
						
							
							[TPU][TEST] HF_HUB_DISABLE_XET=1 the test 3. ( #21539 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Qiliang Cui <derrhein@gmail.com > 
						
						
					 
					
						2025-07-24 15:33:04 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2dd72d23d9 
					 
					
						
						
							
							update flashinfer to v0.2.9rc1 ( #21485 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Weiliang Liu <weiliangl@nvidia.com > 
						
						
					 
					
						2025-07-24 14:06:11 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a6c7fb8cff 
					 
					
						
						
							
							[Docs] Add Expert Parallelism Initial Documentation ( #21373 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: simon-mo <simon.mo@hey.com >
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-07-24 12:36:06 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a7272c23d0 
					 
					
						
						
							
							[Docs][minor] Fix broken gh-file link in distributed serving docs ( #21543 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Ricardo Decal <rdecal@anyscale.com > 
						
						
					 
					
						2025-07-24 10:36:56 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6066284914 
					 
					
						
						
							
							[P/D] Support CPU Transfer in NixlConnector ( #18293 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Juncheng Gu <juncgu@gmail.com >
Signed-off-by: Richard Liu <ricliu@google.com >
Co-authored-by: Richard Liu <39319471+richardsliu@users.noreply.github.com >
Co-authored-by: Richard Liu <ricliu@google.com > 
						
						
					 
					
						2025-07-24 17:58:42 +01:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1e9ea8e69d 
					 
					
						
						
							
							[P/D] Move FakeNixlWrapper to test dir ( #21328 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Rui Qiao <ruisearch42@gmail.com > 
						
						
					 
					
						2025-07-24 08:53:45 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d9f9a3fd96 
					 
					
						
						
							
							[XPU] Conditionally import CUDA-specific passes to avoid import errors on xpu platform ( #21036 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: chzhang <chaojun.zhang@intel.com > 
						
						
					 
					
						2025-07-24 23:23:36 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1b25f1fe75 
					 
					
						
						
							
							Update flashinfer CUTLASS MoE Kernel ( #21408 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Shu Wang. <shuw@nvidia.com > 
						
						
					 
					
						2025-07-24 08:13:31 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e8cb0d0495 
					 
					
						
						
							
							[Bug] Fix Compressed Tensor NVFP4 cutlass_fp4_group_mm illegal memory access ( #21465 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yewentao256 <zhyanwentao@126.com > 
						
						
					 
					
						2025-07-24 08:13:24 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						684174115d 
					 
					
						
						
							
							[Docs] Rewrite Distributed Inference and Serving guide ( #20593 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Ricardo Decal <rdecal@anyscale.com >
Co-authored-by: Simon Mo <simon.mo@hey.com >
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-07-24 08:13:05 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						cdb79ee63d 
					 
					
						
						
							
							[Docs] Update Tensorizer usage documentation ( #21190 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Sanger Steel <sangersteel@gmail.com >
Signed-off-by: William Goldby <willgoldby@gmail.com >
Co-authored-by: William Goldby <willgoldby@gmail.com > 
						
						
					 
					
						2025-07-24 06:56:18 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5a19a6c670 
					 
					
						
						
							
							[Fix] Update mamba_ssm to 2.2.5 ( #21421 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: elvischenv <219235043+elvischenv@users.noreply.github.com > 
						
						
					 
					
						2025-07-24 03:25:41 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2ded067fd2 
					 
					
						
						
							
							[Bugfix] Fix CUDA arch flags for MoE permute ( #21426 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Ming Yang <minos.future@gmail.com > 
						
						
					 
					
						2025-07-24 03:23:59 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						13abd0eaf9 
					 
					
						
						
							
							[Model] Officially support Emu3 with Transformers backend ( #21319 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-07-24 03:22:12 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						61b8cea3b4 
					 
					
						
						
							
							[Attention] Optimize FlashInfer MetadataBuilder Build call ( #21137 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com > 
						
						
					 
					
						2025-07-24 03:21:46 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						526078a96c 
					 
					
						
						
							
							bump flashinfer to v0.2.8 ( #21385 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: cjackal <44624812+cjackal@users.noreply.github.com > 
						
						
					 
					
						2025-07-24 03:20:38 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6da0078523 
					 
					
						
						
							
							[Feat] Allow custom naming of vLLM processes ( #21445 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com > 
						
						
					 
					
						2025-07-24 03:15:23 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						73e3949d07 
					 
					
						
						
							
							[Misc] Improve comment for DPEngineCoreActor._set_cuda_visible_devices() ( #21501 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Rui Qiao <ruisearch42@gmail.com > 
						
						
					 
					
						2025-07-24 03:13:40 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6eca337ce0 
					 
					
						
						
							
							Replace --expand-tools-even-if-tool-choice-none with --exclude-tools-when-tool-choice-none for v0.10.0 ( #20544 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: okada <kokuzen@gmail.com >
Signed-off-by: okada shintarou <okada@preferred.jp >
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-07-24 02:56:36 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						85bda9e7d0 
					 
					
						
						
							
							remove GLM-4.5 quantization wrong Code ( #21435 )  
						
						 
						
						
						
						
					 
					
						2025-07-24 01:52:43 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						610852a423 
					 
					
						
						
							
							[Core] Support model loader plugins ( #21067 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: 22quinn <33176974+22quinn@users.noreply.github.com > 
						
						
					 
					
						2025-07-24 01:49:44 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f0f4de8f26 
					 
					
						
						
							
							[Misc] Fix duplicate FusedMoEConfig debug messages ( #21455 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-07-24 01:27:30 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						fc5f756db4 
					 
					
						
						
							
							[v1][Core] Clean up usages of SpecializedManager ( #21407 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Zhou Fang <fang.github@gmail.com > 
						
						
					 
					
						2025-07-24 00:40:11 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e74bfc70e4 
					 
					
						
						
							
							[TPU][Bugfix] fix moe layer ( #21340 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chengji Yao <chengjiyao@google.com >
Co-authored-by: Simon Mo <simon.mo@hey.com > 
						
						
					 
					
						2025-07-24 00:38:39 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						90eeea8f85 
					 
					
						
						
							
							[Bugfix][ROCm] Fix for warp_size uses on host ( #21205 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Gregory Shtrasberg <Gregory.Shtrasberg@amd.com > 
						
						
					 
					
						2025-07-24 00:37:19 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						dde295a934 
					 
					
						
						
							
							Deduplicate Transformers backend code using inheritance ( #21461 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-07-24 00:16:23 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6d8d0a24c0 
					 
					
						
						
							
							Add think chunk ( #21333 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Julien Denize <julien.denize@mistral.ai > 
						
						
					 
					
						2025-07-23 21:51:32 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						11ef7a611e 
					 
					
						
						
							
							[BugFix] Set CUDA_VISIBLE_DEVICES before spawning the subprocesses ( #21211 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Yinghai Lu <yinghai@thinkingmachines.ai >
Signed-off-by: Nick Hill <nhill@redhat.com >
Signed-off-by: Rui Qiao <ruisearch42@gmail.com >
Co-authored-by: Nick Hill <nhill@redhat.com >
Co-authored-by: Rui Qiao <ruisearch42@gmail.com > 
						
						
					 
					
						2025-07-23 21:44:04 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						dc2f159f8a 
					 
					
						
						
							
							Dump input metadata on crash for async scheduling ( #21258 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-07-23 21:10:30 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d5b981f8b1 
					 
					
						
						
							
							[DP] Internal Load Balancing Per Node [one-pod-per-node] ( #21238 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Robert Shaw <robshaw@redhat.com >
Signed-off-by: Nick Hill <nhill@redhat.com >
Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com >
Co-authored-by: Robert Shaw <robshaw@redhat.com >
Co-authored-by: Nick Hill <nhill@redhat.com >
Co-authored-by: Tyler Michael Smith <tyler@neuralmagic.com > 
						
						
					 
					
						2025-07-23 20:57:32 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						eec6942014 
					 
					
						
						
							
							[BugFix] Fix KVConnector TP worker aggregation ( #21473 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-07-23 20:56:49 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						fd48d99ffd 
					 
					
						
						
							
							[BugFix]: Batch generation from prompt_embeds fails for long prompts ( #21390 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: KazusatoOko <kazusto.oko@sakana.ai >
Co-authored-by: KazusatoOko <kazusto.oko@sakana.ai > 
						
						
					 
					
						2025-07-23 20:43:17 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f8c15c4efb 
					 
					
						
						
							
							[Bugfix] Fix example disagg_example_p2p_nccl_xpyd.sh zombie process ( #21437 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: David Chen <530634352@qq.com > 
						
						
					 
					
						2025-07-23 20:42:11 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						aa08a954f9 
					 
					
						
						
							
							[Bugfix] Fix casing warning ( #21468 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Matthew Bonanni <mbonanni@redhat.com > 
						
						
					 
					
						2025-07-23 20:41:23 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						13e4ee1dc3 
					 
					
						
						
							
							[XPU][UT] increase intel xpu CI test scope ( #21492 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Ma, Liangliang <liangliang.ma@intel.com > 
						
						
					 
					
						2025-07-23 20:24:04 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						772ce5af97 
					 
					
						
						
							
							[Misc] Add dummy maverick test to CI ( #21324 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Ming Yang <minos.future@gmail.com >
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com > 
						
						
					 
					
						2025-07-23 20:22:42 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						63d92abb7c 
					 
					
						
						
							
							[Frontend] Set MAX_AUDIO_CLIP_FILESIZE_MB via env var instead of hardcoding ( #21374 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Deven Labovitch <deven@videa.ai > 
						
						
					 
					
						2025-07-23 20:22:19 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						11599b0e1f 
					 
					
						
						
							
							feat(gguf_loader): accept HF repo paths & URLs for GGUF ( #20793 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Hardik <hardikgupta1999@gmail.com >
Signed-off-by: Isotr0py <2037008807@qq.com >
Co-authored-by: Isotr0py <2037008807@qq.com >
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> 
						
						
					 
					
						2025-07-23 20:21:02 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f3137cdd81 
					 
					
						
						
							
							[Core] Freeze gc during cuda graph capture to speed up init ( #21146 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Codex <codex@openai.com >
Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-07-23 17:20:14 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						82ec66f514 
					 
					
						
						
							
							[V0 Deprecation] Remove Prompt Adapters ( #20588 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-07-23 16:36:48 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						78c13e30e1 
					 
					
						
						
							
							[V1] Fix local chunked attention always disabled ( #21419 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Yong Hoon Shin <yhshin@meta.com > 
						
						
					 
					
						2025-07-23 15:59:30 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5c9b807b34 
					 
					
						
						
							
							[Core] Add reload_weights RPC method ( #20096 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: 22quinn <33176974+22quinn@users.noreply.github.com > 
						
						
					 
					
						2025-07-23 14:24:52 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						14bf19e39f 
					 
					
						
						
							
							[TPU][TEST] Fix the downloading issue in TPU v1 test 11.  ( #21418 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Qiliang Cui <derrhein@gmail.com > 
						
						
					 
					
						2025-07-23 11:29:36 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4ac7713e32 
					 
					
						
						
							
							Add test case for compiling multiple graphs ( #21044 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Yong Hoon Shin <yhshin@meta.com > 
						
						
					 
					
						2025-07-23 11:00:47 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8560a5b258 
					 
					
						
						
							
							[Core][Model] PrithviMAE Enablement on vLLM v1 engine ( #20577 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Christian Pinto <christian.pinto@ibm.com > 
						
						
					 
					
						2025-07-23 11:00:23 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						316b1bf706 
					 
					
						
						
							
							[Tests] Add tests for headless internal DP LB ( #21450 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-07-23 07:49:25 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7c734ee09b 
					 
					
						
						
							
							[Bugfix][Qwen][DCA] fixes bug in dual-chunk-flash-attn backend for qwen 1m models. ( #21364 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Tao He <linzhu.ht@alibaba-inc.com > 
						
						
					 
					
						2025-07-23 06:34:37 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f59ec35b7f 
					 
					
						
						
							
							[V1] Check all pooling tasks during profiling ( #21299 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-07-23 05:53:26 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2671334d45 
					 
					
						
						
							
							[Model] add Hunyuan V1 Dense Model support. ( #21368 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Asher Zhang <asherszhang@tencent.com > 
						
						
					 
					
						2025-07-23 03:54:08 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2cc5016a19 
					 
					
						
						
							
							[Docs] Clean up v1/metrics.md ( #21449 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: windsonsea <haifeng.yao@daocloud.io > 
						
						
					 
					
						2025-07-23 03:37:25 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6929f8b437 
					 
					
						
						
							
							[Misc] fixed nvfp4_moe test failures due to invalid kwargs ( #21246 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Yang Chen <yangche@fb.com > 
						
						
					 
					
						2025-07-23 01:41:43 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						32ec9e2f2a 
					 
					
						
						
							
							Mamba V2 Test not Asserting Failures.  ( #21379 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Yu Chin Fabian Lim <flim@sg.ibm.com > 
						
						
					 
					
						2025-07-23 01:40:27 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						accac82928 
					 
					
						
						
							
							[Sampler] Introduce logprobs mode for logging ( #21398 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lu Fang <lufang@fb.com > 
						
						
					 
					
						2025-07-23 01:39:25 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						23637dcdef 
					 
					
						
						
							
							[Docs] Fix bullets and grammars in tool_calling.md ( #21440 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: windsonsea <haifeng.yao@daocloud.io > 
						
						
					 
					
						2025-07-23 01:23:20 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6364af92f8 
					 
					
						
						
							
							Fixed typo in profiling logs ( #21441 )  
						
						 
						
						
						
						
					 
					
						2025-07-23 01:18:54 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7aaa2bd5a8 
					 
					
						
						
							
							[Bugfix] ensure tool_choice is popped when tool_choice:null is passed in json payload ( #19679 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Guillaume Calmettes <gcalmettes@scaleway.com > 
						
						
					 
					
						2025-07-23 00:30:05 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2f5c14de6a 
					 
					
						
						
							
							add clear messages for deprecated models ( #21424 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: youkaichao <youkaichao@gmail.com > 
						
						
					 
					
						2025-07-23 00:03:16 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f002e9a870 
					 
					
						
						
							
							[Cleanup] Only log MoE DP setup warning if DP is enabled ( #21315 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-07-23 00:02:48 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a1f3610fc6 
					 
					
						
						
							
							[Core] Add basic unit test for maybe_evict_cached_block ( #21400 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jialin Ouyang <Jialin.Ouyang@gmail.com > 
						
						
					 
					
						2025-07-23 00:02:02 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4ecedd1806 
					 
					
						
						
							
							[Bugfix] Fix nightly transformers CI failure ( #21427 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <2037008807@qq.com > 
						
						
					 
					
						2025-07-23 00:01:01 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						107111a859 
					 
					
						
						
							
							Changing "amdproduction" allocation. ( #21409 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Alexei V. Ivanov <alexei.ivanov@amd.com > 
						
						
					 
					
						2025-07-22 20:48:31 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2dec7c1a5d 
					 
					
						
						
							
							[Bugfix][CUDA] fixes CUDA FP8 kv cache dtype supported ( #21420 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: elvischenv <219235043+elvischenv@users.noreply.github.com > 
						
						
					 
					
						2025-07-22 20:34:50 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						08d2bd78da 
					 
					
						
						
							
							[BUGFIX] deepseek-v2-lite failed due to fused_qkv_a_proj name update ( #21414 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chendi.Xue <chendi.xue@intel.com > 
						
						
					 
					
						2025-07-22 20:33:57 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4f76a05f4f 
					 
					
						
						
							
							[BugFix] Update python to python3 calls for image; fix prefix & input calculations. ( #21391 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Eric Hanley <ericehanley@google.com >
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> 
						
						
					 
					
						2025-07-22 20:33:00 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f154bb9ff0 
					 
					
						
						
							
							Simplify weight loading in Transformers backend ( #21382 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-07-22 20:29:43 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3ec7170ff1 
					 
					
						
						
							
							[Bugfix][ROCm][Build] Fix build regression on ROCm ( #21393 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Gregory Shtrasberg <Gregory.Shtrasberg@amd.com > 
						
						
					 
					
						2025-07-22 20:27:41 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c401c64b4c 
					 
					
						
						
							
							[CI/Build] Fix model executor tests ( #21387 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-07-22 20:25:37 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b77c7d327f 
					 
					
						
						
							
							[BugFix] Fix ray import error mem cleanup bug ( #21381 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Travis Johnson <tsjohnso@us.ibm.com >
Signed-off-by: Joe Runde <Joseph.Runde@ibm.com >
Co-authored-by: Travis Johnson <tsjohnso@us.ibm.com > 
						
						
					 
					
						2025-07-22 16:19:55 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						35bc8bd5fb 
					 
					
						
						
							
							[Misc] Copy HF_TOKEN env var to Ray workers ( #21406 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Rui Qiao <ruisearch42@gmail.com > 
						
						
					 
					
						2025-07-22 16:18:42 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4594fc3b28 
					 
					
						
						
							
							[Model] Add Qwen3CoderToolParser ( #21396 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: simon-mo <xmo@berkeley.edu >
Co-authored-by: simon-mo <xmo@berkeley.edu > 
						
						
					 
					
						2025-07-22 15:05:57 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ae268b6326 
					 
					
						
						
							
							Fix Flashinfer Allreduce+Norm enable disable calculation based on fi_allreduce_fusion_max_token_num ( #21325 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: XIn Li <xinli@nvidia.com > 
						
						
					 
					
						2025-07-22 12:42:31 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						35366ae57c 
					 
					
						
						
							
							[CI/Build] Fix test failure due to updated model repo ( #21375 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-07-22 08:39:35 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2226d5bd85 
					 
					
						
						
							
							[Bugfix] Decode Tokenized IDs to Strings for hf_processor in llm.chat() with model_impl=transformers ( #21353 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: ariG23498 <aritra.born2fly@gmail.com > 
						
						
					 
					
						2025-07-22 08:27:28 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						44554a0068 
					 
					
						
						
							
							Add tokenization_kwargs to encode for embedding model truncation ( #21033 )  
						
						 
						
						
						
						
					 
					
						2025-07-22 08:24:00 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						226b452a20 
					 
					
						
						
							
							Revert "[Refactor] Fix Compile Warning #1444-D ( #21208 )" ( #21384 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yewentao256 <zhyanwentao@126.com > 
						
						
					 
					
						2025-07-22 08:22:10 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f38ee34a0a 
					 
					
						
						
							
							[feat] Enable mm caching for transformers backend ( #21358 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: raushan <raushan@huggingface.co > 
						
						
					 
					
						2025-07-22 08:18:46 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b194557a6c 
					 
					
						
						
							
							Adds parallel model weight loading for runai_streamer ( #21330 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: bbartels <benjamin@bartels.dev >
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com > 
						
						
					 
					
						2025-07-22 08:15:53 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						774d0c014b 
					 
					
						
						
							
							[Perf] Cuda Kernel for Per Token Group Quant ( #21083 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yewentao256 <zhyanwentao@126.com > 
						
						
					 
					
						2025-07-22 07:27:15 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2c8db17cfd 
					 
					
						
						
							
							[feat]: add SM100 support for cutlass FP8 groupGEMM ( #20447 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Duncan Moss <djm.moss@gmail.com >
Signed-off-by: jiahanc <173873397+jiahanc@users.noreply.github.com >
Co-authored-by: jiahanc <173873397+jiahanc@users.noreply.github.com >
Co-authored-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-07-22 07:27:12 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4fb56914c5 
					 
					
						
						
							
							[perf] Add fused MLA QKV + strided layernorm ( #21116 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Mickael Seznec <mickael@mistral.ai >
Co-authored-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-07-22 07:07:44 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0df4d9b06b 
					 
					
						
						
							
							[Misc] unify variable for LLM instance v2 ( #21356 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Andy Xie <andy.xning@gmail.com > 
						
						
					 
					
						2025-07-22 06:32:36 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ed25054577 
					 
					
						
						
							
							[Core] Introduce popleft_n and append_n in FreeKVCacheBlockQueue to further optimize block_pool ( #21222 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jialin Ouyang <Jialin.Ouyang@gmail.com > 
						
						
					 
					
						2025-07-22 06:17:47 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						10904e6d75 
					 
					
						
						
							
							[benchmark] Port benchmark request sent optimization to benchmark_serving ( #21209 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jialin Ouyang <Jialin.Ouyang@gmail.com > 
						
						
					 
					
						2025-07-22 05:28:00 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a32237665d 
					 
					
						
						
							
							[Core] Optimize update checks in LogitsProcessor ( #21245 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jialin Ouyang <Jialin.Ouyang@gmail.com > 
						
						
					 
					
						2025-07-22 05:27:18 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						bc8a8ce5ec 
					 
					
						
						
							
							[Misc] Remove deprecated args in v0.10 ( #21349 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Kebe <mail@kebe7jun.com > 
						
						
					 
					
						2025-07-22 05:26:39 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						32142b3c62 
					 
					
						
						
							
							[Bugfix] Fix eviction cached blocked logic ( #21357 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: simon-mo <simon.mo@hey.com > 
						
						
					 
					
						2025-07-22 01:18:40 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						82b8027be6 
					 
					
						
						
							
							Add arcee model ( #21296 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: alyosha-swamy <raghav@arcee.ai >
Signed-off-by: Jee Jee Li <pandaleefree@gmail.com >
Co-authored-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-07-22 00:57:43 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3779eb8c81 
					 
					
						
						
							
							[Feature][eplb] add verify ep or tp or dp ( #21102 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: rongfu.leng <rongfu.leng@daocloud.io > 
						
						
					 
					
						2025-07-21 23:41:14 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9e23ad9655 
					 
					
						
						
							
							Update fp4 quantize API ( #21327 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Shu Wang <shuw@nvidia.com > 
						
						
					 
					
						2025-07-21 23:40:21 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e69a92a1ce 
					 
					
						
						
							
							[Bug] DeepGemm: Fix Cuda Init Error ( #21312 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yewentao256 <zhyanwentao@126.com > 
						
						
					 
					
						2025-07-21 23:36:18 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8425f785ad 
					 
					
						
						
							
							[Misc] DeepEPHighThroughtput - Enable Inductor pass ( #21311 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Varun Sundar Rabindranath <vsundarr@redhat.com >
Co-authored-by: Varun Sundar Rabindranath <vsundarr@redhat.com > 
						
						
					 
					
						2025-07-21 23:35:45 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c17231e827 
					 
					
						
						
							
							Fix kv_cache_dtype handling for out-of-tree HPU plugin ( #21302 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Konrad Zawora <kzawora@habana.ai >
Signed-off-by: Chendi.Xue <chendi.xue@intel.com >
Co-authored-by: Chendi.Xue <chendi.xue@intel.com > 
						
						
					 
					
						2025-07-21 23:35:14 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6e5b5ca580 
					 
					
						
						
							
							[Refactor] Fix Compile Warning #1444-D ( #21208 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yewentao256 <zhyanwentao@126.com > 
						
						
					 
					
						2025-07-21 23:33:51 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						488d8a986a 
					 
					
						
						
							
							[V1] [Hybrid] Add new test to verify that hybrid views into KVCacheTensor are compatible ( #21300 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Thomas Parnell <tpa@zurich.ibm.com > 
						
						
					 
					
						2025-07-21 23:31:18 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						af376ca19d 
					 
					
						
						
							
							[Core] Minimize number of dict lookup in _maybe_evict_cached_block ( #21281 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jialin Ouyang <Jialin.Ouyang@gmail.com > 
						
						
					 
					
						2025-07-21 22:37:34 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e7b2042681 
					 
					
						
						
							
							Revert "[Performance] Performance improvements in non-blockwise fp8 CUTLASS MoE ( #20762 ) ( #21334 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Ming Yang <minos.future@gmail.com > 
						
						
					 
					
						2025-07-21 21:49:01 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						90f1e55421 
					 
					
						
						
							
							[Intel GPU] Ray Compiled Graph avoid NCCL for Intel GPU ( #21338 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: ratnampa <ratnam.parikh@intel.com > 
						
						
					 
					
						2025-07-21 21:48:27 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5e70dcd6e6 
					 
					
						
						
							
							[Doc] Fix CPU doc format ( #21316 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: jiang1.li <jiang1.li@intel.com > 
						
						
					 
					
						2025-07-21 21:47:49 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						25d585ab7b 
					 
					
						
						
							
							[XPU] Enable external_launcher to serve as an executor via torchrun ( #21021 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: chzhang <chaojun.zhang@intel.com > 
						
						
					 
					
						2025-07-21 21:47:35 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8d0a01a5f2 
					 
					
						
						
							
							[v1][sampler] Inplace logprobs comparison to get the token rank ( #21283 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lu Fang <lufang@fb.com > 
						
						
					 
					
						2025-07-21 13:47:47 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0ec82edda5 
					 
					
						
						
							
							[perf] Speed up align sum kernels ( #21079 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Himanshu Jaju <hj@mistral.ai > 
						
						
					 
					
						2025-07-21 11:19:23 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						005ae9be6c 
					 
					
						
						
							
							Fix bad lm-eval fork ( #21318 )  
						
						 
						
						
						
						
					 
					
						2025-07-21 10:47:51 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						29d1ffc5b4 
					 
					
						
						
							
							[DP] Fix Prometheus Logging ( #21257 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Robert Shaw <robshaw@redhat.com >
Co-authored-by: Robert Shaw <robshaw@redhat.com > 
						
						
					 
					
						2025-07-21 09:11:35 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						304dce7ec0 
					 
					
						
						
							
							[Attention] Clean up iRoPE in V1 ( #21188 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com >
Co-authored-by: Michael Goin <mgoin64@gmail.com > 
						
						
					 
					
						2025-07-21 09:10:30 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6ece16c4fe 
					 
					
						
						
							
							[Misc] Add dummy maverick test ( #21199 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Ming Yang <minos.future@gmail.com >
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> 
						
						
					 
					
						2025-07-21 09:08:09 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a0e827e07c 
					 
					
						
						
							
							[BugFix] make utils.current_stream thread-safety ( #21252 ) ( #21253 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: simpx <simpxx@gmail.com > 
						
						
					 
					
						2025-07-21 09:07:36 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a15a50fc17 
					 
					
						
						
							
							[CPU] Enable shared-memory based pipeline parallel for CPU backend ( #21289 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: jiang1.li <jiang1.li@intel.com > 
						
						
					 
					
						2025-07-21 09:07:08 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6dda13c86b 
					 
					
						
						
							
							[Misc] Add sliding window to flashinfer test ( #21282 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-07-21 08:37:49 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6b46c4b653 
					 
					
						
						
							
							Add Nvidia ModelOpt config adaptation ( #19815 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Zhiyu Cheng <zhiyuc@nvidia.com > 
						
						
					 
					
						2025-07-21 10:02:58 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d97841078b 
					 
					
						
						
							
							[Misc] unify variable for LLM instance ( #20996 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Andy Xie <andy.xning@gmail.com > 
						
						
					 
					
						2025-07-21 12:18:33 +01:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e6b90a2805 
					 
					
						
						
							
							[Docs] Make tables more space efficient in supported_models.md ( #21291 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-07-21 02:25:02 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						be54a951a3 
					 
					
						
						
							
							[Docs] Fix hardcoded links in docs ( #21287 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-07-21 02:23:57 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						042af0c8d3 
					 
					
						
						
							
							[Model][1/N] Support multiple poolers at model level ( #21227 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-07-21 02:22:21 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						378d33c392 
					 
					
						
						
							
							[Bugfix] Fix missing placeholder in logger debug ( #21280 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-07-20 22:50:06 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						940af1f03a 
					 
					
						
						
							
							Add the instruction to run e2e validation manually before release ( #21023 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Huy Do <huydhn@gmail.com > 
						
						
					 
					
						2025-07-20 22:29:18 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						92615d7fe8 
					 
					
						
						
							
							[Docs] Add RFC Meeting to Issue Template ( #21279 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: simon-mo <simon.mo@hey.com > 
						
						
					 
					
						2025-07-20 21:58:07 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8188196a1c 
					 
					
						
						
							
							[CI] Cleanup modelscope version constraint in Dockerfile ( #21243 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Kay Yan <kay.yan@daocloud.io > 
						
						
					 
					
						2025-07-20 20:13:02 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7ba34b1241 
					 
					
						
						
							
							[bugfix] fix syntax warning caused by backslash ( #21251 )  
						
						 
						
						
						
						
					 
					
						2025-07-20 17:12:10 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9499e26e2a 
					 
					
						
						
							
							[Model] Support VLMs with transformers backend ( #20543 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: raushan <raushan@huggingface.co >
Signed-off-by: Isotr0py <2037008807@qq.com >
Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn >
Co-authored-by: Isotr0py <2037008807@qq.com >
Co-authored-by: Isotr0py <mozf@mail2.sysu.edu.cn >
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com > 
						
						
					 
					
						2025-07-20 13:25:50 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						51ba839555 
					 
					
						
						
							
							[Model] use AutoWeightsLoader for bart ( #18299 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: calvin chen <120380290@qq.com > 
						
						
					 
					
						2025-07-20 08:15:50 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d1fb65bde3 
					 
					
						
						
							
							Enable v1 metrics tests ( #20953 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Seiji Eicher <seiji@anyscale.com > 
						
						
					 
					
						2025-07-20 03:22:02 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3a1d8940ae 
					 
					
						
						
							
							[TPU] support fp8 kv cache quantization ( #19292 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chengji Yao <chengjiyao@google.com > 
						
						
					 
					
						2025-07-20 03:01:00 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2b504eb770 
					 
					
						
						
							
							[Docs] [V1] Update docs to remove enforce_eager limitation for hybrid models. ( #21233 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Thomas Parnell <tpa@zurich.ibm.com > 
						
						
					 
					
						2025-07-19 16:09:58 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						10eb24cc91 
					 
					
						
						
							
							GLM-4 Update ( #20736 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: zRzRzRzRzRzRzR <2448370773@qq.com >
Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn >
Signed-off-by: Lu Fang <fanglu@fb.com >
Co-authored-by: Isotr0py <mozf@mail2.sysu.edu.cn >
Co-authored-by: Lu Fang <fanglu@fb.com > 
						
						
					 
					
						2025-07-19 22:40:31 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2e8cbb58f3 
					 
					
						
						
							
							[BugFix] Fix full cuda graph slot_mapping ( #21228 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: fhl2000 <63384265+fhl2000@users.noreply.github.com > 
						
						
					 
					
						2025-07-19 14:13:18 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						752c6ade2e 
					 
					
						
						
							
							[V0 Deprecation] Deprecate BlockSparse Attention & Phi3-Small ( #21217 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-07-19 13:53:17 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						881e3cbe3b 
					 
					
						
						
							
							[V1] [Hybrid] Enable piecewise CUDA Graph for mamba layers  ( #21194 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Thomas Parnell <tpa@zurich.ibm.com > 
						
						
					 
					
						2025-07-19 19:27:21 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9f414a12ad 
					 
					
						
						
							
							[BugFix] Make PD work with Ray ( #21072 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Kourosh Hakhamaneshi <kourosh@anyscale.com > 
						
						
					 
					
						2025-07-19 08:46:50 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6a971ed692 
					 
					
						
						
							
							[Docs] Update the link to the 'Prometheus/Grafana' example ( #21225 )  
						
						 
						
						
						
						
					 
					
						2025-07-19 06:58:07 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						da6579bf41 
					 
					
						
						
							
							[CI/CD][bugfix]fix: error argument to loads has incompatible type ( #21223 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Sungjae Lee <33976427+llsj14@users.noreply.github.com >
Signed-off-by: Sungjae Lee <sung-jae.lee@navercorp.com > 
						
						
					 
					
						2025-07-19 05:16:48 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c81259d33a 
					 
					
						
						
							
							Fix/remove some broken model executor tests ( #21224 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Rabi Mishra <ramishra@redhat.com > 
						
						
					 
					
						2025-07-19 12:15:07 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e3a0e43d7f 
					 
					
						
						
							
							[bugfix] Fix auto thread-binding when world_size > 1 in CPU backend and refactor code ( #21032 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: jiang1.li <jiang1.li@intel.com > 
						
						
					 
					
						2025-07-19 05:13:55 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b3d82108e7 
					 
					
						
						
							
							[Bugfix][Frontend] Fix openai CLI arg middleware ( #21220 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: 22quinn <33176974+22quinn@users.noreply.github.com > 
						
						
					 
					
						2025-07-19 02:40:38 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6d0734c562 
					 
					
						
						
							
							[NVIDIA] Add SM100 Flashinfer MoE blockscale fp8 backend for low latency ( #20645 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: kaixih <kaixih@nvidia.com >
Signed-off-by: mgoin <mgoin64@gmail.com >
Co-authored-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-07-19 02:33:01 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7d94577138 
					 
					
						
						
							
							Add torch golden impl for moe_align_block_size kernel test ( #20653 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Shixian Cui <shixian@amazon.com >
Co-authored-by: Shixian Cui <shixian@amazon.com > 
						
						
					 
					
						2025-07-19 02:32:36 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						59f935300c 
					 
					
						
						
							
							[BugFix] Fix potential cuda-graph IMA ( #21196 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com > 
						
						
					 
					
						2025-07-19 02:18:47 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						18e519ec86 
					 
					
						
						
							
							[Bugfix] Fix ndarray video color from VideoAsset ( #21064 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <2037008807@qq.com > 
						
						
					 
					
						2025-07-19 02:17:16 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1eaff27815 
					 
					
						
						
							
							[V0 deprecation] Remove long context LoRA ( #21169 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-07-19 02:15:41 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						cf8cc32674 
					 
					
						
						
							
							Fix a couple of Voxtral tests ( #21218 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Huy Do <huydhn@gmail.com > 
						
						
					 
					
						2025-07-19 09:13:41 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3a2cb2649d 
					 
					
						
						
							
							[Misc][Tools][Benchmark] Add readme file for auto_tune script ( #20779 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chenyaaang <chenyangli@google.com > 
						
						
					 
					
						2025-07-19 09:06:59 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3e04107d97 
					 
					
						
						
							
							[Model] EXAONE 4.0 model support ( #21060 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Deepfocused <rlawhdrhs27@gmail.com >
Signed-off-by: woongsik <rlawhdrhs27@gmail.com > 
						
						
					 
					
						2025-07-19 14:25:44 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						37bd8d6e4c 
					 
					
						
						
							
							[Bug] DeepGemm: Fix TypeError: per_block_cast_to_fp8() missing 1 required positional argument: 'use_ue8m0' for SM100 ( #21187 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yewentao256 <zhyanwentao@126.com > 
						
						
					 
					
						2025-07-18 23:25:22 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						468e2400fe 
					 
					
						
						
							
							[BugFix][CPU] Fix TorchSDPABackendImpl doesn't have use_irope  ( #21200 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com > 
						
						
					 
					
						2025-07-18 23:18:48 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						dcc6cfb991 
					 
					
						
						
							
							[Kernel][Performance] Tweak MoE Batched silu_mul_fp8_quant_deep_gemm kernel ( #21193 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Varun Sundar Rabindranath <vsundarr@redhat.com >
Co-authored-by: Varun Sundar Rabindranath <vsundarr@redhat.com > 
						
						
					 
					
						2025-07-18 23:09:51 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						dd572c0ab3 
					 
					
						
						
							
							[V0 Deprecation] Remove V0 Spec Decode workers ( #21152 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-07-18 21:47:50 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9ffe905a41 
					 
					
						
						
							
							[Bugfix][Model] Fix LoRA for Mistral-Small-3.1-24B-Instruct-2503 ( #21183 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Varun Sundar Rabindranath <varun@neuralmagic.com >
Co-authored-by: Varun Sundar Rabindranath <varun@neuralmagic.com > 
						
						
					 
					
						2025-07-18 21:15:03 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9a9fda1423 
					 
					
						
						
							
							[Core] Support Local Chunked Attention for Hybrid KV Cache ( #19351 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lucia Fang <fanglu@fb.com >
Signed-off-by: Lu Fang <fanglu@meta.com >
Signed-off-by: Lu Fang <fanglu@fb.com >
Co-authored-by: Lu Fang <fanglu@meta.com > 
						
						
					 
					
						2025-07-18 20:48:38 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						466e878f2a 
					 
					
						
						
							
							[Quantization] Enable BNB support for more MoE models ( #21100 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-07-18 17:52:02 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						217937221b 
					 
					
						
						
							
							Elastic Expert Parallel Initial Support ( #20775 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Rui Qiao <ruisearch42@gmail.com > 
						
						
					 
					
						2025-07-18 17:46:09 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5782581acf 
					 
					
						
						
							
							[Bugfix] Voxtral on Blackwell GPUs (RTX 50 series) ( #21077 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: hax0r31337 <liulihaocaiqwq@gmail.com > 
						
						
					 
					
						2025-07-18 18:40:18 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0f199f197b 
					 
					
						
						
							
							[Core] Avoid KVCacheBlock.__eq__ invocations in FreeKVCacheBlockQueue ( #21005 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jialin Ouyang <jialino@meta.com > 
						
						
					 
					
						2025-07-18 12:34:40 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b2eb2b5ad7 
					 
					
						
						
							
							[Kernel] Apply torch.Tag.needs_fixed_stride_order only for torch==2.6.0 ( #19346 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: rzou <zou3519@gmail.com > 
						
						
					 
					
						2025-07-18 14:10:21 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						21274ab476 
					 
					
						
						
							
							[CI] Update CODEOWNERS for vllm/compilation ( #21185 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Richard Zou <zou3519@gmail.com > 
						
						
					 
					
						2025-07-18 06:51:12 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ed8cbfedf8 
					 
					
						
						
							
							Let GraniteMoeAttention use YaRN ( #21174 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Thomas Parnell <tpa@zurich.ibm.com > 
						
						
					 
					
						2025-07-18 05:52:52 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						45badd05d0 
					 
					
						
						
							
							[Core] Set pooling params based on task and model ( #21128 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-07-18 05:41:17 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4adc66f64d 
					 
					
						
						
							
							[Bugfix] Allocate less memory in non-batched CUTLASS MoE ( #21121 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: ElizaWszola <ewszola@redhat.com > 
						
						
					 
					
						2025-07-18 18:55:52 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						55ad648715 
					 
					
						
						
							
							[Doc] Fix typo in model name ( #21178 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-07-18 03:55:10 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5895afd780 
					 
					
						
						
							
							[Bugfix] The special_tokens in tokenizer should also be controlled by do_lower_case in encoder_config. ( #20750 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: wang.yuqi <noooop@126.com > 
						
						
					 
					
						2025-07-18 09:10:47 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ca4eb82bcb 
					 
					
						
						
							
							[Model] Re-add the implicit conversion feature for as_seq_cls_model ( #21103 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: wang.yuqi <noooop@126.com > 
						
						
					 
					
						2025-07-18 07:15:07 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ba2dfbb0c2 
					 
					
						
						
							
							[Misc] Make MM embedding merge interface explicit in model runner ( #21147 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Roger Wang <hey@rogerw.me >
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> 
						
						
					 
					
						2025-07-18 07:13:57 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1bf65138f6 
					 
					
						
						
							
							[benchmark] Sending request strictly follows the random intervals ( #21108 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jialin Ouyang <Jialin.Ouyang@gmail.com > 
						
						
					 
					
						2025-07-18 06:22:08 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						54cf1cae62 
					 
					
						
						
							
							[Misc] Do not print async output warning for v1 ( #21151 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-07-17 21:57:02 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5780121c95 
					 
					
						
						
							
							[Perf] Add swap_ab to SM90 FP8 non-block CUTLASS moe grouped gemm ( #20911 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Shixian Cui <shixian@amazon.com >
Co-authored-by: Shixian Cui <shixian@amazon.com > 
						
						
					 
					
						2025-07-18 04:34:43 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c7d8724e78 
					 
					
						
						
							
							[Core] FlashInfer CUTLASS fused MoE backend (NVFP4) ( #20037 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: shuw <shuw@nvidia.com >
Signed-off-by: mgoin <mgoin64@gmail.com >
Co-authored-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-07-17 21:32:45 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b38baabcf9 
					 
					
						
						
							
							[Doc] Add inplace weights loading example ( #19640 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: 22quinn <33176974+22quinn@users.noreply.github.com > 
						
						
					 
					
						2025-07-17 21:12:23 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						89cab4d01f 
					 
					
						
						
							
							[Attention] Make local attention backend agnostic ( #21093 )  
						
						 
						
						
						
						
					 
					
						2025-07-18 00:10:42 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b9a21e9173 
					 
					
						
						
							
							[Docs] Update supported models documentation with missing models ( #20844 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lu Fang <fanglu@fb.com > 
						
						
					 
					
						2025-07-17 20:12:13 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c4e3b12524 
					 
					
						
						
							
							[Docs] Add minimal demo of Ray Data API usage ( #21080 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Ricardo Decal <rdecal@anyscale.com > 
						
						
					 
					
						2025-07-17 20:09:19 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8dfb45ca33 
					 
					
						
						
							
							[Bugfix] Fix the tensor non-contiguous issue for Flashinfer TRT-LLM backend attention kernel ( #21133 )  
						
						 
						
						
						
						
					 
					
						2025-07-18 00:35:58 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8a8fc94639 
					 
					
						
						
							
							[Log] Debugging Log with more Information ( #20770 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yewentao256 <zhyanwentao@126.com > 
						
						
					 
					
						2025-07-18 00:19:46 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4de7146351 
					 
					
						
						
							
							[V0 deprecation] Remove V0 HPU backend ( #21131 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-07-17 16:37:36 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ac9fb732a5 
					 
					
						
						
							
							On environments where numa cannot be detected we get 0 ( #21115 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Eric Curtin <ecurtin@redhat.com > 
						
						
					 
					
						2025-07-17 18:52:17 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a3a6c695f4 
					 
					
						
						
							
							[Misc] Qwen MoE model supports LoRA ( #20932 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-07-17 18:32:52 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						90bd2ab6e3 
					 
					
						
						
							
							[Model] Update pooling model interface ( #21058 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-07-17 16:05:40 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9fb2d22032 
					 
					
						
						
							
							[Performance] Performance improvements in non-blockwise fp8 CUTLASS MoE ( #20762 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: ElizaWszola <ewszola@redhat.com > 
						
						
					 
					
						2025-07-17 09:56:44 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2d6a38209b 
					 
					
						
						
							
							[Docs] Move code block out of admonition now that it's short ( #21118 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-07-17 06:12:29 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						89e3c4e9b4 
					 
					
						
						
							
							[Misc] Avoid unnecessary import ( #21106 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com > 
						
						
					 
					
						2025-07-17 12:57:41 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						fe8a2c544a 
					 
					
						
						
							
							[Docs] Improve docstring formatting for FusedMoEParallelConfig.make ( #21117 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-07-17 04:13:00 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4ef00b5cac 
					 
					
						
						
							
							[VLM] Add Nemotron-Nano-VL-8B-V1 support ( #20349 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Kyle Huang <kylhuang@nvidia.com >
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com > 
						
						
					 
					
						2025-07-17 03:07:55 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5a7fb3ab9e 
					 
					
						
						
							
							[Model] Add ToolParser and MoE Config for Hunyuan A13B  ( #20820 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Asher Zhang <asherszhang@tencent.com > 
						
						
					 
					
						2025-07-17 09:10:09 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						11dfdf21bf 
					 
					
						
						
							
							[Kernel] DeepGemm MoE : Integrate triton permute / unpermute kernels  ( #20903 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Varun Sundar Rabindranath <vsundarr@redhat.com >
Co-authored-by: Varun Sundar Rabindranath <vsundarr@redhat.com > 
						
						
					 
					
						2025-07-17 08:10:37 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						fdc5b43d20 
					 
					
						
						
							
							[Bugfix]: Fix final_res_batch list index out of range error ( #21055 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com > 
						
						
					 
					
						2025-07-17 00:29:09 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c5b8b5953a 
					 
					
						
						
							
							[Misc] Fix PhiMoE expert mapping ( #21085 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-07-17 05:47:49 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4fcef49ec4 
					 
					
						
						
							
							[V1] [KVConnector] Fix MultiprocExecutor worker output aggregation ( #21048 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: David Ben-David <davidb@pliops.com >
Co-authored-by: David Ben-David <davidb@pliops.com > 
						
						
					 
					
						2025-07-17 13:29:45 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8a4e5c5f3c 
					 
					
						
						
							
							[V1][P/D]Enhance Performance and code readability for P2pNcclConnector ( #20906 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Abatom <abzhonghua@gmail.com > 
						
						
					 
					
						2025-07-16 22:13:00 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						76b494444f 
					 
					
						
						
							
							[Attention] Refactor attention metadata builder interface ( #20466 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com > 
						
						
					 
					
						2025-07-17 04:44:25 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						28a6d5423d 
					 
					
						
						
							
							[Bugfix] Fix Machete zero point issue for GPTQ models on SM90 ( #21066 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-07-16 19:54:45 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						58760e12b1 
					 
					
						
						
							
							[TPU] Start using python 3.12 ( #21000 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Xiongfei Wei <isaacwxf23@gmail.com > 
						
						
					 
					
						2025-07-16 19:37:44 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a50d918225 
					 
					
						
						
							
							[Docker] Allow FlashInfer to be built in the ARM CUDA Dockerfile ( #21013 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-07-16 19:37:13 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c9ba8104ed 
					 
					
						
						
							
							[Bugfix] weight loading use correct tp_group with patch_tensor_parallel_group ( #21024 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: KevinXiong-C <kevin_xiong1997@outlook.com > 
						
						
					 
					
						2025-07-16 19:36:36 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4e7dfbe7b4 
					 
					
						
						
							
							Update PyTorch to torch==2.7.1 for CUDA ( #21011 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-07-17 02:30:44 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						72ad273582 
					 
					
						
						
							
							Remove torch_xla.tpu.version() from pallas.py. ( #21065 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Qiliang Cui <derrhein@gmail.com > 
						
						
					 
					
						2025-07-17 00:25:26 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						01513a334a 
					 
					
						
						
							
							Support FP8 Quantization and Inference Run on Intel Gaudi (HPU) using INC (Intel Neural Compressor) ( #12010 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nir David <ndavid@habana.ai >
Signed-off-by: Uri Livne <ulivne@habana.ai >
Co-authored-by: Uri Livne <ulivne@habana.ai > 
						
						
					 
					
						2025-07-16 15:33:41 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ac2bf41e53 
					 
					
						
						
							
							[Model] Remove model sampler ( #21059 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-07-16 19:03:37 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a931b4cdcf 
					 
					
						
						
							
							Remove Qwen Omni workaround that's no longer necessary ( #21057 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-07-16 16:25:23 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a0f8a79646 
					 
					
						
						
							
							[fix] fix qwen image_embeds input ( #21049 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: h-avsha <avshalom.manevich@hcompany.ai > 
						
						
					 
					
						2025-07-16 15:17:20 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						18bdcf4113 
					 
					
						
						
							
							feat - add a new endpoint get_tokenizer_info to provide tokenizer/chat-template information ( #20575 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: m-misiura <mmisiura@redhat.com > 
						
						
					 
					
						2025-07-16 21:52:14 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1c3198b6c4 
					 
					
						
						
							
							[Model] Consolidate pooler implementations ( #20927 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-07-16 13:39:13 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						260127ea54 
					 
					
						
						
							
							[Docs] Add intro and fix 1-2-3 list in frameworks/open-webui.md ( #19199 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: windsonsea <haifeng.yao@daocloud.io > 
						
						
					 
					
						2025-07-16 06:11:38 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d0dc4cfca4 
					 
					
						
						
							
							Fix inadvertently silenced PP tests for mp, add DeepSeek V2/V3 model family to PP tests ( #20831 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Seiji Eicher <seiji@anyscale.com > 
						
						
					 
					
						2025-07-16 00:14:49 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d31a647124 
					 
					
						
						
							
							[BugFix] Fix import error on non-blackwell machines ( #21020 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com > 
						
						
					 
					
						2025-07-15 22:27:29 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						85431bd9ad 
					 
					
						
						
							
							[TPU] fix kv_cache_update kernel block size choosing logic ( #21007 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chengji Yao <chengjiyao@google.com > 
						
						
					 
					
						2025-07-16 04:39:48 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c11013db8b 
					 
					
						
						
							
							[Meta] Llama4 EAGLE Support ( #20591 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: qizixi <qizixi@meta.com >
Co-authored-by: qizixi <qizixi@meta.com > 
						
						
					 
					
						2025-07-15 21:14:15 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1eb2b9c102 
					 
					
						
						
							
							[CI] update typos config for CI pre-commit and fix some spells ( #20919 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Peter Pan <Peter.Pan@daocloud.io > 
						
						
					 
					
						2025-07-15 21:12:40 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6ebf313790 
					 
					
						
						
							
							Avoid direct comparison of floating point numbers ( #21002 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Max de Bayser <mbayser@br.ibm.com > 
						
						
					 
					
						2025-07-15 21:12:14 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						cfbcb9ed87 
					 
					
						
						
							
							[Voxtral] Add more tests ( #21010 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Patrick von Platen <patrick.v.platen@gmail.com >
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> 
						
						
					 
					
						2025-07-15 21:11:49 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						76ddeff293 
					 
					
						
						
							
							[Doc] Remove duplicate docstring ( #21012 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yewentao256 <zhyanwentao@126.com > 
						
						
					 
					
						2025-07-15 20:09:13 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f46098335b 
					 
					
						
						
							
							[Bugfix] Fix Mistral3 support on SM100/SM120 ( #20998 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-07-15 20:08:41 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e9534c7202 
					 
					
						
						
							
							[CI][HPU] update for v0 deprecate by switching to VLLM_TARGET_DEVICE=empty ( #21006 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chendi.Xue <chendi.xue@intel.com > 
						
						
					 
					
						2025-07-15 20:07:05 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7976446015 
					 
					
						
						
							
							Add Dockerfile argument for VLLM_USE_PRECOMPILED environment ( #20943 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: dougbtv <dosmith@redhat.com > 
						
						
					 
					
						2025-07-15 19:53:57 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						fcb9f879c1 
					 
					
						
						
							
							[Bugfix] Correct per_act_token in CompressedTensorsW8A8Fp8MoECutlassM… ( #20937 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Ming Yang <minos.future@gmail.com > 
						
						
					 
					
						2025-07-15 19:53:42 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3ed94f9d0a 
					 
					
						
						
							
							[Docs] Enhance Anyscale documentation, add quickstart links for vLLM ( #21018 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Ricardo Decal <rdecal@anyscale.com > 
						
						
					 
					
						2025-07-15 19:46:56 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						fa839565f2 
					 
					
						
						
							
							[Misc] Refactor: Improve argument handling for conda command ( #20481 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-07-15 19:43:19 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						75a99b98bf 
					 
					
						
						
							
							[Chore] Remove outdated transformers check ( #20989 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Brayden Zhong <b8zhong@uwaterloo.ca > 
						
						
					 
					
						2025-07-15 19:42:40 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b5c3b68359 
					 
					
						
						
							
							[Misc] bump xgrammar version to v0.1.21 ( #20992 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com > 
						
						
					 
					
						2025-07-15 19:42:16 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6cbc4d4bea 
					 
					
						
						
							
							[Model] Add ModelConfig class for GraniteMoeHybrid to override default max_seq_len_to_capture ( #20923 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Thomas Parnell <tpa@zurich.ibm.com > 
						
						
					 
					
						2025-07-15 19:19:10 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						153c6f1e61 
					 
					
						
						
							
							[Frontend] Remove print left in FrontendArgs.add_cli_args ( #21004 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-07-15 19:18:41 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						34cda778a0 
					 
					
						
						
							
							[Frontend] OpenAI Responses API supports input image ( #20975 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com > 
						
						
					 
					
						2025-07-15 18:59:36 -06:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						30800b01c2 
					 
					
						
						
							
							[Nvidia] Integrate SM100 cudnn prefill API to MLA prefill ( #20411 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Elfie Guo <elfieg@nvidia.com >
Co-authored-by: Elfie Guo <eflieg@nvidia.com > 
						
						
					 
					
						2025-07-15 17:56:45 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						10be209493 
					 
					
						
						
							
							[Bug Fix] get_distributed_init_method should get the ip from get_ip i… ( #20889 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chen Li <lcpingping@gmail.com >
Co-authored-by: Russell Bryant <rbryant@redhat.com >
Signed-off-by: Russell Bryant <rbryant@redhat.com > 
						
						
					 
					
						2025-07-15 21:23:52 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						19c863068b 
					 
					
						
						
							
							[Frontend] Support cache_salt in /v1/completions and /v1/responses ( #20981 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Marko Rosenmueller <5467316+dr75@users.noreply.github.com > 
						
						
					 
					
						2025-07-15 21:01:04 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f29fd8a7f8 
					 
					
						
						
							
							[BugFix] fix 3 issues: (1) using metadata for causal-conv1d, (2) indexing overflow in v1 vLLM, and (3) init_states in v0 ( #20838 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Tuan M. Hoang-Trong <tmhoangt@us.ibm.com >
Co-authored-by: Tuan M. Hoang-Trong <tmhoangt@us.ibm.com > 
						
						
					 
					
						2025-07-15 16:08:26 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ed10f3cea1 
					 
					
						
						
							
							[ROCm] warpSize is being made non constexpr in ROCm 7.0 ( #20330 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Gregory Shtrasberg <Gregory.Shtrasberg@amd.com > 
						
						
					 
					
						2025-07-15 14:01:44 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b637e9dcb8 
					 
					
						
						
							
							Add full serve CLI reference back to docs ( #20978 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-07-15 17:42:30 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1e36c8687e 
					 
					
						
						
							
							[Deprecation] Remove nullable_kvs ( #20969 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-07-15 17:21:50 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5bac61362b 
					 
					
						
						
							
							Configure Gemini ( #20971 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-07-15 09:37:05 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						313ae8c16a 
					 
					
						
						
							
							[Deprecation] Remove everything scheduled for removal in v0.10.0 ( #20979 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-07-15 15:57:53 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c847e34b39 
					 
					
						
						
							
							[CI/Build] Fix wrong path in Transformers Nightly Models Test ( #20994 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-07-15 08:53:16 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e7e3e6d263 
					 
					
						
						
							
							Voxtral ( #20970 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Patrick von Platen <patrick.v.platen@gmail.com >
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com > 
						
						
					 
					
						2025-07-15 07:35:30 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4ffd963fa0 
					 
					
						
						
							
							[v1][core] Support for attention free models ( #20811 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Christian Pinto <christian.pinto@ibm.com > 
						
						
					 
					
						2025-07-15 14:20:01 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						56fe4bedd6 
					 
					
						
						
							
							[Deprecation] Remove TokenizerPoolConfig ( #20968 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-07-15 14:00:50 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d91278181d 
					 
					
						
						
							
							[doc] Add more details for Ray-based DP ( #20948 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Rui Qiao <ruisearch42@gmail.com > 
						
						
					 
					
						2025-07-15 05:37:12 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						20149d84d9 
					 
					
						
						
							
							[MISC] Add init files for python package ( #20908 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: wangli <wangli858794774@gmail.com > 
						
						
					 
					
						2025-07-15 12:16:33 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3534c39a20 
					 
					
						
						
							
							[V1] [Hybrid] Refactor mamba state shape calculation; enable V1 via cli  ( #20840 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Thomas Parnell <tpa@zurich.ibm.com > 
						
						
					 
					
						2025-07-15 04:04:35 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c586b55667 
					 
					
						
						
							
							[TPU] Optimize kv cache update kernel ( #20415 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Yifei Teng <tengyifei88@gmail.com > 
						
						
					 
					
						2025-07-15 03:56:43 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						33d560001e 
					 
					
						
						
							
							[Docs] Improve documentation for ray cluster launcher helper script ( #20602 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Ricardo Decal <rdecal@anyscale.com > 
						
						
					 
					
						2025-07-15 03:55:45 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f148c44c6a 
					 
					
						
						
							
							[frontend] Refactor CLI Args for a better modular integration ( #20206 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Kourosh Hakhamaneshi <kourosh@anyscale.com > 
						
						
					 
					
						2025-07-15 02:23:42 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						235bfd5dfe 
					 
					
						
						
							
							[Docs] Improve documentation for RLHF example ( #20598 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Ricardo Decal <rdecal@anyscale.com > 
						
						
					 
					
						2025-07-15 01:54:10 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						68d28e37b0 
					 
					
						
						
							
							[frontend] Add --help=page option for paginated help output ( #20961 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-07-15 00:42:00 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						37a7d5d74a 
					 
					
						
						
							
							[Misc] Refactor AllReduceFusionPass. Remove parameter ( #20918 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: ilmarkov <imarkov@redhat.com >
Co-authored-by: ilmarkov <imarkov@redhat.com > 
						
						
					 
					
						2025-07-15 06:57:40 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d4d309409f 
					 
					
						
						
							
							Implement Async Scheduling ( #19970 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-07-14 23:01:46 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						85bd6599e4 
					 
					
						
						
							
							[Model] Add AutoWeightsLoader support for BERT, RoBERTa ( #20534 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jennifer He <islandhe@gmail.com >
Signed-off-by: <islandhe@gmail.com >
Signed-off-by: Jen H <islandhe@gmail.com > 
						
						
					 
					
						2025-07-15 13:34:24 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						91b3d190ae 
					 
					
						
						
							
							[cold start] replace VLLM_COMPILE_DEPYF with debug_dump_dir ( #20940 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Boyuan Feng <boyuan@meta.com > 
						
						
					 
					
						2025-07-15 13:02:17 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						fc017915f5 
					 
					
						
						
							
							[Doc] Clearer mistral3 and pixtral model support description ( #20926 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <2037008807@qq.com > 
						
						
					 
					
						2025-07-14 21:56:53 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9ad0a4588b 
					 
					
						
						
							
							[Bugfix] Switch bailout logic for kv-cache-dtype with SM100 Flashinfer ( #20934 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Pavani Majety <pmajety@nvidia.com > 
						
						
					 
					
						2025-07-15 03:27:50 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						016b8d1b7f 
					 
					
						
						
							
							Enabled BnB NF4 inference on Gaudi ( #20172 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Ruheena Suhani Shaik <rsshaik@habana.ai > 
						
						
					 
					
						2025-07-14 20:26:08 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						80305c1b24 
					 
					
						
						
							
							[CI] Fix flaky test_streaming_response test ( #20913 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: NickLucche <nlucches@redhat.com > 
						
						
					 
					
						2025-07-14 20:15:15 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						37e2ecace2 
					 
					
						
						
							
							feat: add image zoom to improve image viewing experience ( #20763 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-07-14 20:14:23 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						054c8657e3 
					 
					
						
						
							
							[Docs] Add Kuberay to deployment integrations ( #20592 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Ricardo Decal <rdecal@anyscale.com > 
						
						
					 
					
						2025-07-14 20:13:55 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d4170fad39 
					 
					
						
						
							
							Use w8a8 quantized matmul Pallas kernel ( #19170 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Xiongfei Wei <isaacwxf23@gmail.com > 
						
						
					 
					
						2025-07-15 03:06:33 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						946aadb4a0 
					 
					
						
						
							
							[CI/Build] Split Entrypoints Test into LLM and API Server ( #20945 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-07-15 02:44:18 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						bcdfb2a330 
					 
					
						
						
							
							[Bugfix] Fix incorrect dispatch for CutlassBlockScaledGroupedGemm and DeepGEMM ( #20933 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-07-15 01:42:17 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ba8c300018 
					 
					
						
						
							
							[BugFix] VLLM_DISABLE_COMPILE_CACHE=1 should disable all reads and writes from the cache ( #20942 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Richard Zou <zou3519@gmail.com > 
						
						
					 
					
						2025-07-15 01:26:18 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8cdc371217 
					 
					
						
						
							
							SM100 Cutlass MLA decode with unrestricted num_heads (< 128) for DeepSeek TP ( #20769 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Alexander Matveev <amatveev@redhat.com > 
						
						
					 
					
						2025-07-15 01:06:38 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						61e20828da 
					 
					
						
						
							
							Fall back if flashinfer comm module not found ( #20936 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Yong Hoon Shin <yhshin@meta.com > 
						
						
					 
					
						2025-07-14 23:11:18 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						55e1c66da5 
					 
					
						
						
							
							[Docs] remove outdated performance benchmark ( #20935 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Kuntai Du <kuntai@uchicago.edu > 
						
						
					 
					
						2025-07-14 22:14:17 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						86f3ac21ce 
					 
					
						
						
							
							Fix overflow indexing in causal_conv1d kernel ( #20938 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Thomas Parnell <tpa@zurich.ibm.com > 
						
						
					 
					
						2025-07-14 21:43:07 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						149f2435a5 
					 
					
						
						
							
							[Misc] Relax translations tests ( #20856 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: NickLucche <nlucches@redhat.com > 
						
						
					 
					
						2025-07-14 20:08:36 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c0569dbc82 
					 
					
						
						
							
							[Misc] ModularKernel : Perform WeightAndReduce inside TritonExperts & DeepGemmExperts ( #20725 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Varun Sundar Rabindranath <vsundarr@redhat.com >
Co-authored-by: Varun Sundar Rabindranath <vsundarr@redhat.com > 
						
						
					 
					
						2025-07-14 19:47:16 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8bb43b9c9e 
					 
					
						
						
							
							Add benchmark dataset for mlperf llama tasks ( #20338 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-07-14 19:10:07 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						559756214b 
					 
					
						
						
							
							Change default model to Qwen3-0.6B ( #20335 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com > 
						
						
					 
					
						2025-07-14 16:54:52 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6d0cf239c6 
					 
					
						
						
							
							[CI/Build] Add Transformers nightly tests in CI ( #20924 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <2037008807@qq.com > 
						
						
					 
					
						2025-07-14 16:33:17 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3fc964433a 
					 
					
						
						
							
							[Misc] Clean up Aimv2 config registration in Ovis config ( #20921 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <2037008807@qq.com > 
						
						
					 
					
						2025-07-14 15:36:43 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0caf61c08a 
					 
					
						
						
							
							[CI] Update codeowner for compilation code ( #20929 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lu Fang <lufang@fb.com > 
						
						
					 
					
						2025-07-14 08:33:19 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						667624659b 
					 
					
						
						
							
							[CI] cc folks on changes to vllm/compilation ( #20925 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Richard Zou <zou3519@gmail.com > 
						
						
					 
					
						2025-07-14 07:52:17 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						38efa28278 
					 
					
						
						
							
							[Model] Add Ling implementation ( #20680 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: vito.yy <vito.yy@antgroup.com > 
						
						
					 
					
						2025-07-14 22:10:32 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e8cc53af5e 
					 
					
						
						
							
							[Misc] Log the reason for falling back to FlexAttention ( #20699 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-07-14 04:16:51 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a4851cfe68 
					 
					
						
						
							
							[Bugfix]: Fix messy code when using logprobs ( #20910 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com > 
						
						
					 
					
						2025-07-14 11:06:45 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9887e8ec50 
					 
					
						
						
							
							[Misc] Remove unused function ( #20909 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-07-14 10:48:55 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f326ab9c88 
					 
					
						
						
							
							[Bugfix] Bump up mistral_common to support v13 tokenizer ( #20905 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: 22quinn <33176974+22quinn@users.noreply.github.com > 
						
						
					 
					
						2025-07-14 10:45:03 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						dcf2a5e208 
					 
					
						
						
							
							[CI/Build] Fix OOM issue in Jina-VL test ( #20907 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-07-14 10:32:35 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1e9438e0b0 
					 
					
						
						
							
							[MISC] Move bind_kv_cache to worker module ( #20900 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com > 
						
						
					 
					
						2025-07-14 09:40:00 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						697ef765ee 
					 
					
						
						
							
							[Refactor][V1] Move outlines utils for V1 imports ( #20878 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Aaron Pham <contact@aarnphm.xyz > 
						
						
					 
					
						2025-07-14 00:58:35 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a99b9f7dee 
					 
					
						
						
							
							[Quantization] add BNB for MixtralForCausalLM ( #20893 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-07-14 07:34:34 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c488b928a7 
					 
					
						
						
							
							[ROCm] [Bugfix] [Critical]: Fix mamba compilation bug ( #20883 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: tjtanaa <tunjian.tan@embeddedllm.com >
Co-authored-by: vllmellm <vllm.ellm@embeddedllm.com > 
						
						
					 
					
						2025-07-14 15:23:28 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2c7fa47161 
					 
					
						
						
							
							Fix: Add missing EOFError handling in CLI complete command ( #20896 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-07-14 07:09:57 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						88fc8a97e3 
					 
					
						
						
							
							Removing redundant python version check ( #20888 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Dannyso05 <dansong1177@gmail.com > 
						
						
					 
					
						2025-07-14 06:15:05 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						66f6fbd393 
					 
					
						
						
							
							[Prefix Cache] Add reproducible prefix-cache block hashing using SHA-256 + CBOR (64bit) ( #20511 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Maroon Ayoub <maroon.ayoub@ibm.com > 
						
						
					 
					
						2025-07-14 02:45:31 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8632e831ba 
					 
					
						
						
							
							[Core] Add update_config RPC method ( #20095 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: 22quinn <33176974+22quinn@users.noreply.github.com > 
						
						
					 
					
						2025-07-14 00:49:18 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4bbfc36b16 
					 
					
						
						
							
							[V1] Hybrid allocator without prefix caching ( #20661 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: nopperl <54780682+nopperl@users.noreply.github.com > 
						
						
					 
					
						2025-07-13 16:55:14 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						80d38b8ac8 
					 
					
						
						
							
							[V1] [ROCm] [AITER] Upgrade AITER to commit 916bf3c and bugfix APIs ( #20880 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: tjtanaa <tunjian.tan@embeddedllm.com > 
						
						
					 
					
						2025-07-13 15:19:32 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						211b6a6113 
					 
					
						
						
							
							[Bugfix] fix define of RerankDocument ( #20877 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: liuchenlong <liuchenlong@xiaohongshu.com >
Co-authored-by: liuchenlong <liuchenlong@xiaohongshu.com > 
						
						
					 
					
						2025-07-13 14:32:40 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						247102f07f 
					 
					
						
						
							
							[Bugfix] Fix: add patch_rope_scaling after hf override ( #20857 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Wang Siyuan <wsy0227@sjtu.edu.cn >
Signed-off-by: Wang Siyuan <sywang0227@gmail.com > 
						
						
					 
					
						2025-07-13 00:13:25 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						bd4c1e6fdb 
					 
					
						
						
							
							Support for LlamaForSequenceClassification ( #20807 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: thechaos16 <thechaos16@gmail.com > 
						
						
					 
					
						2025-07-13 00:09:34 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						99b4f080d8 
					 
					
						
						
							
							Renable google/gemma-3-1b-it accuracy test. ( #20866 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Qiliang Cui <derrhein@gmail.com > 
						
						
					 
					
						2025-07-12 21:48:56 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						020f58abcd 
					 
					
						
						
							
							[Core] Support multiple tasks per model ( #20771 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: NickLucche <nlucches@redhat.com >
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk >
Co-authored-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-07-12 19:40:11 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c1acd6d7d4 
					 
					
						
						
							
							[Refactor] Change the way of import triton ( #20774 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yewentao256 <zhyanwentao@126.com > 
						
						
					 
					
						2025-07-12 19:39:55 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3b3b778d4a 
					 
					
						
						
							
							[Bugfix] Fix a couple PPLX+CUTLASS MoE bugs ( #20825 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: ElizaWszola <ewszola@redhat.com > 
						
						
					 
					
						2025-07-12 19:39:14 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						42d440c22b 
					 
					
						
						
							
							[Perf] Use Triton instead of Torch for DeepGEMM Per Token Group Quant ( #20841 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yewentao256 <zhyanwentao@126.com > 
						
						
					 
					
						2025-07-12 19:38:45 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f45a332886 
					 
					
						
						
							
							[Sched] Enhance the logic to remove stopped requests from queues ( #20739 )  
						
						 
						
						
						
						
					 
					
						2025-07-12 15:33:13 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6e2c176e1f 
					 
					
						
						
							
							[Bugfix] Restrict Machete to only run on Hopper ( #20830 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-07-12 17:34:40 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a86754a12b 
					 
					
						
						
							
							[docs] convert supported configs to table ( #20858 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-07-12 06:54:50 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c2a2f19aba 
					 
					
						
						
							
							[Bugfix] Fix Tensor Parallelism Padding Consistency in Granite Models ( #20843 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Alex-Brooks <Alex.Brooks@ibm.com > 
						
						
					 
					
						2025-07-12 06:11:30 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2c11a738b3 
					 
					
						
						
							
							[Model] New model support for microsoft/Phi-4-mini-flash-reasoning ( #20702 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Congcong Chen <congcongchen@microsoft.com > 
						
						
					 
					
						2025-07-12 06:02:10 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b639327ad9 
					 
					
						
						
							
							Revert "Use NVCC --compress-mode to reduce binary size by 30%  #20694 " ( #20853 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-07-11 23:07:35 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4afe687a82 
					 
					
						
						
							
							Enable ModelOpt Llama4 fp8 checkpoint deployment ( #20419 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Zhiyu Cheng <zhiyuc@nvidia.com > 
						
						
					 
					
						2025-07-11 23:07:16 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5de8d9f111 
					 
					
						
						
							
							Remove extra tensor on CPU ( #20693 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Max de Bayser <mbayser@br.ibm.com > 
						
						
					 
					
						2025-07-12 14:06:34 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c1c8ca57ff 
					 
					
						
						
							
							[cold start time] add envs.VLLM_COMPILE_DEPYF to guard decompile ( #20790 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Boyuan Feng <boyuan@meta.com > 
						
						
					 
					
						2025-07-11 23:06:13 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a3a5a47e48 
					 
					
						
						
							
							[Bugfix] Fix torch.compile x LoRA for PyTorch 2.8  ( #20823 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: rzou <zou3519@gmail.com > 
						
						
					 
					
						2025-07-11 23:06:04 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						fb25e95688 
					 
					
						
						
							
							[Docs] Update basic.md ( #20846 )  
						
						 
						
						
						
						
					 
					
						2025-07-11 23:05:32 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0d4891cd03 
					 
					
						
						
							
							[Bug] Fix DeepGemm for EP low latency case ( #20833 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yewentao256 <zhyanwentao@126.com > 
						
						
					 
					
						2025-07-11 23:05:12 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f56d2996ca 
					 
					
						
						
							
							[Misc] Respect no_use_tqdm_on_load flag while capturing CUDA graph ( #20834 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Linkun <github@lkchen.net > 
						
						
					 
					
						2025-07-11 23:04:45 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						147afb448b 
					 
					
						
						
							
							[Bugfix] Replace unavailable video url in multimodal test ( #20854 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <2037008807@qq.com > 
						
						
					 
					
						2025-07-12 05:25:39 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3c7d942da8 
					 
					
						
						
							
							[Frontend] Abstract prompt and SpeechToTextConfig for transcriptions models ( #20637 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: NickLucche <nlucches@redhat.com > 
						
						
					 
					
						2025-07-11 21:33:26 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						890323dc1b 
					 
					
						
						
							
							[Bugfix] : Fix typo - logger.warn_once -> logger.warning_once ( #20852 )  
						
						 
						
						
						
						
					 
					
						2025-07-11 20:56:24 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						01cae37713 
					 
					
						
						
							
							[CI/Build] Ensure compatability with Transformers v4.53 ( #20541 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <2037008807@qq.com >
Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn > 
						
						
					 
					
						2025-07-11 20:53:07 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						11c0198615 
					 
					
						
						
							
							[Bugfix] Fix tensor parallel issue in Qwen3 reranker weight loading ( #20682 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <2037008807@qq.com >
Co-authored-by: Isotr0py <2037008807@qq.com > 
						
						
					 
					
						2025-07-11 20:52:43 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b1235c3e10 
					 
					
						
						
							
							[Bugfix] Lazy import fused_experts in BitsAndBytesMoEMethod to avoid break not-cuda-alike devices  ( #20822 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: jiang1.li <jiang1.li@intel.com > 
						
						
					 
					
						2025-07-11 20:52:05 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						44d02f54db 
					 
					
						
						
							
							[Misc] Restrict deep_gemm's log output ( #20827 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-07-11 20:50:42 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a8593237c0 
					 
					
						
						
							
							Add pynccl all-gatherv and reducescatterv ( #20154 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Trevor Morris <tmorris@nvidia.com >
Signed-off-by: mgoin <mgoin64@gmail.com >
Co-authored-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-07-11 18:59:23 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						fc0f41d10a 
					 
					
						
						
							
							Integration SM100 FlashInfer fused allreduce RMSNorm ( #20691 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: ilmarkov <imarkov@redhat.com >
Co-authored-by: ilmarkov <imarkov@redhat.com > 
						
						
					 
					
						2025-07-11 18:58:15 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7b828e30d5 
					 
					
						
						
							
							[CI Bug] Fix Async Engine, Inputs, Utils, Worker Test: 'State' object has no attribute 'enable_server_load_tracking' ( #20845 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yewentao256 <zhyanwentao@126.com > 
						
						
					 
					
						2025-07-11 18:57:24 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5f0af36af5 
					 
					
						
						
							
							Update kimi-k2 tool calling docs, enable unit tests ( #20821 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: wangzhengtao <wangzhengtao@moonshot.cn >
Co-authored-by: wangzhengtao <wangzhengtao@moonshot.cn >
Co-authored-by: wangzhengtao <wangzhengtao@msh.team > 
						
						
					 
					
						2025-07-11 20:16:14 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0d21b2664c 
					 
					
						
						
							
							[Bugfix] Fix OOM in language generation test ( #20814 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <2037008807@qq.com >
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> 
						
						
					 
					
						2025-07-11 11:21:52 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9907fc4494 
					 
					
						
						
							
							[Docs] Data Parallel deployment documentation ( #20768 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-07-11 09:42:10 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d47661f0cd 
					 
					
						
						
							
							[Kernel] Basic tuned configs for NVFP4 CUTLASS dense GEMM ( #20646 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-07-11 10:05:33 -06:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						53fa457391 
					 
					
						
						
							
							[Misc] Add unit tests for MoE ModularKernel combinations + Profiling utility ( #20449 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Varun Sundar Rabindranath <vsundarr@redhat.com >
Co-authored-by: Varun Sundar Rabindranath <vsundarr@redhat.com > 
						
						
					 
					
						2025-07-11 07:51:46 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6fb162447b 
					 
					
						
						
							
							[doc] fix ordered list issue ( #20819 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-07-11 06:49:46 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						66177189c5 
					 
					
						
						
							
							[Bugfix] Add missing field to TritonLanguagePlaceholder ( #20812 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: jiang1.li <jiang1.li@intel.com > 
						
						
					 
					
						2025-07-11 05:25:11 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b4f0b5f9aa 
					 
					
						
						
							
							Temporarily suspend google/gemma-3-1b-it. ( #20722 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Qiliang Cui <derrhein@gmail.com > 
						
						
					 
					
						2025-07-11 11:21:26 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						cbd14ed561 
					 
					
						
						
							
							[Bugfix] Refactor /invocations to be task-agnostic ( #20764 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-07-11 03:20:54 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7bd4c37ae7 
					 
					
						
						
							
							[Core] Add Flashinfer TRTLLM Backend for Flashinfer decode path (SM100).  ( #19825 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Pavani Majety <pmajety@nvidia.com >
Signed-off-by: mgoin <mgoin64@gmail.com >
Co-authored-by: shuw <shuw@nvidia.com >
Co-authored-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-07-11 09:23:23 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8020e98c9f 
					 
					
						
						
							
							[Quantization][1/N] MoE support BNB-Inflight Quantization ( #20061 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-07-11 08:01:13 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						762be26a8e 
					 
					
						
						
							
							[Bugfix] Upgrade depyf to 0.19 and streamline custom pass logging ( #20777 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Luka Govedic <lgovedic@redhat.com >
Signed-off-by: luka <lgovedic@redhat.com > 
						
						
					 
					
						2025-07-11 00:15:22 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6a9e6b2abf 
					 
					
						
						
							
							[doc] fold long code block ( #20795 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-07-10 23:16:41 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5d09152ff1 
					 
					
						
						
							
							[V1] Enable Mamba2 layers other than MambaMixer2 in the v1 engine ( #20660 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: nopperl <54780682+nopperl@users.noreply.github.com > 
						
						
					 
					
						2025-07-11 05:53:31 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						31d5c1797f 
					 
					
						
						
							
							[Perf][fp8] Use CustomOp abstraction for fp8 quant for better perf ( #19830 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Luka Govedic <lgovedic@redhat.com >
Co-authored-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-07-11 04:56:28 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						35514b682a 
					 
					
						
						
							
							[XPU] XCCL support enabled in torch 2.8.0.dev nightly builds ( #20705 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: ratnampa <ratnam.parikh@intel.com > 
						
						
					 
					
						2025-07-10 20:39:52 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e2de455c34 
					 
					
						
						
							
							[Feature] Integrate SM100 DeepGEMM support ( #20087 )  
						
						 
						
						
						
						
					 
					
						2025-07-10 20:18:05 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5b032352cc 
					 
					
						
						
							
							[Attention] MLA - Flashinfer Ragged Prefill ( #20034 )  
						
						 
						
						
						
						
					 
					
						2025-07-10 20:17:47 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						922f316441 
					 
					
						
						
							
							[Model] Support HF format of minimax ( #20211 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-07-11 02:55:21 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5923ab9524 
					 
					
						
						
							
							[fix]: disable cutlass block scaled group gemm for EP ( #20781 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Duncan Moss <djm.moss@gmail.com > 
						
						
					 
					
						2025-07-11 02:39:18 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0cf893cae1 
					 
					
						
						
							
							Add kimi-k2 tool parser ( #20789 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: wangzhengtao <wangzhengtao@moonshot.cn >
Co-authored-by: wangzhengtao <wangzhengtao@moonshot.cn >
Co-authored-by: wangzhengtao <wangzhengtao@msh.team > 
						
						
					 
					
						2025-07-11 10:36:23 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						cf75cd2098 
					 
					
						
						
							
							[CI Bugfix] Specify same TORCH_CUDA_ARCH_LIST for flashinfer aot and install ( #20772 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-07-11 01:16:01 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b854321ffe 
					 
					
						
						
							
							[Docs] Lazy import gguf ( #20785 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: simon-mo <simon.mo@hey.com > 
						
						
					 
					
						2025-07-10 16:06:37 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5b6fe23d05 
					 
					
						
						
							
							[Bugfix][Benchmark] Make sure the output length > 0 when testing prefill workload. ( #20786 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: KuntaiDu <kuntai@uchicago.edu >
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> 
						
						
					 
					
						2025-07-10 14:52:46 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f0c98cae27 
					 
					
						
						
							
							[Misc] MoE ModularKernel : Introduce TopKWeightAndReduce  ( #20648 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Varun Sundar Rabindranath <vsundarr@redhat.com >
Co-authored-by: Varun Sundar Rabindranath <vsundarr@redhat.com > 
						
						
					 
					
						2025-07-10 14:40:38 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						574ad60db9 
					 
					
						
						
							
							[KVConnector] Always call connector clear_metadata() at end of step ( #20756 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com >
Co-authored-by: David Ben-David <sdavidbd@gmail.com > 
						
						
					 
					
						2025-07-10 22:37:27 +01:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						fdadb6f43a 
					 
					
						
						
							
							[Bugfix] Fused MoE Modular Kernel chunking loop ( #20392 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Varun Sundar Rabindranath <vsundarr@redhat.com >
Co-authored-by: Varun Sundar Rabindranath <vsundarr@redhat.com > 
						
						
					 
					
						2025-07-10 20:31:10 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						41060c6e08 
					 
					
						
						
							
							[Core] Add Support for Default Modality Specific LoRAs [generate / chat completions] ( #19126 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Alex-Brooks <Alex.Brooks@ibm.com > 
						
						
					 
					
						2025-07-10 21:09:37 +01:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3de2ed767f 
					 
					
						
						
							
							[Bugfix] Remove assertion of expert_map being None ( #20714 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Ming Yang <yming@meta.com >
Signed-off-by: Ming Yang <minos.future@gmail.com > 
						
						
					 
					
						2025-07-10 19:55:22 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						299252ea82 
					 
					
						
						
							
							[CI] Fix pre commit issue ( #20782 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yewentao256 <zhyanwentao@126.com > 
						
						
					 
					
						2025-07-10 12:48:13 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d6902ce79f 
					 
					
						
						
							
							[V0][V1][Core] Add outlines integration for V1, and update V0 integration. ( #15975 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nathan Hoos <thwackyy.y@gmail.com > 
						
						
					 
					
						2025-07-10 15:30:26 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5e53c89a74 
					 
					
						
						
							
							[Bugfix] [CI] Fix Tensorizer LoRA test ( #20760 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Sanger Steel <sangersteel@gmail.com > 
						
						
					 
					
						2025-07-10 19:07:06 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c66e38ea4c 
					 
					
						
						
							
							[Test] Remove docker build from test. ( #20542 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Qiliang Cui <derrhein@gmail.com > 
						
						
					 
					
						2025-07-10 11:21:58 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						251595368f 
					 
					
						
						
							
							Fix DeepSeek-R1-0528 chat template ( #20717 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Benjamin Merkel <benjamin.merkel@tngtech.com >
Co-authored-by: Benjamin Merkel <benjamin.merkel@tngtech.com > 
						
						
					 
					
						2025-07-10 17:47:36 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4bed167768 
					 
					
						
						
							
							[Model][VLM] Support JinaVL Reranker ( #20260 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: shineran96 <shinewang96@gmail.com > 
						
						
					 
					
						2025-07-10 10:43:43 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b140416abf 
					 
					
						
						
							
							[Model] Add reason parser for Hunyuan A13B Model. ( #20625 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Asher Zhang <asherszhang@tencent.com > 
						
						
					 
					
						2025-07-10 16:33:26 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5b8366b61a 
					 
					
						
						
							
							[ROCm][Regression] Remove tensor creation that harms performance on ROCm ( #20741 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Gregory Shtrasberg <Gregory.Shtrasberg@amd.com > 
						
						
					 
					
						2025-07-10 09:22:23 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c7753a9809 
					 
					
						
						
							
							[Hardware][CPU] Vllm int8 quantization enablement for ARM CPU ( #14129 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: nishith-fujitsu <nishith.jaiswal@fujitsu.com > 
						
						
					 
					
						2025-07-10 15:59:04 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4b9a9435bb 
					 
					
						
						
							
							Update Dockerfile FlashInfer to v0.2.8rc1 ( #20718 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-07-10 08:09:02 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3482fd7e4e 
					 
					
						
						
							
							[Doc] Add engine args back in to the docs ( #20674 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-07-10 08:02:40 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						77f77a951e 
					 
					
						
						
							
							[Misc] Clean up mark to fork process in BNB tests ( #20692 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <2037008807@qq.com > 
						
						
					 
					
						2025-07-10 13:59:40 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1a4f35e2ea 
					 
					
						
						
							
							Normalize lm-eval command between baseline and correctness test ( #18560 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-07-10 13:27:32 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						be1e128dfb 
					 
					
						
						
							
							[CI Bugfix] Skip failing Tensorizer+LoRA test ( #20724 )  
						
						 
						
						
						
						
					 
					
						2025-07-10 21:15:03 +09:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						65393ee064 
					 
					
						
						
							
							[doc] fix ordered list ( #20749 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-07-10 03:13:52 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						dc221ad72d 
					 
					
						
						
							
							[Bugfix][Build][Non-CUDA] Only referencing CMAKE_CUDA_COMPILER_VERSION on CUDA where it is defined ( #20738 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Gregory Shtrasberg <Gregory.Shtrasberg@amd.com > 
						
						
					 
					
						2025-07-10 02:58:11 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7571a4a7e5 
					 
					
						
						
							
							[CI/Build] Fix Basic Models Test ( #20728 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-07-10 09:57:19 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f67d986dd1 
					 
					
						
						
							
							[Misc] loose new-model tagger conditions ( #20747 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <2037008807@qq.com > 
						
						
					 
					
						2025-07-10 02:54:47 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						cc876d0f29 
					 
					
						
						
							
							[KVConnector] Aggregate finished requests on the scheduler ( #19555 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Or Ozeri <oro@il.ibm.com > 
						
						
					 
					
						2025-07-10 09:22:18 +01:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						fdfd409f8f 
					 
					
						
						
							
							[TPU][Core]Make load weight exceed hbm error more instructive for customers ( #20644 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chenyaaang <chenyangli@google.com > 
						
						
					 
					
						2025-07-10 07:01:17 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ffbcc9e757 
					 
					
						
						
							
							[BugFix] Fix VllmConfig() construction on all platforms ( #20695 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-07-10 07:00:20 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						59389c927b 
					 
					
						
						
							
							[BugFix][CPU] Fix CPU worker dependency on cumem_allocator ( #20696 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-07-10 14:24:20 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8f2720def9 
					 
					
						
						
							
							[Frontend] Support Tool Calling with both tool_choice='required' and $defs. ( #20629 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com > 
						
						
					 
					
						2025-07-10 13:56:35 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ad6c2e1a0b 
					 
					
						
						
							
							Correct PPMissingLayer handling in Deepseek-V2-Lite PP deployment ( #20665 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Seiji Eicher <seiji@anyscale.com > 
						
						
					 
					
						2025-07-09 20:34:40 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						49e8c7ea25 
					 
					
						
						
							
							Use NVCC --compress-mode to reduce binary size by 30% ( #20694 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-07-09 18:26:48 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						805d62ca88 
					 
					
						
						
							
							[Misc] DP : Add ExpertTokensMetadata ( #20332 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Varun <vsundarr@redhat.com >
Signed-off-by: Varun Sundar Rabindranath <vsundarr@redhat.com >
Co-authored-by: Varun <vsundarr@redhat.com > 
						
						
					 
					
						2025-07-10 00:33:14 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b7d9e9416f 
					 
					
						
						
							
							[CI/Build] Fix FlashInfer double build in Dockerfile ( #20651 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-07-09 17:41:56 -06:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7c12a765aa 
					 
					
						
						
							
							[Misc] Simplify the prefix caching logic on draft tokens ( #20701 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-07-09 14:48:35 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						cd587c93ef 
					 
					
						
						
							
							[BugFix]: Properly set engine_id when using multi connector ( #19487 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com >
Co-authored-by: leiyiming <leiyiming@kingsoft.com >
Co-authored-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-07-09 20:32:44 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						332d4cb17b 
					 
					
						
						
							
							[Feature][Quantization] MXFP4 support for MOE models ( #17888 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Felix Marty <felmarty@amd.com >
Signed-off-by: Bowen Bao <bowenbao@amd.com >
Signed-off-by: Felix Marty <Felix.Marty@amd.com >
Co-authored-by: Bowen Bao <bowenbao@amd.com > 
						
						
					 
					
						2025-07-09 13:19:02 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						bf03ff3575 
					 
					
						
						
							
							[Kernel] Add Conch backend for mixed-precision linear layer ( #19818 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jacob Manning <jmanning+oss@stackav.com > 
						
						
					 
					
						2025-07-09 13:17:55 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						47043eb678 
					 
					
						
						
							
							[Kernel] Triton implementation of causal-conv1d for Mamba-based models ( #18218 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Tuan M. Hoang-Trong <tmhoangt@us.ibm.com >
Co-authored-by: Tuan M. Hoang-Trong <tmhoangt@us.ibm.com >
Co-authored-by: Tyler Michael Smith <tysmith@redhat.com >
Co-authored-by: Tyler Michael Smith <tyler@neuralmagic.com > 
						
						
					 
					
						2025-07-09 12:53:55 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						31b96d1c64 
					 
					
						
						
							
							Support Llama 4 for cutlass_moe_fp4 ( #20453 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-07-09 15:53:38 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e59ba9e142 
					 
					
						
						
							
							[CI/Build] Enlarge tolerance for a CPU multi-modal test ( #20684 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: jiang1.li <jiang1.li@intel.com > 
						
						
					 
					
						2025-07-09 17:48:52 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						403b481573 
					 
					
						
						
							
							Remove heading form installation inc.md file ( #20697 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-07-09 10:42:51 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						138709f8d1 
					 
					
						
						
							
							[Doc] Update CPU doc ( #20676 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: jiang1.li <jiang1.li@intel.com >
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-07-09 10:28:30 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0bbac1c1b4 
					 
					
						
						
							
							[Bench] Add NVFP4 GEMM benchmark script ( #20578 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-07-09 13:23:48 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a3e4e85ece 
					 
					
						
						
							
							[XPU][CI] enhance xpu test support ( #20652 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Ma, Liangliang <liangliang.ma@intel.com >
Co-authored-by: zhenwei-intel <zhenweiliu@habana.ai > 
						
						
					 
					
						2025-07-09 16:53:09 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						eb58f5953d 
					 
					
						
						
							
							[TPU][Bugfix] fix test_pallas ( #20666 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chengji Yao <chengjiyao@google.com > 
						
						
					 
					
						2025-07-09 09:32:48 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4ac9c33f78 
					 
					
						
						
							
							[Bugfix] Fix handling of Tensorizer arguments for LoadConfig ( #20643 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Sanger Steel <sangersteel@gmail.com > 
						
						
					 
					
						2025-07-09 15:36:37 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						efe73d0575 
					 
					
						
						
							
							[doc] update doc format ( #20673 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-07-09 08:08:19 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						853487bc1b 
					 
					
						
						
							
							[Docs] Improve docs for RLHF co-location example ( #20599 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Ricardo Decal <rdecal@anyscale.com >
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-07-09 08:06:43 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9ff2af6d2b 
					 
					
						
						
							
							[Benchmark] Parameterization of streaming loading of multimodal datasets ( #20528 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: wangli <wangli858794774@gmail.com > 
						
						
					 
					
						2025-07-09 13:35:16 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						70ca5484f5 
					 
					
						
						
							
							[Doc] Update notes ( #20668 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-07-09 03:46:36 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5358cce5ff 
					 
					
						
						
							
							[V1] [Doc] Update V1 docs for Mamba models ( #20499 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Thomas Parnell <tpa@zurich.ibm.com >
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com > 
						
						
					 
					
						2025-07-09 01:02:41 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2155e95ef1 
					 
					
						
						
							
							[Bugfix] Fix the issue where reasoning_content is None when Thinkng is enabled and tool_choice is set to 'required'. ( #20662 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com > 
						
						
					 
					
						2025-07-09 07:39:58 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f95570a52d 
					 
					
						
						
							
							[Docs] fix minimax tool_calling docs error ( #20667 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: qingjun <qingjun@minimaxi.com > 
						
						
					 
					
						2025-07-09 00:37:07 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b6e7e3d58f 
					 
					
						
						
							
							[Intel GPU] support ray as distributed executor backend for XPU. ( #20659 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Kunshang Ji <kunshang.ji@intel.com > 
						
						
					 
					
						2025-07-09 00:36:58 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e760fcef22 
					 
					
						
						
							
							[XPU] Use spawn with XPU multiprocessing ( #20649 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com > 
						
						
					 
					
						2025-07-09 00:34:28 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6bbf1795b7 
					 
					
						
						
							
							[Misc] Fix the size of batched_dummy_mm_inputs in profile_run ( #20434 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: bk-201 <joy25810@foxmail.com > 
						
						
					 
					
						2025-07-08 20:15:44 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9e0ef888f0 
					 
					
						
						
							
							Fix bullets in incremental_build.md ( #20642 )  
						
						 
						
						
						
						
					 
					
						2025-07-09 11:03:41 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						97abeb1daa 
					 
					
						
						
							
							[feat] enable SM100 CUTLASS block scaled group gemm for smaller batch sizes ( #20640 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Duncan Moss <djm.moss@gmail.com > 
						
						
					 
					
						2025-07-09 11:03:35 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						34dad19e7b 
					 
					
						
						
							
							[Bugfix] set default set cuda_graph_sizes to min(self.max_num_seqs * 2, 512) ( #20628 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: izhuhaoran <izhuhaoran@qq.com > 
						
						
					 
					
						2025-07-09 11:02:51 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6db31e7a27 
					 
					
						
						
							
							[Hardware][PPC64LE] Enable V1 for ppc64le and ARM ( #20554 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Akash Kaothalkar <akash.kaothalkar@ibm.com >
Co-authored-by: Akash Kaothalkar <akash.kaothalkar@ibm.com >
Co-authored-by: Nikhil Gupta <nikhil.gupta2@arm.com > 
						
						
					 
					
						2025-07-08 20:00:41 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						977180c912 
					 
					
						
						
							
							[Docs] Improve documentation for multi-node service helper script ( #20600 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Ricardo Decal <rdecal@anyscale.com > 
						
						
					 
					
						2025-07-08 19:44:26 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c40784c794 
					 
					
						
						
							
							[BugFix][Intel GPU] Use refactored API for dist_backend in V1 worker ( #20596 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: ratnampa <ratnam.parikh@intel.com > 
						
						
					 
					
						2025-07-08 19:44:23 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						baed180aa0 
					 
					
						
						
							
							[tech debt] Revisit lora request model checker ( #20636 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Kourosh Hakhamaneshi <kourosh@anyscale.com > 
						
						
					 
					
						2025-07-09 09:42:41 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0b407479ef 
					 
					
						
						
							
							[misc]refactor Platform.set_device method ( #20262 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Kunshang Ji <kunshang.ji@intel.com > 
						
						
					 
					
						2025-07-09 01:39:47 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5eaf570050 
					 
					
						
						
							
							Replace multiply_add with homogeneous_multiply_add to Address Clang Template Parameter Issue ( #20142 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lu Fang <lufang@fb.com > 
						
						
					 
					
						2025-07-09 00:30:18 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d8ee5a2ca4 
					 
					
						
						
							
							[TPU][Bugfix] disable phi-3 test ( #20632 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Qiliang Cui <derrhein@gmail.com > 
						
						
					 
					
						2025-07-08 23:14:26 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b9fca83256 
					 
					
						
						
							
							[Bugfix] Fix GLM-4.1-V video prompt update ( #20635 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <2037008807@qq.com > 
						
						
					 
					
						2025-07-08 23:13:58 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						32dffc2772 
					 
					
						
						
							
							[Core] Rename get_max_tokens_per_item for backward compatibility ( #20630 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-07-08 23:11:30 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c438183e99 
					 
					
						
						
							
							[Bugfix] Fix topk_ids indices_type for CUTLASS w8a8 FP8 MoE ( #20166 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Ming Yang <yming@meta.com > 
						
						
					 
					
						2025-07-08 23:10:57 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						baba0389f7 
					 
					
						
						
							
							[CI] Increase the threshold of the MTEB RERANK tests ( #20615 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: wang.yuqi <noooop@126.com > 
						
						
					 
					
						2025-07-08 08:10:11 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c6c22f16d3 
					 
					
						
						
							
							Revert invalid spellchecker fix on deepseek_vl2 ( #20618 )  
						
						 
						
						
						
						
					 
					
						2025-07-08 15:07:14 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						dd382e0fe3 
					 
					
						
						
							
							[Model] Implement missing get_language_model for Keye-VL ( #20631 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-07-08 07:47:46 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						849590a2a7 
					 
					
						
						
							
							Update torch/xla pin to 20250703 ( #20589 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Xiongfei Wei <isaacwxf23@gmail.com > 
						
						
					 
					
						2025-07-08 07:44:02 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a4c23314c0 
					 
					
						
						
							
							[xpu]feat: support multi-lora on xpu ( #20616 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yan <yan.ma@intel.com > 
						
						
					 
					
						2025-07-08 22:07:10 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b942c094e3 
					 
					
						
						
							
							Stop using title frontmatter and fix doc that can only be reached by search ( #20623 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-07-08 03:27:40 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b4bab81660 
					 
					
						
						
							
							Remove unnecessary explicit title anchors and use relative links instead ( #20620 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-07-08 02:49:13 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b91cb3fa5c 
					 
					
						
						
							
							[Docs] Improve documentation for Deepseek R1 on Ray Serve LLM ( #20601 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Ricardo Decal <rdecal@anyscale.com > 
						
						
					 
					
						2025-07-08 02:09:06 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						71d1d75b7a 
					 
					
						
						
							
							[PD][Nixl] Remote consumer READ timeout for clearing request blocks  ( #20139 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: NickLucche <nlucches@redhat.com > 
						
						
					 
					
						2025-07-08 08:56:40 +01:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						72d14d0eed 
					 
					
						
						
							
							[Frontend] [Core] Integrate Tensorizer in to S3 loading machinery, allow passing arbitrary arguments during save/load ( #19619 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Sanger Steel <sangersteel@gmail.com >
Co-authored-by: Eta <esyra@coreweave.com > 
						
						
					 
					
						2025-07-07 22:47:43 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e34d130c16 
					 
					
						
						
							
							[TPU] Temporary fix vmem oom for long model len by reducing page size ( #20278 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chenyaaang <chenyangli@google.com > 
						
						
					 
					
						2025-07-08 05:16:16 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7721ef1786 
					 
					
						
						
							
							[CI/Build][CPU] Fix CPU CI and remove all CPU V0 files ( #20560 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: jiang1.li <jiang1.li@intel.com > 
						
						
					 
					
						2025-07-07 22:13:44 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8369b7c2a9 
					 
					
						
						
							
							[Misc] improve error msg ( #20604 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-07-07 21:45:18 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3eb4ad53f3 
					 
					
						
						
							
							[Docs] Add Anyscale to frameworks ( #20590 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Ricardo Decal <rdecal@anyscale.com > 
						
						
					 
					
						2025-07-07 20:09:13 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						90a2769f20 
					 
					
						
						
							
							[Docs] Add Ray Serve LLM section to openai compatible server guide ( #20595 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Ricardo Decal <rdecal@anyscale.com > 
						
						
					 
					
						2025-07-07 20:08:05 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e60d422f19 
					 
					
						
						
							
							[Docs] Improve docstring for ray data llm example ( #20597 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Ricardo Decal <rdecal@anyscale.com > 
						
						
					 
					
						2025-07-07 20:06:26 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0d914c81a2 
					 
					
						
						
							
							[Docs] Rewrite offline inference guide ( #20594 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Ricardo Decal <rdecal@anyscale.com > 
						
						
					 
					
						2025-07-07 20:06:02 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6e428cdd7a 
					 
					
						
						
							
							[Doc] Syntax highlight request responses as JSON instead of bash ( #20582 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-07-07 20:02:45 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						93b9d9f499 
					 
					
						
						
							
							[Bugfix]: Fix messy code when using logprobs ( #19209 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com > 
						
						
					 
					
						2025-07-08 11:02:15 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						af107d5a0e 
					 
					
						
						
							
							Make distinct code and console admonitions so readers are less likely to miss them ( #20585 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-07-07 19:55:28 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						31c5d0a1b7 
					 
					
						
						
							
							[Optimize] Don't send token ids when kv connector is not used ( #20586 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-07-07 19:04:54 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						afb7cff1b9 
					 
					
						
						
							
							[Bugfix] Fix Maverick correctness by filling zero to cache space in cutlass_moe ( #20167 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Ming Yang <yming@meta.com > 
						
						
					 
					
						2025-07-08 01:07:22 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d2e841a10a 
					 
					
						
						
							
							[Misc] Improve logging for dynamic shape cache compilation ( #20573 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: kyolebu <kyu@redhat.com > 
						
						
					 
					
						2025-07-08 00:48:09 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						14601f5fba 
					 
					
						
						
							
							[Config] Refactor mistral configs  ( #20570 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Patrick von Platen <patrick.v.platen@gmail.com > 
						
						
					 
					
						2025-07-07 15:25:10 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						042d131f39 
					 
					
						
						
							
							Fix links in multi-modal model contributing page ( #18615 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-07-07 21:13:52 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8e807cdfa4 
					 
					
						
						
							
							[Misc] feat output content in stream response ( #19608 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: rongfu.leng <rongfu.leng@daocloud.io > 
						
						
					 
					
						2025-07-07 20:45:10 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e601efcb10 
					 
					
						
						
							
							[Misc] Add fully interleaved support for multimodal 'string' content format ( #14047 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: drobyshev.anton <drobyshev.anton@wb.ru >
Co-authored-by: drobyshev.anton <drobyshev.anton@wb.ru > 
						
						
					 
					
						2025-07-07 19:43:08 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						22dd9c2730 
					 
					
						
						
							
							[Kernel] Optimize Prefill Attention in Unified Triton Attention Kernel ( #20308 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jan van Lunteren <jvl@zurich.ibm.com > 
						
						
					 
					
						2025-07-07 19:08:12 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a6d795d593 
					 
					
						
						
							
							[DP] Copy environment variables to Ray DPEngineCoreActors ( #20344 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Rui Qiao <ruisearch42@gmail.com > 
						
						
					 
					
						2025-07-07 10:14:22 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a37d75bbec 
					 
					
						
						
							
							[Front-end] microbatch tokenization ( #19334 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: zt2370 <ztang2370@gmail.com > 
						
						
					 
					
						2025-07-07 17:54:10 +01:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						edd270bc78 
					 
					
						
						
							
							[Bugfix] Prevent IndexError for cached requests when pipeline parallelism is disabled ( #20486 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Peter Pan <Peter.Pan@daocloud.io > 
						
						
					 
					
						2025-07-07 09:41:15 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						110df74332 
					 
					
						
						
							
							[Model][Last/4] Automatic conversion of CrossEncoding model ( #19675 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: wang.yuqi <noooop@126.com > 
						
						
					 
					
						2025-07-07 14:46:04 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1ad69e8375 
					 
					
						
						
							
							[Doc] Fix some MkDocs snippets used in the installation docs ( #20572 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-07-07 07:44:34 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b8a498c9b2 
					 
					
						
						
							
							[Doc] Add outline for content tabs ( #20571 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-07-07 07:43:26 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						923147b5e8 
					 
					
						
						
							
							[Doc] Fix internal links so they don't always point to latest ( #20563 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-07-07 04:15:50 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						45877ef740 
					 
					
						
						
							
							[Doc] Use gh-pr and gh-issue everywhere we can in the docs ( #20564 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-07-07 03:54:22 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6e4bef1bea 
					 
					
						
						
							
							[Doc] Remove extra whitespace from CI failures doc ( #20565 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-07-07 03:35:47 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4ff79a136e 
					 
					
						
						
							
							[Misc] Set the minimum openai version ( #20539 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-07-07 09:15:26 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						448acad31e 
					 
					
						
						
							
							[Misc] remove unused jinaai_serving_reranking ( #18878 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Abirdcfly <fp544037857@gmail.com > 
						
						
					 
					
						2025-07-07 09:14:12 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						eb0b2d2f08 
					 
					
						
						
							
							[Docs] Clean up tables in supported_models.md ( #20552 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: windsonsea <haifeng.yao@daocloud.io > 
						
						
					 
					
						2025-07-07 01:46:31 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3112271f6e 
					 
					
						
						
							
							[XPU] log clean up for XPU platform ( #20553 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yan <yan.ma@intel.com > 
						
						
					 
					
						2025-07-07 01:38:22 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1fd471e957 
					 
					
						
						
							
							Add docstrings to url_schemes.py to improve readability ( #20545 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: windsonsea <haifeng.yao@daocloud.io > 
						
						
					 
					
						2025-07-07 08:31:49 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2c5ebec064 
					 
					
						
						
							
							[XPU][CI] add v1/core test in xpu hardware ci ( #20537 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Ma, Liangliang <liangliang.ma@intel.com > 
						
						
					 
					
						2025-07-07 01:16:40 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2e610deb72 
					 
					
						
						
							
							[CI/Build] Enable phi2 lora test ( #20540 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-07-07 05:10:41 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6e2c19ce22 
					 
					
						
						
							
							[Refactor]Abstract Platform Interface for Distributed Backend and Add xccl Support for Intel XPU ( #19410 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: dbyoung18 <yang5.yang@intel.com >
Signed-off-by: Kunshang Ji <kunshang.ji@intel.com >
Co-authored-by: Kunshang Ji <kunshang.ji@intel.com > 
						
						
					 
					
						2025-07-07 04:32:32 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						47db8c2c15 
					 
					
						
						
							
							[Misc] add a tip for pre-commit ( #20536 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-07-06 19:42:06 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						462b269280 
					 
					
						
						
							
							Implement OpenAI Responses API [1/N] ( #20504 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-07-06 18:32:13 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c18b3b8e8b 
					 
					
						
						
							
							[Bugfix] Add use_cross_encoder flag to use correct activation in ClassifierPooler ( #20527 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-07-06 14:01:48 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9528e3a05e 
					 
					
						
						
							
							[BugFix][Spec Decode] Fix spec token ids in model runner ( #20530 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-07-06 19:44:52 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9fb52e523a 
					 
					
						
						
							
							[V1] Support any head size for FlexAttention backend ( #20467 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-07-06 09:54:36 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e202dd2736 
					 
					
						
						
							
							[V0 deprecation] Remove V0 CPU/XPU/TPU backends ( #20412 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu >
Signed-off-by: jiang1.li <jiang1.li@intel.com >
Co-authored-by: Li, Jiang <jiang1.li@intel.com > 
						
						
					 
					
						2025-07-06 08:48:13 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						43813e6361 
					 
					
						
						
							
							[Misc] call the pre-defined func ( #20518 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-07-06 10:25:29 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						cede942b87 
					 
					
						
						
							
							[Benchmark] Add support for multiple batch size benchmark through CLI in benchmark_moe.py ( #20516 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Brayden Zhong <b8zhong@uwaterloo.ca > 
						
						
					 
					
						2025-07-06 09:20:11 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						fe1e924811 
					 
					
						
						
							
							[Frontend] Support image object in llm.chat ( #19635 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: sfeng33 <4florafeng@gmail.com >
Signed-off-by: Flora Feng <4florafeng@gmail.com > 
						
						
					 
					
						2025-07-06 06:47:13 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4548c03c50 
					 
					
						
						
							
							[TPU][Bugfix] fix the MoE OOM issue ( #20339 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chengji Yao <chengjiyao@google.com > 
						
						
					 
					
						2025-07-05 21:19:09 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						40b86aa05e 
					 
					
						
						
							
							[BugFix] Fix: ImportError when building on hopper systems ( #20513 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com > 
						
						
					 
					
						2025-07-06 12:17:30 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						432870829d 
					 
					
						
						
							
							[Bugfix] Fix missing per_act_token parameter in compressed_tensors_moe ( #20509 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lu Fang <fanglu@fb.com > 
						
						
					 
					
						2025-07-06 12:08:30 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f73d02aadc 
					 
					
						
						
							
							[BUG]  Fix   #20484 . Support empty sequence in cuda penalty kernel ( #20491 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Vadim Gimpelson <vadim.gimpelson@centml.ai > 
						
						
					 
					
						2025-07-05 19:38:02 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c5ebe040ac 
					 
					
						
						
							
							test_attention compat with coming xformers change ( #20487 )  
						
						 
						
						... 
						
						
						
						Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> 
						
						
					 
					
						2025-07-05 19:37:59 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8d763cb891 
					 
					
						
						
							
							[Misc] remove unused import ( #20517 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-07-05 19:17:06 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						cf4cd53982 
					 
					
						
						
							
							[Misc] Add logger.exception for TPU information collection failures ( #20510 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-07-05 07:24:32 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						32c9be2200 
					 
					
						
						
							
							[v1] Re-add fp32 support to v1 engine through FlexAttention ( #19754 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <2037008807@qq.com >
Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn > 
						
						
					 
					
						2025-07-05 09:41:10 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8aeaa910a2 
					 
					
						
						
							
							Fix unknown attribute of topk_indices_dtype in CompressedTensorsW8A8Fp8MoECutlassMethod ( #20507 )  
						
						 
						
						... 
						
						
						
						Co-authored-by: Lucia (Lu) Fang <fanglu@meta.com > 
						
						
					 
					
						2025-07-05 14:03:20 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						906e05d840 
					 
					
						
						
							
							[Misc] Remove the unused LoRA test code ( #20494 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-07-05 13:48:16 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ef9a2990ae 
					 
					
						
						
							
							[doc] small fix ( #20506 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-07-04 20:56:39 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7e90870491 
					 
					
						
						
							
							[Misc] Add security warning for development mode endpoints ( #20508 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-07-04 20:52:13 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d3f05c9248 
					 
					
						
						
							
							[Doc] fix mutltimodal_inputs.md gh examples link ( #20497 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Guy Stone <guys@spotify.com > 
						
						
					 
					
						2025-07-04 16:41:35 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c108781c85 
					 
					
						
						
							
							[CI Bugfix] Fix pre-commit failures on main ( #20502 )  
						
						 
						
						
						
						
					 
					
						2025-07-04 14:17:30 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3d184b95b8 
					 
					
						
						
							
							[feat]: CUTLASS block scaled group gemm for SM100 ( #19757 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Duncan Moss <djm.moss@gmail.com >
Co-authored-by: Duncan Moss <dmoss@nvidia.com > 
						
						
					 
					
						2025-07-04 12:58:04 -06:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2f35a022e6 
					 
					
						
						
							
							Enable V1 for Hybrid SSM/Attention Models ( #20016 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Thomas Parnell <tpa@zurich.ibm.com >
Co-authored-by: Stanislaw Wozniak <stw@zurich.ibm.com >
Co-authored-by: Tyler Michael Smith <tysmith@redhat.com >
Co-authored-by: Chen Zhang <zhangch99@outlook.com > 
						
						
					 
					
						2025-07-04 17:46:53 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ffe00ef77a 
					 
					
						
						
							
							[Misc] Small: Remove global media connector. Each test should have its own test connector object. ( #20395 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chenheli Hua <huachenheli@outlook.com > 
						
						
					 
					
						2025-07-04 08:15:03 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5561681d04 
					 
					
						
						
							
							[CI] add kvcache-connector dependency definition and add into CI build ( #18193 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Peter Pan <Peter.Pan@daocloud.io > 
						
						
					 
					
						2025-07-04 06:49:18 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						fbd62d8750 
					 
					
						
						
							
							[Doc] Fix classification table in list of supported models ( #20489 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-07-04 06:08:02 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2e26f9156a 
					 
					
						
						
							
							[Model][3/N] Automatic conversion of CrossEncoding model ( #20168 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: wang.yuqi <noooop@126.com > 
						
						
					 
					
						2025-07-04 05:47:39 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9e5452ee34 
					 
					
						
						
							
							[Bug][Frontend] Fix structure of transcription's decoder_prompt ( #18809 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: sangbumlikeagod <oironese@naver.com > 
						
						
					 
					
						2025-07-04 11:28:07 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0e3fe896e2 
					 
					
						
						
							
							Support Llama 4 for fused_marlin_moe ( #20457 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-07-04 07:55:10 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1caca5a589 
					 
					
						
						
							
							[Misc] Add SPDX-FileCopyrightText ( #20428 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-07-04 07:40:42 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						783921d889 
					 
					
						
						
							
							[Perf] Optimize Vectorization Utils for Int 8 Quantization Kernels ( #20331 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yewentao256 <zhyanwentao@126.com > 
						
						
					 
					
						2025-07-04 15:06:24 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4a98edff1f 
					 
					
						
						
							
							[Structured Outputs][V1] Skipping with models doesn't contain tokenizers ( #20365 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Aaron Pham <contact@aarnphm.xyz >
Co-authored-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-07-04 15:05:49 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a7bab0c9e5 
					 
					
						
						
							
							[Misc] small update ( #20462 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-07-03 20:33:44 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						25950dca9b 
					 
					
						
						
							
							Add ignore consolidated file in mistral example code ( #20420 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: 汪志鹏 <wangzhipeng628@gmail.com > 
						
						
					 
					
						2025-07-04 02:55:07 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a4113b035c 
					 
					
						
						
							
							[Platform] Add custom default max tokens ( #18557 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Gabriel Marinho <gmarinho@ibm.com > 
						
						
					 
					
						2025-07-04 10:50:17 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7e1665b089 
					 
					
						
						
							
							[Misc] Change warn_for_unimplemented_methods to debug ( #20455 )  
						
						 
						
						
						
						
					 
					
						2025-07-04 02:35:08 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8d1096e7db 
					 
					
						
						
							
							[Bugfix] Register reducer even if transformers_modules not available ( #19510 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Seiji Eicher <seiji@anyscale.com > 
						
						
					 
					
						2025-07-03 22:08:12 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8d775dd30a 
					 
					
						
						
							
							[Misc] Fix Unable to detect current VLLM config. Defaulting to NHD kv cache layout warning ( #20400 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: NickLucche <nlucches@redhat.com > 
						
						
					 
					
						2025-07-03 14:56:09 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						78fe77534b 
					 
					
						
						
							
							[Kernel] Enable fp8 support for pplx and BatchedTritonExperts. ( #18864 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Bill Nell <bnell@redhat.com > 
						
						
					 
					
						2025-07-03 14:55:40 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2f2fcb31b8 
					 
					
						
						
							
							[Misc] Remove _maybe_ignore_quant_config from GLM4.1v ( #20432 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: zRzRzRzRzRzRzR <2448370773@qq.com > 
						
						
					 
					
						2025-07-03 21:41:13 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1dba2c4ebe 
					 
					
						
						
							
							[Misc] adjust for ipv6 for mookcacke url parse ( #20107 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Andy Xie <andy.xning@gmail.com > 
						
						
					 
					
						2025-07-03 20:27:17 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						71d6de3a26 
					 
					
						
						
							
							[Misc] Clean up InternVL family config registration ( #19992 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <2037008807@qq.com > 
						
						
					 
					
						2025-07-03 20:01:47 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						536fd33003 
					 
					
						
						
							
							[CI] Trimming some failing test groups from AMDPRODUCTION. ( #20390 )  
						
						 
						
						
						
						
					 
					
						2025-07-03 08:21:31 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						619b9f5c7e 
					 
					
						
						
							
							[Frontend] fix duplicate output for bench subcmd ( #20446 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-07-03 08:02:06 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d1b689c445 
					 
					
						
						
							
							[Bugfix] Fix flaky test_streaming_response test ( #20363 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: NickLucche <nlucches@redhat.com > 
						
						
					 
					
						2025-07-03 14:46:24 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9854dc9040 
					 
					
						
						
							
							[Frontend] improve vllm bench <bench_type> --help display ( #20430 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-07-03 14:22:16 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ff5c60fad8 
					 
					
						
						
							
							[Misc] Automatically tag PRs to add new models ( #20222 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn > 
						
						
					 
					
						2025-07-03 07:11:03 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6f1229f91d 
					 
					
						
						
							
							[Model][2/N] Automatic conversion of CrossEncoding model ( #19978 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: wang.yuqi <noooop@126.com > 
						
						
					 
					
						2025-07-03 13:59:23 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1819fbda63 
					 
					
						
						
							
							[Quantization] Bump to use latest bitsandbytes ( #20424 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-07-03 21:58:46 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7f0367109e 
					 
					
						
						
							
							[CI/Build][CPU] Enable cross compilation in CPU release pipeline ( #20423 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: jiang1.li <jiang1.li@intel.com > 
						
						
					 
					
						2025-07-03 05:26:12 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						fb14d53cf6 
					 
					
						
						
							
							[Kernel] refactor cpu worker v0 cache dtype ( #20080 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Andy Xie <andy.xning@gmail.com > 
						
						
					 
					
						2025-07-03 08:39:14 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b024a42e93 
					 
					
						
						
							
							[Core] Move multimodal placeholder from chat utils to model definition ( #20355 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-07-03 08:18:30 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						cb97f2bfc5 
					 
					
						
						
							
							[Docs] Replace two list with tables in intel_gaudi.md ( #20414 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: windsonsea <haifeng.yao@daocloud.io > 
						
						
					 
					
						2025-07-03 00:48:25 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						359200f6ac 
					 
					
						
						
							
							[doc] fix link ( #20417 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-07-03 00:21:57 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						220aee902a 
					 
					
						
						
							
							[Misc] Add rules to label Speculative Decoding Related PRs ( #20406 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lifan Shen <lifans@meta.com > 
						
						
					 
					
						2025-07-02 23:56:49 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						67d25eca05 
					 
					
						
						
							
							[Tests] Update online DP tests to verify that requests are balanced ( #20157 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-07-03 14:49:13 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						363528de27 
					 
					
						
						
							
							[Feature] Support MiniMax-M1 function calls features ( #20297 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: QscQ <qscqesze@gmail.com >
Signed-off-by: qingjun <qingjun@minimaxi.com > 
						
						
					 
					
						2025-07-03 06:48:27 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4ff61ababa 
					 
					
						
						
							
							[TPU] Add a case to cover RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a8 ( #20385 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Qiliang Cui <derrhein@gmail.com > 
						
						
					 
					
						2025-07-03 06:46:41 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0ec3779df7 
					 
					
						
						
							
							[Bugfix][CI/CD][CPU] Fix CPU CI tests ( #20383 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: jiang1.li <jiang1.li@intel.com > 
						
						
					 
					
						2025-07-02 20:11:36 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b616f6a53d 
					 
					
						
						
							
							[Misc] Small: Fix video loader return type annotations. ( #20389 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chenheli Hua <huachenheli@outlook.com > 
						
						
					 
					
						2025-07-03 03:10:39 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2e25bb12a8 
					 
					
						
						
							
							[Bugfix] Fix import of CutlassExpertsFp8 in compressed_tensors_moe.py ( #20381 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Bill Nell <bnell@redhat.com > 
						
						
					 
					
						2025-07-03 02:07:43 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9965c47d0d 
					 
					
						
						
							
							Enable CPU nightly performance benchmark and its Markdown report ( #18444 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Tsai, Louie <louie.tsai@intel.com > 
						
						
					 
					
						2025-07-02 17:50:25 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						059d4cdb49 
					 
					
						
						
							
							[BugFix] Fix DP headless mode arg validation ( #20398 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-07-02 17:15:32 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						bdb84e26b0 
					 
					
						
						
							
							[Bugfix] Fixes for FlashInfer's TORCH_CUDA_ARCH_LIST ( #20136 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com >
Signed-off-by: Tyler Michael Smith <tysmith@redhat.com > 
						
						
					 
					
						2025-07-02 17:15:11 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3dd359147d 
					 
					
						
						
							
							[Docs] Update EAGLE example ( #20375 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: NickLucche <nlucches@redhat.com > 
						
						
					 
					
						2025-07-02 17:13:51 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						657f2f301a 
					 
					
						
						
							
							[DP] Support external DP Load Balancer mode ( #19790 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-07-02 10:21:52 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a1aafc827a 
					 
					
						
						
							
							[ROCm][FEAT] Enable Full Graph Mode in AITER MLA V1 Attn Backend (Decode Phase only) ( #20254 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: vllmellm <vllm.ellm@embeddedllm.com > 
						
						
					 
					
						2025-07-02 16:25:46 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						139508a418 
					 
					
						
						
							
							[Misc] add handler HF_TOKEN is emptry string ( #20369 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: rongfu.leng <rongfu.leng@daocloud.io > 
						
						
					 
					
						2025-07-02 09:14:31 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d265414dbc 
					 
					
						
						
							
							[Minor] Clean up incorrect comment in test ( #20382 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-07-02 09:13:37 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						48fb076cbc 
					 
					
						
						
							
							[V1] LogitsProcessor programming model ( #16728 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com >
Signed-off-by: Andrew Feldman <afeldman@neuralmagic.com >
Signed-off-by: Andrew Feldman <afeldman@redhat.com >
Co-authored-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-07-02 09:10:42 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c1909e7e8c 
					 
					
						
						
							
							[Kernels] MoE refactor ( #19636 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Bill Nell <bnell@redhat.com >
Signed-off-by: ElizaWszola <ewszola@redhat.com >
Co-authored-by: ElizaWszola <ewszola@redhat.com > 
						
						
					 
					
						2025-07-02 06:08:27 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b95877509b 
					 
					
						
						
							
							Documentation update tool_calling: mapping back to function from response ( #20373 )  
						
						 
						
						
						
						
					 
					
						2025-07-02 05:55:49 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						706ff13224 
					 
					
						
						
							
							[Model] Adds support for SlimMoE models Phi-tiny-MoE-instruct ( #20286 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Zichong Li <t-lizichong@microsoft.com @Reasoning-H100-VM3.drbuo4tcjzruhloch3eo0b25ef.cx.internal.cloudapp.net>
Co-authored-by: Zichong Li <t-lizichong@microsoft.com @Reasoning-H100-VM3.drbuo4tcjzruhloch3eo0b25ef.cx.internal.cloudapp.net>
Co-authored-by: Isotr0py <2037008807@qq.com > 
						
						
					 
					
						2025-07-02 12:54:12 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ccbfb1d1c9 
					 
					
						
						
							
							[Bugfix] Fix the max_seq_len limit of 16384 for DeepSeek models ( #20322 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Wang Huaqiang <huaqiang.wang@intel.com > 
						
						
					 
					
						2025-07-02 12:53:36 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9e5552aa13 
					 
					
						
						
							
							[NVIDIA] Support Cutlass w8a8 FP8 for Blackwell Geforce GPUs (sm120) ( #17280 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: kaln27 <liaojuncheng123@foxmail.com >
Co-authored-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-07-02 06:47:19 -06:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0c600b9ab6 
					 
					
						
						
							
							[Build/CI] Automatically tag DeepSeek related PRs ( #20370 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lu Fang <lufang@fb.com > 
						
						
					 
					
						2025-07-02 04:02:43 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e303dcf523 
					 
					
						
						
							
							[Model] Add Ernie4.5 and Ernie4.5MoE Model Support ( #20220 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: wangyafeng <wangyafeng@baidu.com > 
						
						
					 
					
						2025-07-02 03:37:01 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ae9c4d416f 
					 
					
						
						
							
							[Docs] Make TPU ref prettier in google_tpu.md ( #20356 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: windsonsea <haifeng.yao@daocloud.io > 
						
						
					 
					
						2025-07-02 02:04:08 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d853520b3e 
					 
					
						
						
							
							[Docs] Fix indentations for 2-level items in deprecation_policy.md ( #20352 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: windsonsea <haifeng.yao@daocloud.io > 
						
						
					 
					
						2025-07-01 23:50:31 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ba51aea65e 
					 
					
						
						
							
							[Bugfix] Keye-VL compatibility with tok_kwargs ( #20058 ) ( #20353 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-07-01 23:46:59 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8452946c06 
					 
					
						
						
							
							[Model][VLM] Support Keye-VL-8B-Preview ( #20126 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Kwai-Keye <Keye@kuaishou.com > 
						
						
					 
					
						2025-07-01 23:35:04 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2e7cbf2d7d 
					 
					
						
						
							
							[Frontend] Support configurable mm placeholder strings & flexible video sampling policies via CLI flags. ( #20105 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chenheli Hua <huachenheli@outlook.com > 
						
						
					 
					
						2025-07-01 23:34:03 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7da296be04 
					 
					
						
						
							
							[TPU] kv cache update kernel supports dynamic grid ( #20235 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chengji Yao <chengjiyao@google.com > 
						
						
					 
					
						2025-07-02 06:33:37 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b205e8467d 
					 
					
						
						
							
							[Doc][TPU] Add models and features supporting matrix. ( #20230 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Qiliang Cui <cuiq@google.com > 
						
						
					 
					
						2025-07-02 06:33:20 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						be0cfb2b68 
					 
					
						
						
							
							fix[Docs]: link anchor is incorrect  #20309  ( #20315 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: zxw <1020938856@qq.com > 
						
						
					 
					
						2025-07-02 06:32:34 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1a03dd496b 
					 
					
						
						
							
							[Bugfix] Fix dynamic rotary embedding ( #20343 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-07-02 06:31:26 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						27b8017636 
					 
					
						
						
							
							[FIX][Intel GPU]fix ipex flash_attn_varlen_func api missing parameter ( #20348 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Kunshang Ji <kunshang.ji@intel.com > 
						
						
					 
					
						2025-07-01 22:26:40 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9ec1e3065a 
					 
					
						
						
							
							[Misc][Doc] Add missing comment for LLM ( #20285 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lifan Shen <lifans@meta.com > 
						
						
					 
					
						2025-07-01 19:04:24 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9dae7d46bf 
					 
					
						
						
							
							[Refactor] Remove Unused Env VLLM_ENABLE_MOE_ALIGN_BLOCK_SIZE_TRITON ( #20334 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yewentao256 <zhyanwentao@126.com > 
						
						
					 
					
						2025-07-01 19:03:43 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7058d7dd5d 
					 
					
						
						
							
							[Refactor] Remove duplicate find_free_port ( #20333 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yewentao256 <zhyanwentao@126.com > 
						
						
					 
					
						2025-07-01 19:03:07 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a0389e0554 
					 
					
						
						
							
							[UT][intel GPU] use current_platform instead of device hardcode in v1 tests ( #20169 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Ma, Liangliang <liangliang.ma@intel.com > 
						
						
					 
					
						2025-07-02 09:06:04 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3be8d312a2 
					 
					
						
						
							
							[Kernel][Bugfix] Fixup some warnings in nvfp4_blockwise_moe when CUDA < 12.8 ( #20324 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com > 
						
						
					 
					
						2025-07-01 18:05:47 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3abfe22154 
					 
					
						
						
							
							Enable group size 64 for Machete ( #20290 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: czhu-cohere <conway.zhu@cohere.com > 
						
						
					 
					
						2025-07-01 18:05:44 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e81fbefe8a 
					 
					
						
						
							
							[Refactor] Refactor import utils ( #20269 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yewentao256 <zhyanwentao@126.com > 
						
						
					 
					
						2025-07-01 18:05:42 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9290de5667 
					 
					
						
						
							
							remove unused variables in marlin_template.h ( #20236 )  
						
						 
						
						
						
						
					 
					
						2025-07-02 00:51:52 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7f280d69c9 
					 
					
						
						
							
							[Optimization] Cache sampled token ids in model runner ( #20291 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-07-01 11:01:31 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						02cabff207 
					 
					
						
						
							
							[V1] [ROCm] Enable EP with AITER Fused MoE ( #20270 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: tjtanaa <tunjian.tan@embeddedllm.com > 
						
						
					 
					
						2025-07-01 16:48:30 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3d19d47d91 
					 
					
						
						
							
							[Frontend] Expand tools even if tool_choice="none" ( #17177 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: okada shintarou <okada@preferred.jp > 
						
						
					 
					
						2025-07-01 12:47:38 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8acb4badee 
					 
					
						
						
							
							[CUDA graphs] Enable full cuda graphs with FA3 AoT scheduling ( #20301 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-07-01 09:07:36 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						314af8617c 
					 
					
						
						
							
							[Docs] Update transcriptions API to use openai client with stream=True  ( #20271 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: NickLucche <nlucches@redhat.com > 
						
						
					 
					
						2025-07-01 15:47:13 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0e96cc9b7e 
					 
					
						
						
							
							[Misc] Minor refactoring for scheduler ( #20299 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-07-01 07:55:32 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ecad851cbd 
					 
					
						
						
							
							[Model]Add Tencent HunYuanMoEV1 Model Support ( #20114 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: aiyiwang <aiyiwang@tencent.com >
Signed-off-by: Jee Jee Li <pandaleefree@gmail.com >
Co-authored-by: quinnrong <quinnrong@tencent.com >
Co-authored-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-07-01 07:28:13 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ed70f3c64f 
					 
					
						
						
							
							Add GLM4.1V model (Draft) ( #19331 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: zRzRzRzRzRzRzR <2448370773@qq.com >
Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn >
Co-authored-by: Isotr0py <mozf@mail2.sysu.edu.cn > 
						
						
					 
					
						2025-07-01 12:48:26 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						650d5dbd04 
					 
					
						
						
							
							[Misc] Minor refactor of NIXL background handshake ( #20068 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: NickLucche <nlucches@redhat.com > 
						
						
					 
					
						2025-07-01 12:40:14 +01:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9025a9a705 
					 
					
						
						
							
							[Quant] [Bugfix] Fix quantization config matching with hf_to_vllm_mapper ( #20046 )  
						
						 
						
						
						
						
					 
					
						2025-07-01 19:20:34 +09:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c05596f1a3 
					 
					
						
						
							
							[Perf] Validate @config in pre-commit instead of dynamically ( #20200 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lionel Villard <villard@us.ibm.com > 
						
						
					 
					
						2025-07-01 05:10:28 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						787b13389e 
					 
					
						
						
							
							[doc] fix the incorrect logo in dark mode ( #20289 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-07-01 08:18:09 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						96453cfa83 
					 
					
						
						
							
							[BugFix][V1][ROCm] Triton MLA uses V0 backend on V1 engine ( #19067 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Tianyuan Wu <Tianyuan.Wu@amd.com > 
						
						
					 
					
						2025-07-01 16:12:19 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b1c1fe35a5 
					 
					
						
						
							
							[Misc] remove redundant char ( #20287 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Kebe <mail@kebe7jun.com > 
						
						
					 
					
						2025-07-01 15:33:22 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						08d81f1014 
					 
					
						
						
							
							[Bugfix] Fix deepep tests ( #20288 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Varun Sundar Rabindranath <vsundarr@redhat.com >
Co-authored-by: Varun Sundar Rabindranath <vsundarr@redhat.com > 
						
						
					 
					
						2025-07-01 15:29:08 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6cc1e7d96d 
					 
					
						
						
							
							[CPU] Update custom ops for the CPU backend ( #20255 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: jiang1.li <jiang1.li@intel.com > 
						
						
					 
					
						2025-07-01 07:25:03 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9909726d2a 
					 
					
						
						
							
							Enable ZP Support for Machete ( #20268 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: czhu-cohere <conway.zhu@cohere.com > 
						
						
					 
					
						2025-07-01 07:12:20 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						22e9d42040 
					 
					
						
						
							
							[Misc] add xgrammar for arm64 ( #18359 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Prashant Gupta <prashantgupta@us.ibm.com > 
						
						
					 
					
						2025-07-01 07:02:20 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						86debab54c 
					 
					
						
						
							
							Fix numel() downcast in vllm/csrc/moe/moe_align_sum_kernels.cu +2 ( #17082 )  
						
						 
						
						... 
						
						
						
						Co-authored-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-07-01 06:48:10 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						be250bbc67 
					 
					
						
						
							
							[V1] Only print cudagraph tqdm on rank 0 with is_global_first_rank ( #19516 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-07-01 06:02:09 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						27949354fa 
					 
					
						
						
							
							[Feature] A calibration-free RTN-based quantization for accurate and accelerated INT4/INT8 inference ( #18768 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Alex Kogan <alex.kogan@oracle.com >
Co-authored-by: Michael Goin <mgoin64@gmail.com > 
						
						
					 
					
						2025-07-01 05:44:38 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						bd5038af07 
					 
					
						
						
							
							[Doc] add config and troubleshooting guide for NCCL & GPUDirect RDMA ( #15897 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Ernest Wong <chwong719@gmail.com > 
						
						
					 
					
						2025-06-30 21:44:39 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a2f14dc8f9 
					 
					
						
						
							
							[CI][Intel Gaudi][vllm-Plugin]Add CI for hpu-plugin-v1-test ( #20196 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chendi Xue <chendi.xue@intel.com > 
						
						
					 
					
						2025-07-01 04:17:07 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						92ee7baaf9 
					 
					
						
						
							
							[Example] add one-click runnable example for P2P NCCL XpYd ( #20246 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: KuntaiDu <kuntai@uchicago.edu > 
						
						
					 
					
						2025-06-30 21:03:55 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7151f92241 
					 
					
						
						
							
							[Misc] Fix spec decode example ( #20296 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-06-30 21:01:48 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e28533a16f 
					 
					
						
						
							
							[Bugfix] Fix include prompt in stream response when echo=true ( #15233 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Yuan Fang <yuanfang@alauda.io > 
						
						
					 
					
						2025-07-01 01:30:14 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6d42ce8315 
					 
					
						
						
							
							[CLI] Improve CLI arg parsing for -O/--compilation-config ( #20156 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: luka <luka@neuralmagic.com > 
						
						
					 
					
						2025-07-01 01:03:13 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ded1fb635b 
					 
					
						
						
							
							[Bugfix][V1][P/D]Fix the issue of occasional garbled output  for P2pNcclConnector ( #20263 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Abatom <abzhonghua@gmail.com > 
						
						
					 
					
						2025-06-30 16:45:14 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						97d9524fe9 
					 
					
						
						
							
							[Refactor] Remove useless pdb comment ( #20266 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yewentao256 <zhyanwentao@126.com > 
						
						
					 
					
						2025-06-30 18:15:24 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d8cf819a9a 
					 
					
						
						
							
							[Core] [Bugfix] [Multimodal] Fix multimodal profiling and generation for SFT/PTQed models ( #20058 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Kyle Sayers <kylesayrs@gmail.com > 
						
						
					 
					
						2025-06-30 17:26:49 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						551ef1631a 
					 
					
						
						
							
							[Unit Test] Add unit test for deep gemm ( #20090 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yewentao256 <zhyanwentao@126.com >
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> 
						
						
					 
					
						2025-06-30 10:26:42 -06:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2863befce3 
					 
					
						
						
							
							[Optimization] Use Shared CachedRequestData Instance Across All Requests ( #20232 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-06-30 09:07:50 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2965c99c86 
					 
					
						
						
							
							[Spec Decode] Clean up spec decode example ( #20240 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-06-30 08:28:13 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2062c0723d 
					 
					
						
						
							
							[Spec Decode] Refactor spec decoding into a separate function ( #20238 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-06-30 08:13:50 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1c50e100a9 
					 
					
						
						
							
							[Bugfix] fix quark ptpc ( #20251 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Haoyang Li <Haoyang.Li@amd.com >
Co-authored-by: Haoyang Li <307790822@qq.com > 
						
						
					 
					
						2025-06-30 22:24:50 +09:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3ee56e26be 
					 
					
						
						
							
							[Docs] Fix 1-2-3 list in v1/prefix_caching.md ( #20243 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: windsonsea <haifeng.yao@daocloud.io > 
						
						
					 
					
						2025-06-30 11:20:51 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8fe7fc8634 
					 
					
						
						
							
							[Quantization] Improve BitsAndBytesModelLoader ( #20242 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-06-30 18:22:09 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e936e401de 
					 
					
						
						
							
							[Bugfix] Fix processor initialization in transformers 4.53.0 ( #20244 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <2037008807@qq.com > 
						
						
					 
					
						2025-06-30 10:16:16 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f5dfa07531 
					 
					
						
						
							
							[Bugfix] Skip loading extra parameters for modelopt Qwen3 MoE model ( #19598 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: noiji <> 
						
						
					 
					
						2025-06-30 18:21:56 +09:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						022c58b80f 
					 
					
						
						
							
							[doc] Add Slack and Forum to the top navigation ( #20208 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-06-30 07:53:45 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						19108ef311 
					 
					
						
						
							
							[Misc] Fix import ( #20233 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-06-29 20:34:54 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5a52f389dd 
					 
					
						
						
							
							[BUGFIX][DEEPSEEK][MODEL_LOAD] fix w13, w2 weight not initialized assert ( #20202 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chendi Xue <chendi.xue@intel.com > 
						
						
					 
					
						2025-06-29 19:46:19 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						65b1cbb138 
					 
					
						
						
							
							[Model] support dots1 ( #18254 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: redmoe-moutain <agiredmoe@gmail.com > 
						
						
					 
					
						2025-06-29 19:34:36 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6c9837a761 
					 
					
						
						
							
							Fix cuda_archs_loose_intersection when handling sm_*a ( #20207 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Huy Do <huydhn@gmail.com > 
						
						
					 
					
						2025-06-29 16:52:34 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6f2f53a82d 
					 
					
						
						
							
							[Quantization] Add compressed-tensors NVFP4 MoE Support ( #19990 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Dipika Sikka <dipikasikka1@gmail.com >
Signed-off-by: Dipika <dipikasikka1@gmail.com > 
						
						
					 
					
						2025-06-29 22:05:40 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7b1895e6ce 
					 
					
						
						
							
							[CI Fix] Try fixing eagle e2e test OOM by reducing block allocation ( #20213 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-06-29 10:31:37 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4d36693687 
					 
					
						
						
							
							[Refactor] Create a function util and cache the results for has_deepgemm, has_deepep, has_pplx ( #20187 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yewentao256 <zhyanwentao@126.com > 
						
						
					 
					
						2025-06-28 22:06:38 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						daec9dea6e 
					 
					
						
						
							
							[Bugfix] Correct behavior of GraniteMoeHybrid for TensorParallel execution ( #20137 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Stanislaw Wozniak <stw@zurich.ibm.com > 
						
						
					 
					
						2025-06-28 08:16:41 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						daceac57c7 
					 
					
						
						
							
							[Frontend] Generalize v1/audio/transcriptions endpoint ( #20179 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: NickLucche <nlucches@redhat.com > 
						
						
					 
					
						2025-06-28 08:15:26 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8615d9776f 
					 
					
						
						
							
							[CI/Build] Add new CI job to validate Hybrid Models for every PR  ( #20147 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Thomas Parnell <tpa@zurich.ibm.com > 
						
						
					 
					
						2025-06-27 23:00:25 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7b460c25f9 
					 
					
						
						
							
							[BugFix] Fix the incorrect func name in the comments. (config.py) ( #20185 )  
						
						 
						
						
						
						
					 
					
						2025-06-27 22:51:16 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f719772281 
					 
					
						
						
							
							[Bugfix] Properly reject requests with empty list guided_choice ( #20195 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-06-27 22:50:52 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d45417b804 
					 
					
						
						
							
							fix ci issue distributed 4 gpu test ( #20204 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yewentao256 <zhyanwentao@126.com > 
						
						
					 
					
						2025-06-27 22:50:00 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a29e62ea34 
					 
					
						
						
							
							Fix num_token_padding support for static per-tensor scaled_fp8_quant ( #20188 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-06-27 22:48:13 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e53be6f00a 
					 
					
						
						
							
							[Misc] Add type assertion of request_id for LLMEngine.add_request ( #19700 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: n2ptr <xuzhanchaomail@163.com > 
						
						
					 
					
						2025-06-27 22:47:36 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c329ceca6d 
					 
					
						
						
							
							[CI Fix] Pin tests/models/registry.py MiniMaxText01ForCausalLM to revision due to model changes ( #20199 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-06-28 13:43:06 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3c545c0c3b 
					 
					
						
						
							
							[CI/Build] Allow hermetic builds ( #18064 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Fabien Dupont <fdupont@redhat.com >
Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com >
Signed-off-by: Fabien Dupont <fabiendupont@pm.me >
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com >
Co-authored-by: Tyler Michael Smith <tyler@neuralmagic.com >
Co-authored-by: Elias Levy <eliaslevy@google.com >
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-06-27 09:04:39 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e8c3bd2cd1 
					 
					
						
						
							
							[Bugfix] Fix some narrowing conversion warnings ( #20141 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com > 
						
						
					 
					
						2025-06-27 09:01:28 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c6c983053d 
					 
					
						
						
							
							[Bugfix] Mark 'hidden_states' as mutable in moe_forward registration. ( #20152 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Bill Nell <bnell@redhat.com > 
						
						
					 
					
						2025-06-27 09:42:22 -06:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						aafabaa0d5 
					 
					
						
						
							
							[Fix][torch.compile] Enable custom ops by default when Inductor off ( #20102 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: luka <luka@neuralmagic.com > 
						
						
					 
					
						2025-06-27 09:00:42 -06:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						94a55c7681 
					 
					
						
						
							
							[Fix][ROCm] Remove unused variables to fix build error on GFX11/12 ( #19891 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Hosang Yoon <hosang.yoon@amd.com > 
						
						
					 
					
						2025-06-27 07:14:44 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						aa0dc77ef5 
					 
					
						
						
							
							[Perf] Improved perf for resolve_chat_template_content_format ( #20065 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Ilya Lavrenov <ilya.lavrenov@cerebras.net > 
						
						
					 
					
						2025-06-27 09:16:41 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4ab3ac285e 
					 
					
						
						
							
							[Bugfix] Fix flaky failure when getting DP ports ( #20151 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-06-27 15:30:53 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d1c956dc0f 
					 
					
						
						
							
							Gemma3n (Text-only) ( #20134 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: rshaw@neuralmagic.com  <robertgshaw2@gmail.com >
Signed-off-by: Roger Wang <hey@rogerw.me >
Co-authored-by: Roger Wang <hey@rogerw.me > 
						
						
					 
					
						2025-06-27 07:16:26 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						dec197e3e5 
					 
					
						
						
							
							Quick Fix by adding conditional import for flash_attn_varlen_func in flash_attn ( #20143 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chendi.Xue <chendi.xue@intel.com > 
						
						
					 
					
						2025-06-27 05:48:13 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6e244ae091 
					 
					
						
						
							
							[Perf][Frontend] eliminate api_key and x_request_id headers middleware overhead ( #19946 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Yazan-Sharaya <yazan.sharaya.yes@gmail.com > 
						
						
					 
					
						2025-06-27 00:44:14 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						cd4cfee689 
					 
					
						
						
							
							[Model][1/N] Automatic conversion of CrossEncoding model ( #20012 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: wang.yuqi <noooop@126.com > 
						
						
					 
					
						2025-06-26 21:10:04 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e110930680 
					 
					
						
						
							
							[Fix] Fix gemma CI test failing on main ( #20124 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Thomas Parnell <tpa@zurich.ibm.com > 
						
						
					 
					
						2025-06-26 21:06:59 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8b64c895c0 
					 
					
						
						
							
							[CI] Sync test dependency with test.in for torch nightly ( #19632 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Yang Wang <elainewy@meta.com >
Signed-off-by: Yida Wu <yidawu@alumni.cmu.edu >
Signed-off-by: Nick Hill <nhill@redhat.com >
Co-authored-by: Concurrensee <yida.wu@amd.com >
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com >
Co-authored-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-06-26 20:55:25 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0740e29b66 
					 
					
						
						
							
							[Feature] add quick all reduce ( #19744 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: ilmarkov <imarkov@redhat.com >
Signed-off-by: Haoyang Li <Haoyang.Li@amd.com >
Co-authored-by: ilmarkov <imarkov@redhat.com > 
						
						
					 
					
						2025-06-26 20:54:24 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						44d2e6af63 
					 
					
						
						
							
							[Bugfix] Build moe_data for both sm100 and sm90 ( #20086 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-06-26 20:50:12 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2d7779f888 
					 
					
						
						
							
							[Perf] SM100 FP8 GEMM Optimizations after cutlass_profiler ( #20071 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: ilmarkov <imarkov@redhat.com >
Co-authored-by: ilmarkov <imarkov@redhat.com > 
						
						
					 
					
						2025-06-26 20:50:09 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a57d57fa72 
					 
					
						
						
							
							[Quantization] Bump to use latest compressed-tensors ( #20033 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Dipika <dipikasikka1@gmail.com >
Co-authored-by: Kyle Sayers <kylesayrs@gmail.com > 
						
						
					 
					
						2025-06-26 20:50:06 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						71799fd005 
					 
					
						
						
							
							[CI Failure] Fix OOM with test_oot_registration_embedding ( #20144 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-06-27 11:21:04 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e9fd658a73 
					 
					
						
						
							
							[Feature] Expert Parallelism Load Balancer (EPLB) ( #18343 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Bowen Wang <abmfy@icloud.com > 
						
						
					 
					
						2025-06-26 15:30:21 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						07b8fae219 
					 
					
						
						
							
							[Doc] correct LoRA capitalization ( #20135 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: kyolebu <kyu@redhat.com > 
						
						
					 
					
						2025-06-26 15:22:12 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						562308816c 
					 
					
						
						
							
							[Refactor] Rename commnication utils ( #20091 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yewentao256 <zhyanwentao@126.com > 
						
						
					 
					
						2025-06-26 22:19:32 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						04e1642e32 
					 
					
						
						
							
							[TPU] add kv cache update kernel ( #19928 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chengji Yao <chengjiyao@google.com > 
						
						
					 
					
						2025-06-26 10:01:37 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b69781f107 
					 
					
						
						
							
							[Hardware][Intel GPU] Add v1 Intel GPU support with Flash attention backend. ( #19560 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Kunshang Ji <kunshang.ji@intel.com > 
						
						
					 
					
						2025-06-26 09:27:18 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0bceac9810 
					 
					
						
						
							
							Spam folks if config.py changes ( #20131 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com > 
						
						
					 
					
						2025-06-26 08:19:46 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						34878a0b48 
					 
					
						
						
							
							[Doc] Rename page titles ( #20130 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-06-26 08:18:49 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6393b03986 
					 
					
						
						
							
							[Doc] Auto sign-off for VSCode ( #20132 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-06-26 08:18:36 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0907d507bf 
					 
					
						
						
							
							[Doc] Automatically signed-off by PyCharm ( #20120 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: wang.yuqi <noooop@126.com > 
						
						
					 
					
						2025-06-26 14:34:17 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c894c5dc1f 
					 
					
						
						
							
							[Bug Fix] Fix address/port already in use error for deep_ep test ( #20094 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yewentao256 <zhyanwentao@126.com > 
						
						
					 
					
						2025-06-26 22:33:13 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1f5d178e9c 
					 
					
						
						
							
							Revert "[Bugfix] default set cuda_graph_sizes to max_num_seqs for v1 engine" ( #20128 )  
						
						 
						
						
						
						
					 
					
						2025-06-26 07:32:22 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						27c065df50 
					 
					
						
						
							
							[Bugfix][V1][ROCm] Fix AITER Flash Attention Backend (Fix API Break and Local Attention Logic: affecting Llama4) ( #19904 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: tjtanaa <tunjian.tan@embeddedllm.com > 
						
						
					 
					
						2025-06-26 12:42:31 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						84c260caeb 
					 
					
						
						
							
							[Docs] Improve frameworks/helm.md ( #20113 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: windsonsea <haifeng.yao@daocloud.io > 
						
						
					 
					
						2025-06-26 10:41:51 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						167aca45cb 
					 
					
						
						
							
							[Misc] Use collapsible blocks for benchmark examples. ( #20017 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-06-26 03:35:16 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0567c8249f 
					 
					
						
						
							
							[CPU] Fix torch version in x86 CPU backend ( #19258 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: jiang1.li <jiang1.li@intel.com > 
						
						
					 
					
						2025-06-26 03:34:47 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d188913d99 
					 
					
						
						
							
							[Refactor] Remove unused library ( #20099 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yewentao256 <zhyanwentao@126.com > 
						
						
					 
					
						2025-06-26 09:16:10 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1d7c29f5fe 
					 
					
						
						
							
							[Doc] Update docs for New Model Implementation ( #20115 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-06-26 00:47:06 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						65397e40f5 
					 
					
						
						
							
							[Bugfix] Allow CUDA_VISIBLE_DEVICES='' in Platform.device_id_to_physical_device_id ( #18979 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Seiji Eicher <seiji@anyscale.com > 
						
						
					 
					
						2025-06-26 00:01:57 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9502c38138 
					 
					
						
						
							
							[Benchmark][Bug] Fix multiple bugs in bench and add args to spec_decode offline ( #20083 )  
						
						 
						
						
						
						
					 
					
						2025-06-25 22:06:27 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2582683566 
					 
					
						
						
							
							[PD] Skip tp_size exchange with rank0 ( #19413 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: NickLucche <nlucches@redhat.com > 
						
						
					 
					
						2025-06-25 20:04:39 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						754b00edb3 
					 
					
						
						
							
							[Bugfix] Fix Mistral tool-parser regex for nested JSON ( #20093 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-06-26 01:01:17 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						296ce95d8e 
					 
					
						
						
							
							[CI] Add SM120 to the Dockerfile ( #19794 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-06-25 16:23:56 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2d7620c3eb 
					 
					
						
						
							
							[TPU] Add TPU specific var VLLM_TPU_MOST_MODEL_LEN ( #19919 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chenyaaang <chenyangli@google.com > 
						
						
					 
					
						2025-06-25 15:51:02 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						55c65ab495 
					 
					
						
						
							
							[P/D] Avoid stranding blocks in P when aborted in D's waiting queue ( #19223 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-06-25 15:19:44 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2cc2069970 
					 
					
						
						
							
							[TPU][Bugfix] fix kv cache padding ( #20048 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chengji Yao <chengjiyao@google.com > 
						
						
					 
					
						2025-06-25 21:24:10 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9f0608fc16 
					 
					
						
						
							
							[Bugfix] default set cuda_graph_sizes to max_num_seqs for v1 engine ( #20062 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: izhuhaoran <izhuhaoran@qq.com > 
						
						
					 
					
						2025-06-25 21:03:17 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4e0db57fff 
					 
					
						
						
							
							Fix the path to the testing script. ( #20082 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Qiliang Cui <derrhein@gmail.com > 
						
						
					 
					
						2025-06-25 20:48:17 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c40692bf9a 
					 
					
						
						
							
							[Misc] Add parallel state node_count function ( #20045 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-06-25 13:38:53 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4734704b30 
					 
					
						
						
							
							[PD] let toy proxy handle /chat/completions ( #19730 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Linkun <github@lkchen.net > 
						
						
					 
					
						2025-06-25 15:17:45 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8b8c209e35 
					 
					
						
						
							
							static_scaled_fp8_quant should not run when scale.numel is not 1 ( #20076 )  
						
						 
						
						
						
						
					 
					
						2025-06-25 15:08:03 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						23a04e0895 
					 
					
						
						
							
							[Fix] Support cls pooling in ModernBertPooler ( #20067 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: shengzhe.li <shengzhe.li@sbintuitions.co.jp > 
						
						
					 
					
						2025-06-25 15:07:45 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						02c97d9a92 
					 
					
						
						
							
							[Quantization] Add compressed-tensors emulations support for NVFP4 ( #19879 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Dipika Sikka <dipikasikka1@gmail.com >
Signed-off-by: Dipika <dipikasikka1@gmail.com > 
						
						
					 
					
						2025-06-25 14:28:19 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e795d723ed 
					 
					
						
						
							
							[Frontend] Add /v1/audio/translations OpenAI API endpoint ( #19615 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Roger Wang <ywang@roblox.com >
Signed-off-by: NickLucche <nlucches@redhat.com >
Co-authored-by: Roger Wang <ywang@roblox.com > 
						
						
					 
					
						2025-06-25 17:54:14 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8359f4c8d8 
					 
					
						
						
							
							[V1][Speculative Decoding] Fix DeepSeek MTP ( #20022 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: cjackal <44624812+cjackal@users.noreply.github.com > 
						
						
					 
					
						2025-06-25 08:41:02 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						bf5181583f 
					 
					
						
						
							
							[Doc] Guide for Incremental Compilation Workflow ( #19109 )  
						
						 
						
						
						
						
					 
					
						2025-06-25 22:06:46 +09:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c53fec1fcb 
					 
					
						
						
							
							[doc] add reference link for Intel XPU ( #20064 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-06-25 12:24:07 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0f9e7354f5 
					 
					
						
						
							
							[BugFix] Fix full-cuda-graph illegal memory access in FA3 ( #20057 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com > 
						
						
					 
					
						2025-06-25 08:39:04 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ba7ba35cda 
					 
					
						
						
							
							[Chore] debloat some initial logs ( #19438 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Aaron Pham <contact@aarnphm.xyz > 
						
						
					 
					
						2025-06-25 06:36:22 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						015fab8c2f 
					 
					
						
						
							
							[Kernels][Bugfix] Use torch op for all kernels in FusedMoE forward.  Add additional testing for cudagraphs. ( #19717 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Bill Nell <bnell@redhat.com > 
						
						
					 
					
						2025-06-24 23:22:58 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f59fc60fb3 
					 
					
						
						
							
							[Feat][CLI] enforce-include-usage ( #19695 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Max Wittig <max.wittig@siemens.com > 
						
						
					 
					
						2025-06-25 01:43:04 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						879f69bed3 
					 
					
						
						
							
							[Refactor] Remove duplicate ceil_div ( #20023 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yewentao256 <zhyanwentao@126.com > 
						
						
					 
					
						2025-06-25 05:19:09 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7108934142 
					 
					
						
						
							
							[Frontend] speed up import time of vllm.config ( #18036 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: David Xia <david@davidxia.com > 
						
						
					 
					
						2025-06-25 00:41:11 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3443aaf8dd 
					 
					
						
						
							
							Move to a faster base64 implementation ( #19984 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: h-avsha <avshalom.manevich@hcompany.ai > 
						
						
					 
					
						2025-06-24 20:33:51 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2273ec322c 
					 
					
						
						
							
							Revert "Fix(models/siglip): Add compatibility for Gemma models quantized by llm-compressor" ( #20030 )  
						
						 
						
						
						
						
					 
					
						2025-06-25 11:23:29 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a6c4b87fbc 
					 
					
						
						
							
							Revert "[Feature] Integrate new deepgemm ( #19820 )" ( #20049 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yewentao256 <zhyanwentao@126.com > 
						
						
					 
					
						2025-06-24 19:45:22 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1afa9948f5 
					 
					
						
						
							
							[Llama4] Update attn_temperature_tuning ( #19997 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Brayden Zhong <b8zhong@uwaterloo.ca > 
						
						
					 
					
						2025-06-24 22:42:53 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0d06b533a0 
					 
					
						
						
							
							cmake: Update vllm_flash_attn for vllm_kernels ( #20032 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Eli Uriegas <eliuriegas@meta.com > 
						
						
					 
					
						2025-06-24 22:44:10 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c01d1c5aba 
					 
					
						
						
							
							use .dev for version comparison with pytorch nightly release ( #20031 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Boyuan Feng <boyuan@meta.com > 
						
						
					 
					
						2025-06-24 21:52:16 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ead369845d 
					 
					
						
						
							
							[Easy] Remove submodule added in  #19463  ( #20039 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Brayden Zhong <b8zhong@uwaterloo.ca > 
						
						
					 
					
						2025-06-24 13:23:15 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c6e3bba8e6 
					 
					
						
						
							
							[Feature] Integrate new deepgemm ( #19820 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yewentao256 <zhyanwentao@126.com > 
						
						
					 
					
						2025-06-24 12:51:56 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						91f7d9d0b6 
					 
					
						
						
							
							[P/D] Asynchronously do _nixl_handshake ( #19836 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Linkun Chen <github@lkchen.net >
Signed-off-by: Nick Hill <nhill@redhat.com >
Co-authored-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-06-24 12:46:10 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8619e7158c 
					 
					
						
						
							
							[BugFix] Fix multi-node offline data parallel ( #19937 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-06-24 12:45:20 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c635c5f744 
					 
					
						
						
							
							[Misc][Benchmarking] Add variable request-rate ("ramp-up") to the benchmarking client. ( #19423 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: dtransposed <damian@damian-ml-machine.europe-west3-b .c.jetbrains-grazie.internal>
Co-authored-by: dtransposed <damian@damian-ml-machine.europe-west3-b .c.jetbrains-grazie.internal>
Co-authored-by: Roger Wang <hey@rogerw.me > 
						
						
					 
					
						2025-06-24 18:41:49 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a045b7e89a 
					 
					
						
						
							
							[Perf] Improve/Fix-regression for FA3 in High QPS regimes ( #19463 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lucas Wilkinson <lwilkinson@neuralmagic.com > 
						
						
					 
					
						2025-06-24 13:09:01 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						981eeca41a 
					 
					
						
						
							
							[Fix][V1] Remove --scheduling-policy oracle ( #20010 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: amit <amit.man@gmail.com > 
						
						
					 
					
						2025-06-24 09:52:15 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						26d34eb67e 
					 
					
						
						
							
							refactor example - qwen3_reranker ( #19847 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-06-24 14:03:20 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						53da4cd397 
					 
					
						
						
							
							[Bugfix][CPU] Fix InputBatch for pooling models in the CPU v1 ( #20014 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: jiang1.li <jiang1.li@intel.com > 
						
						
					 
					
						2025-06-24 13:20:04 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9a3b88328f 
					 
					
						
						
							
							[PERF] Speedup of MRoPE prepare inputs ( #19939 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Vadim Gimpelson <vadim.gimpelson@centml.ai > 
						
						
					 
					
						2025-06-23 23:01:26 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3014c920da 
					 
					
						
						
							
							add some examples for other benchmark scripts ( #19893 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-06-24 05:57:46 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0eed516951 
					 
					
						
						
							
							[doc] Fix broken link in the installation for CPU ( #19980 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Kay Yan <kay.yan@daocloud.io > 
						
						
					 
					
						2025-06-24 12:04:11 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ee5ad8d2c5 
					 
					
						
						
							
							[Misc][Tools][Benchmark] Add profile to autotune script ( #19711 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chenyaaang <chenyangli@google.com > 
						
						
					 
					
						2025-06-24 00:59:41 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a738dbb2a1 
					 
					
						
						
							
							Update test case parameter to have the throughput above 8.0 ( #19994 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Qiliang Cui <derrhein@gmail.com > 
						
						
					 
					
						2025-06-24 00:18:10 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						33d5e29be9 
					 
					
						
						
							
							[TPU] Fix tpu model runner test ( #19995 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chenyaaang <chenyangli@google.com > 
						
						
					 
					
						2025-06-23 16:04:28 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4671ac6e2a 
					 
					
						
						
							
							[Bugfix][Benchmark] Fix Marlin benchmark ( #19929 )  
						
						 
						
						
						
						
					 
					
						2025-06-24 07:25:12 +09:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						dd2ccf8dde 
					 
					
						
						
							
							Feat Dynamic Quantization for MoE Layers in GPTQ Marlin Backend ( #19395 )  
						
						 
						
						
						
						
					 
					
						2025-06-24 07:23:28 +09:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a3bc76e4b5 
					 
					
						
						
							
							[CI/Build] Push latest tag for cpu and neuron docker image ( #19897 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: 22quinn <33176974+22quinn@users.noreply.github.com > 
						
						
					 
					
						2025-06-23 14:15:37 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e6327c9b3e 
					 
					
						
						
							
							[Feature] Support sequence parallelism for static fp8 quantization ( #19181 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: cascade812 <cascade812@outlook.com > 
						
						
					 
					
						2025-06-23 16:09:02 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d0132f025d 
					 
					
						
						
							
							[Misc] Add type alias ReqId and EngineId for better readability ( #19880 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Linkun Chen <github@lkchen.net > 
						
						
					 
					
						2025-06-23 12:57:57 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						61f4fc5dc6 
					 
					
						
						
							
							[Bugfix][v1] Fix step pooler implementation and step pooling usage in v1 ( #19956 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <2037008807@qq.com > 
						
						
					 
					
						2025-06-23 18:38:06 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						68aaeb3749 
					 
					
						
						
							
							[EP+DP] Optimize the little operations in the DeepGEMM + DeepEP low latency case ( #19885 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Varun Sundar Rabindranath <vsundarr@redhat.com >
Signed-off-by: Tyler Michael Smith <tysmith@redhat.com >
Co-authored-by: Varun Sundar Rabindranath <vsundarr@redhat.com > 
						
						
					 
					
						2025-06-23 11:07:47 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c3649e4fee 
					 
					
						
						
							
							[Docs] Fix syntax highlighting of shell commands ( #19870 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lukas Geiger <lukas.geiger94@gmail.com > 
						
						
					 
					
						2025-06-23 17:59:09 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						53243e5c42 
					 
					
						
						
							
							[doc] improve readability for long commands ( #19920 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-06-23 14:27:07 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a6e6604d32 
					 
					
						
						
							
							[Bugfix] Fix CI bitsandbytes failure ( #19969 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-06-23 21:30:55 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b82e0f82cb 
					 
					
						
						
							
							[doc] use MkDocs collapsible blocks - supplement ( #19973 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-06-23 10:54:16 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5111642a6f 
					 
					
						
						
							
							[Doc] Update V1 status for decoder-only embedding models ( #19952 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <2037008807@qq.com > 
						
						
					 
					
						2025-06-23 09:31:06 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1bcd15edc7 
					 
					
						
						
							
							[BugFix][P/D] Fix for cases where _recving_transfers can be cleaned up when *all* transfer done ( #19874 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Linkun Chen <github@lkchen.net > 
						
						
					 
					
						2025-06-22 22:41:53 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2ebff5b77c 
					 
					
						
						
							
							[P/D][NixlConnector] Support tp_size > num_kv_heads deployments ( #19691 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: NickLucche <nlucches@redhat.com >
Co-authored-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-06-22 22:41:50 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f17aec0d63 
					 
					
						
						
							
							[doc] Fold long code blocks to improve readability ( #19926 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-06-23 05:24:23 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						493c275352 
					 
					
						
						
							
							Fix(models/siglip): Add compatibility for Gemma models quantized by llm-compressor ( #19643 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Vensenmu <vensenmu@gmail.com > 
						
						
					 
					
						2025-06-23 03:40:28 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f39ab2d4bd 
					 
					
						
						
							
							[Misc] Configurable timeout for execute_model RPC calls via env var ( #19544 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: jinqinn <goodqinjin@163.com > 
						
						
					 
					
						2025-06-22 20:36:26 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4a0f7888a3 
					 
					
						
						
							
							[Core] feat: Implement Priority Scheduling in V1 Engine ( #19057 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: amit <amit.man@gmail.com >
Co-authored-by: Roger Wang <Rogerw0108@gmail.com > 
						
						
					 
					
						2025-06-22 20:18:08 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c4cf260677 
					 
					
						
						
							
							[Perf][CLI] Improve overall startup time ( #19941 )  
						
						 
						
						
						
						
					 
					
						2025-06-22 23:11:22 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						33d51f599e 
					 
					
						
						
							
							[BugFix] Add an env to disable moe chunking to work around compile incompatibility ( #19642 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Ye (Charlotte) Qi <yeq@meta.com > 
						
						
					 
					
						2025-06-22 15:17:49 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e91386cde1 
					 
					
						
						
							
							[Chore] dedup logs ( #19955 )  
						
						 
						
						
						
						
					 
					
						2025-06-22 19:43:07 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2c11a29f0b 
					 
					
						
						
							
							[Misc] Simplify vllm bench cli subcommand implementation ( #19948 )  
						
						 
						
						
						
						
					 
					
						2025-06-22 12:34:48 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c76a506bd6 
					 
					
						
						
							
							[Misc] Update model-specific PR tagging ( #19949 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Roger Wang <hey@rogerw.me > 
						
						
					 
					
						2025-06-22 12:16:08 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ec0db6f51c 
					 
					
						
						
							
							[doc] use snippets for contact us ( #19944 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-06-22 10:26:13 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c305a2109d 
					 
					
						
						
							
							[CI/Build] Auto tag perf benchmarks related PRs ( #19943 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: 22quinn <33176974+22quinn@users.noreply.github.com > 
						
						
					 
					
						2025-06-22 08:46:21 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						202c5df935 
					 
					
						
						
							
							[Benchmark] fix request loss if "ping" is returned ( #19535 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Wang, Yi A <yi.a.wang@intel.com >
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> 
						
						
					 
					
						2025-06-22 07:21:04 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2bb246b8f7 
					 
					
						
						
							
							[MISC] add cpu_kvcache_space_bytes to CacheConfig ( #19812 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Andy Xie <andy.xning@gmail.com > 
						
						
					 
					
						2025-06-22 13:39:09 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4c409cabc2 
					 
					
						
						
							
							[Misc] add vllm_config in __init__ ( #19866 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Andy Xie <andy.xning@gmail.com > 
						
						
					 
					
						2025-06-21 23:10:46 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3b1e4c6a23 
					 
					
						
						
							
							[Docs] Add GPT2ForSequenceClassification to supported models in docs ( #19932 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: nie3e <adrcwiek@gmail.com > 
						
						
					 
					
						2025-06-21 20:57:19 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2c5302fadd 
					 
					
						
						
							
							[Multimodal] Optimize Qwen2/2.5-VL startup time ( #19756 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu >
Signed-off-by: Roger Wang <hey@rogerw.me >
Co-authored-by: Roger Wang <hey@rogerw.me > 
						
						
					 
					
						2025-06-21 20:01:07 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						caa680fd2e 
					 
					
						
						
							
							[doc] add contact us in community ( #19922 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-06-21 17:29:06 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c3bf9bad11 
					 
					
						
						
							
							[New model support]Support Tarsier2 ( #19887 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: 汪志鹏 <wangzhipeng628@gmail.com > 
						
						
					 
					
						2025-06-21 04:01:51 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6f170f11dd 
					 
					
						
						
							
							[Bugfix] Fix bnb 8bit model weights loading ( #19917 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <2037008807@qq.com > 
						
						
					 
					
						2025-06-21 03:29:09 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8ca81bb069 
					 
					
						
						
							
							Fix: Check the type of params to be a Sequence not list. ( #19910 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Rabin Adhikari <rabin.adk1@gmail.com > 
						
						
					 
					
						2025-06-20 23:03:17 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e773a9e1c2 
					 
					
						
						
							
							[Misc] Clean up useless code ( #19889 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com > 
						
						
					 
					
						2025-06-20 21:09:09 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						71baf85ae1 
					 
					
						
						
							
							[Kernel] mark TorchSDPABackend swap_blocks NotImplementedError ( #19749 )  
						
						 
						
						
						
						
					 
					
						2025-06-20 18:18:11 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						79f2f1c2a1 
					 
					
						
						
							
							[CPU][CI] Fallback sliding window to v0 and fix CPU pooling model tests ( #19901 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: jiang1.li <jiang1.li@intel.com > 
						
						
					 
					
						2025-06-20 15:30:36 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2e3e3c86dc 
					 
					
						
						
							
							Export NaNs in logits to scheduler_stats if output is corrupted ( #18777 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Vlad Mihailescu <vtmihailescu@gmail.com > 
						
						
					 
					
						2025-06-20 22:47:16 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7e8977fcd4 
					 
					
						
						
							
							[custom_op][vllm-plugin] update custom_op class to use op_registry ( #19164 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chendi.Xue <chendi.xue@intel.com > 
						
						
					 
					
						2025-06-20 07:44:56 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f1e840e842 
					 
					
						
						
							
							[Model] GPT2ForSequenceClassification model ( #19663 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: nie3e <adrcwiek@gmail.com >
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> 
						
						
					 
					
						2025-06-20 12:07:41 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7771d1de88 
					 
					
						
						
							
							[Fix] import regex instead of re ( #19875 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Thomas Parnell <tpa@zurich.ibm.com > 
						
						
					 
					
						2025-06-20 11:16:48 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						71d1219545 
					 
					
						
						
							
							[Kernel] correct cpu worker function parameter type ( #19745 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Andy Xie <andy.xning@gmail.com > 
						
						
					 
					
						2025-06-20 10:50:13 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e384f2f108 
					 
					
						
						
							
							[Misc] refactor example - openai_transcription_client ( #19851 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-06-20 08:02:21 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						089a306f19 
					 
					
						
						
							
							[Misc] update cuda version ( #19526 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-06-20 07:25:15 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5e666f72cd 
					 
					
						
						
							
							[Bugfix][Ray] Set the cuda context eagerly in the ray worker  ( #19583 )  
						
						 
						
						
						
						
					 
					
						2025-06-19 22:01:16 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e3a3e4db46 
					 
					
						
						
							
							[Bugfix] Enable PP with AITER+V1 ( #19822 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Qiang Li <qiang.li2@amd.com > 
						
						
					 
					
						2025-06-20 12:43:20 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e41bf15cd0 
					 
					
						
						
							
							[Chore]: qwen3-moe-type-hints-mistake ( #19860 )  
						
						 
						
						... 
						
						
						
						Co-authored-by: xinnan.hou <hxn02029096@alibaba-inc.com > 
						
						
					 
					
						2025-06-19 21:43:07 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5aa4a015ce 
					 
					
						
						
							
							[Benchmark] Fix Value of type "SampleRequest" is not indexable ( #18032 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Brayden Zhong <b8zhong@uwaterloo.ca > 
						
						
					 
					
						2025-06-19 21:28:55 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b6bad3d186 
					 
					
						
						
							
							[CI][Neuron] Fail and exit on first error ( #19622 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Elaine Zhao <elaineyz@amazon.com >
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> 
						
						
					 
					
						2025-06-20 12:27:51 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ee9a1531aa 
					 
					
						
						
							
							[CI/Build][Bugfix] Fix deadlock on v1 engine test CI ( #19872 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <2037008807@qq.com > 
						
						
					 
					
						2025-06-20 09:51:07 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						10d82f9ac5 
					 
					
						
						
							
							[Benchmark][Bugfix] Fix Dataset Length Calculation ( #19868 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Robert Shaw <robshaw@redhat.com >
Co-authored-by: Robert Shaw <robshaw@redhat.com > 
						
						
					 
					
						2025-06-19 18:30:41 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ea10dd9d9e 
					 
					
						
						
							
							[Frontend] early return chat format resolution when specified ( #19735 )  
						
						 
						
						
						
						
					 
					
						2025-06-19 18:49:59 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ead2110297 
					 
					
						
						
							
							[Core][Bugfix] Fix Online MM Beam Search ( #19688 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Alex-Brooks <Alex.Brooks@ibm.com > 
						
						
					 
					
						2025-06-19 17:18:07 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						01220ce89a 
					 
					
						
						
							
							[CI][CPU] Improve dummy Triton interfaces and fix the CPU CI ( #19838 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: jiang1.li <jiang1.li@intel.com > 
						
						
					 
					
						2025-06-19 15:46:09 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6f68c49220 
					 
					
						
						
							
							[Doc] Update V1 user guide for embedding models ( #19842 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: 22quinn <33176974+22quinn@users.noreply.github.com > 
						
						
					 
					
						2025-06-19 09:43:27 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4719460644 
					 
					
						
						
							
							Fixing Chunked Prefill Test. ( #19762 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Alexei V. Ivanov <alexei.ivanov@amd.com > 
						
						
					 
					
						2025-06-19 01:36:16 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						466166dcfd 
					 
					
						
						
							
							[Frontend] Add optional token-level progress bar to LLM.beam_search ( #19301 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Ruosen Li <rxl190028@utdallas.edu >
Signed-off-by: Aaron Pham <contact@aarnphm.xyz >
Signed-off-by: Ubuntu <ubuntu@ip-172-31-71-179.ec2.internal >
Co-authored-by: 22quinn <33176974+22quinn@users.noreply.github.com > 
						
						
					 
					
						2025-06-19 03:21:41 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1d0ae26c85 
					 
					
						
						
							
							Add xLAM tool parser support ( #17148 )  
						
						 
						
						
						
						
					 
					
						2025-06-19 14:26:41 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6021999573 
					 
					
						
						
							
							[Minor] Allow redirecting model path for HfRunner in test ( #19795 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <2037008807@qq.com > 
						
						
					 
					
						2025-06-18 23:04:10 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c7b370c603 
					 
					
						
						
							
							raise exception for pin_lora ( #19809 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Andy Xie <andy.xning@gmail.com > 
						
						
					 
					
						2025-06-18 22:57:35 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						aa20d10a91 
					 
					
						
						
							
							[Misc] [ROCm] Prevent surplus tensor reshape ( #19803 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Zsolt Borbely <zsolt.borbely@htecgroup.com > 
						
						
					 
					
						2025-06-19 13:57:16 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2de12be428 
					 
					
						
						
							
							[ROCm] [AITER] [Bugfix] Patch for AITER commit 648764942e552a8bb5fe16026703716a81f05374 ( #18990 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: tjtanaa <tunjian.tan@embeddedllm.com > 
						
						
					 
					
						2025-06-18 22:56:31 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						83ca9ae47b 
					 
					
						
						
							
							Mark invariant normalizer in Gemma as non-persistent ( #19788 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Yu-Hang Tang <Tang.Maxin@gmail.com > 
						
						
					 
					
						2025-06-18 22:56:03 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e2148dc5ea 
					 
					
						
						
							
							[Bugfix] Add check_health to v1 async client. ( #19821 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Kourosh Hakhamaneshi <kourosh@anyscale.com > 
						
						
					 
					
						2025-06-18 21:47:01 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b1098b4072 
					 
					
						
						
							
							[Bugfix] Fix the linter ( #19826 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lu Fang <lufang@fb.com > 
						
						
					 
					
						2025-06-18 21:44:41 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						799397ee4f 
					 
					
						
						
							
							Support embedding models in V1 ( #16188 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Max de Bayser <mbayser@br.ibm.com >
Signed-off-by: Max de Bayser <maxdebayser@gmail.com >
Signed-off-by: 22quinn <33176974+22quinn@users.noreply.github.com >
Co-authored-by: 22quinn <33176974+22quinn@users.noreply.github.com > 
						
						
					 
					
						2025-06-18 21:36:33 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4959915089 
					 
					
						
						
							
							[Quantization] Modify the logic of BNB double quantization ( #19742 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-06-19 03:52:09 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8d1e89d946 
					 
					
						
						
							
							[Misc][ROCm] Enforce no unused variable in ROCm C++ files ( #19796 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lu Fang <lufang@fb.com > 
						
						
					 
					
						2025-06-18 20:25:15 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						36239f79dd 
					 
					
						
						
							
							Fix FA2 fallback for Blackwell V1 ( #19781 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-06-19 09:53:55 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						dfada85eee 
					 
					
						
						
							
							[Frontend] Expose custom args in OpenAI APIs ( #16862 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Andrew Feldman <afeldman@neuralmagic.com >
Signed-off-by: Andrew Feldman <afeldman@redhat.com >
Co-authored-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-06-18 17:41:11 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ed33349738 
					 
					
						
						
							
							[BugFix] Fix use_cudagraph=False ( #19612 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Richard Zou <zou3519@gmail.com > 
						
						
					 
					
						2025-06-19 08:23:12 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d49adea1f9 
					 
					
						
						
							
							[Multimodal] Use fast processor for Qwen2/2.5-VL ( #19789 )  
						
						 
						
						
						
						
					 
					
						2025-06-18 15:49:40 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						14fdd21d39 
					 
					
						
						
							
							[Core] More fixes to MultiModalEmbeddings type handling ( #19715 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Russell Bryant <rbryant@redhat.com > 
						
						
					 
					
						2025-06-18 22:48:29 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						04fefe7c9a 
					 
					
						
						
							
							[TPU] Update torch-xla version to include paged attention tuned block change ( #19813 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Qiliang Cui <derrhein@gmail.com > 
						
						
					 
					
						2025-06-18 22:41:13 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3b523e38d9 
					 
					
						
						
							
							[Core] Do not copy array during hashing ( #19484 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lukas Geiger <lukas.geiger94@gmail.com > 
						
						
					 
					
						2025-06-18 15:36:55 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						16c16301c8 
					 
					
						
						
							
							Disable "Forbid direct 'import triton'" check for vllm/triton_utils/importing.py in an extensible way ( #19783 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Andrew Feldman <afeldman@redhat.com > 
						
						
					 
					
						2025-06-18 15:08:00 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9206d0ff01 
					 
					
						
						
							
							docs: fix Slack bulletpoint in README ( #19811 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nathan Weinberg <nweinber@redhat.com > 
						
						
					 
					
						2025-06-18 20:47:08 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a89209b78d 
					 
					
						
						
							
							[v1] Support mamba2 ( #19327 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chen Zhang <zhangch99@outlook.com > 
						
						
					 
					
						2025-06-18 20:34:15 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ffacb222cb 
					 
					
						
						
							
							[Docs] Add Huzaifa Sidhpurwala to vuln mgmt team doc ( #19808 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Russell Bryant <rbryant@redhat.com > 
						
						
					 
					
						2025-06-18 20:22:28 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						12575cfa7a 
					 
					
						
						
							
							[Bugfix] fix RAY_CGRAPH_get_timeout is not set successfully ( #19725 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com > 
						
						
					 
					
						2025-06-18 10:26:16 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8b6e1d639c 
					 
					
						
						
							
							[Hardware][AMD] integrate aiter chunked prefill into vllm ( #18596 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: fsx950223 <fsx950223@outlook.com >
Signed-off-by: charlifu <charlifu@amd.com >
Co-authored-by: fsx950223 <fsx950223@outlook.com >
Co-authored-by: charlifu <charlifu@amd.com > 
						
						
					 
					
						2025-06-18 08:46:51 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						735a9de71f 
					 
					
						
						
							
							[Qwen] Add tagging rule for Qwen related PRs ( #19799 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lu Fang <lufang@fb.com > 
						
						
					 
					
						2025-06-18 14:26:43 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						257ab95439 
					 
					
						
						
							
							[Platform] Allow platform use V1 Engine by default ( #19792 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com > 
						
						
					 
					
						2025-06-18 13:03:36 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						cca91a7a10 
					 
					
						
						
							
							[doc] fix the incorrect label ( #19787 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-06-18 10:30:58 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f04d604567 
					 
					
						
						
							
							[Minor] Zero-initialize attn output buffer ( #19784 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-06-18 06:59:27 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						19a53b2783 
					 
					
						
						
							
							[V1] Decouple GPU and TPU InputBatch ( #19778 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Andrew Feldman <afeldman@redhat.com > 
						
						
					 
					
						2025-06-18 06:38:13 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						eccdc8318c 
					 
					
						
						
							
							[V1][P/D] An native implementation of xPyD based on P2P NCCL ( #18242 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Abatom <abzhonghua@gmail.com > 
						
						
					 
					
						2025-06-18 06:32:36 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5f52a84685 
					 
					
						
						
							
							[V1] Add API docs for EncoderCacheManager ( #19294 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Russell Bryant <rbryant@redhat.com > 
						
						
					 
					
						2025-06-18 13:37:01 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d4629dc43f 
					 
					
						
						
							
							[Misc] Add __str__ for RequestStatus ( #19780 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Linkun Chen <github@lkchen.net > 
						
						
					 
					
						2025-06-18 03:03:01 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6e9cc73f67 
					 
					
						
						
							
							[MISC] correct DeviceConfig device field static type analysis ( #19699 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Andy Xie <andy.xning@gmail.com > 
						
						
					 
					
						2025-06-17 17:21:50 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c53711bd63 
					 
					
						
						
							
							[MISC] correct copy_blocks src_to_dists param type ( #19696 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Andy Xie <andy.xning@gmail.com > 
						
						
					 
					
						2025-06-17 17:21:06 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						dac8cc49f4 
					 
					
						
						
							
							[TPU] Update torch version to include paged attention kernel change ( #19706 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chenyaaang <chenyangli@google.com > 
						
						
					 
					
						2025-06-17 22:24:49 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a44b1c951d 
					 
					
						
						
							
							[Feature][ROCm] Add full graph capture support for TritonAttentionBackend ( #19158 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: charlifu <charlifu@amd.com > 
						
						
					 
					
						2025-06-17 17:03:06 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b447624ee3 
					 
					
						
						
							
							[Bugfix] Fix faulty triton importing logic when using Ray for DP ( #19734 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-06-17 20:59:29 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						cda92307c1 
					 
					
						
						
							
							[Misc] Update lmcache connector with the latest connector apis ( #19441 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: YaoJiayi <120040070@link.cuhk.edu.cn > 
						
						
					 
					
						2025-06-17 19:57:54 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						bf57ccc5c2 
					 
					
						
						
							
							Remove sm120 arch from sm100 cutlass kernel arch list ( #19716 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-06-17 11:49:39 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ffb2cd6b54 
					 
					
						
						
							
							[Perf] Optimize moe_align_block_size CUDA kernel ( #19572 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yewentao256 <zhyanwentao@126.com >
Co-authored-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-06-17 11:49:26 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ca94d7fa00 
					 
					
						
						
							
							[Bugfix] Update multimodel models mapping to fit new checkpoint after Transformers v4.52 ( #19151 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <2037008807@qq.com > 
						
						
					 
					
						2025-06-17 15:58:38 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5a1c2e15d8 
					 
					
						
						
							
							[Mis] remove duplicate engine status checks ( #19647 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: googs1025 <googs1025@gmail.com > 
						
						
					 
					
						2025-06-17 08:17:38 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4c8f64faa7 
					 
					
						
						
							
							[V1][Kernel] Flashinfer HND KV cache layout ( #19280 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: NickLucche <nlucches@redhat.com > 
						
						
					 
					
						2025-06-17 09:09:22 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						93aee29fdb 
					 
					
						
						
							
							[doc] split "Other AI Accelerators" tabs ( #19708 )  
						
						 
						
						
						
						
					 
					
						2025-06-17 22:05:29 +09:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						154d063b9f 
					 
					
						
						
							
							[doc][mkdocs] Add edit  button to documentation ( #19637 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-06-17 11:10:31 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ccd7c05089 
					 
					
						
						
							
							[Kernel] Add Split-KV Support to Unified Triton Attention Kernel ( #19152 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jan van Lunteren <jvl@zurich.ibm.com > 
						
						
					 
					
						2025-06-17 10:45:07 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c48c6c4008 
					 
					
						
						
							
							Add a doc on how to update PyTorch version ( #19705 )  
						
						 
						
						
						
						
					 
					
						2025-06-17 18:10:37 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						aed8468642 
					 
					
						
						
							
							[Doc] Add missing llava family multi-image examples ( #19698 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <2037008807@qq.com > 
						
						
					 
					
						2025-06-17 07:05:21 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5c76b9cdaf 
					 
					
						
						
							
							[Core] add remove_seq_from_computed_blocks_tracker to BlockSpaceManager ( #19686 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: 刘全 <quan.liu2@dbappsecurity.com.cn >
Co-authored-by: 刘全 <quan.liu2@dbappsecurity.com.cn > 
						
						
					 
					
						2025-06-17 04:40:58 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ddfed314f9 
					 
					
						
						
							
							Fixes IMA for TP w/ flex-attention ( #19712 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: drisspg <drisspguessous@gmail.com > 
						
						
					 
					
						2025-06-17 04:01:50 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5b3ad5ecf2 
					 
					
						
						
							
							[DOC] fix doc typos ( #19600 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Di Liu <liu-di@sjtu.edu.cn > 
						
						
					 
					
						2025-06-17 11:34:53 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ede5c4ebdf 
					 
					
						
						
							
							[Frontend] add chunking audio for > 30s audio ( #19597 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: nguyenhoangthuan99 <thuanhppro12@gmail.com > 
						
						
					 
					
						2025-06-17 11:34:00 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						07334959d8 
					 
					
						
						
							
							[Wheel Size] Only build FA2 8.0+PTX ( #19336 )  
						
						 
						
						
						
						
					 
					
						2025-06-17 12:32:49 +09:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						119f683949 
					 
					
						
						
							
							[doc] add project flag to gcloud TPU command ( #19664 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: David Xia <david@davidxia.com > 
						
						
					 
					
						2025-06-17 01:00:09 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0860087aff 
					 
					
						
						
							
							[Fix] Fall back to Gloo when NCCL backend is unavailable ( #19641 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: conroy-cheers <conroy@corncheese.org > 
						
						
					 
					
						2025-06-17 08:42:14 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6bc7b57315 
					 
					
						
						
							
							[Quantization] Remove FP4 emulation; Fall-back to marlin for device < 100 ( #19563 )  
						
						 
						
						
						
						
					 
					
						2025-06-16 17:33:51 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						90f9c2eb5c 
					 
					
						
						
							
							[V1] Change return type on get_multimodal_embeddings() ( #19446 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Russell Bryant <rbryant@redhat.com > 
						
						
					 
					
						2025-06-16 13:32:15 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						387bdf0ab9 
					 
					
						
						
							
							[Model] Add support for MiniMaxM1ForCausalLM (shares architecture with MiniMaxText01ForCausalLM) ( #19677 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: QscQ <qscqesze@gmail.com > 
						
						
					 
					
						2025-06-16 09:47:14 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5e5baa91aa 
					 
					
						
						
							
							[Kernels] Use empty for modular MoE workspaces ( #19667 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Bill Nell <bnell@redhat.com > 
						
						
					 
					
						2025-06-16 14:58:01 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						836d4ce140 
					 
					
						
						
							
							[Bugfix] fix missing 'finish_reason': null in streaming chat ( #19662 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com > 
						
						
					 
					
						2025-06-16 14:10:39 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c3fec47bb7 
					 
					
						
						
							
							[MISC] bump huggingface_hub pkg to 0.33.0 ( #19547 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Andy Xie <andy.xning@gmail.com > 
						
						
					 
					
						2025-06-16 05:22:28 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1173804dca 
					 
					
						
						
							
							[Bugfix] Fix TP inference for Flex attention backend ( #19657 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <2037008807@qq.com > 
						
						
					 
					
						2025-06-16 11:21:37 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4d5424029b 
					 
					
						
						
							
							[Feature]:Allow for Granite MoE Hybrid models with _only_ shared experts. ( #19652 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Shawn Tan <shawntan@ibm.com > 
						
						
					 
					
						2025-06-16 11:14:18 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3e7506975c 
					 
					
						
						
							
							[DOC] Add reasoning capability to vLLM streamlit code ( #19557 )  
						
						 
						
						
						
						
					 
					
						2025-06-16 07:09:12 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ee35e96ac3 
					 
					
						
						
							
							[BugFix] Don't catch BaseException when dumping execute_model errors ( #19626 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-06-16 11:01:08 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						dec66d253b 
					 
					
						
						
							
							[Kernel] GGUF MMVQ kernel for multiple input vectors ( #18754 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: SzymonOzog <szymon.ozog@gmail.com > 
						
						
					 
					
						2025-06-16 17:33:26 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8d120701fd 
					 
					
						
						
							
							[Docs] Move multiproc doc to v1 dir ( #19651 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Russell Bryant <rbryant@redhat.com > 
						
						
					 
					
						2025-06-16 09:10:12 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f40f763f12 
					 
					
						
						
							
							[CI] Add mteb testing for rerank models ( #19344 )  
						
						 
						
						
						
						
					 
					
						2025-06-16 01:36:43 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						26bc46ef89 
					 
					
						
						
							
							[MISC] typo fix ( #19672 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Andy Xie <andy.xning@gmail.com > 
						
						
					 
					
						2025-06-16 07:18:49 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a77aea59fd 
					 
					
						
						
							
							[TPU] support attention head dim smaller than 128 ( #19620 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chengji Yao <chengjiyao@google.com >
Co-authored-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-06-16 06:40:53 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b692e9cd07 
					 
					
						
						
							
							[Misc] Fix skipped max-model-len validation when deriving max model length from tokenizer config ( #19660 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Ye (Charlotte) Qi <yeq@meta.com > 
						
						
					 
					
						2025-06-16 06:30:29 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						367871a469 
					 
					
						
						
							
							[Misc][Frontend] passthrough bad_words ( #19564 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Francesco Bertolotti <francesco.bertolotti@igenius.ai >
Co-authored-by: Francesco Bertolotti <francesco.bertolotti@igenius.ai >
Co-authored-by: Aaron Pham <Aaronpham0103@gmail.com > 
						
						
					 
					
						2025-06-16 05:05:13 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						92183b41f3 
					 
					
						
						
							
							[Bugfix][Core] Prefix caching causes incorrect outputs due to outdated ComputedBlocksTracker ( #18957 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: 刘全 <quan.liu2@dbappsecurity.com.cn >
Co-authored-by: 刘全 <quan.liu2@dbappsecurity.com.cn > 
						
						
					 
					
						2025-06-15 21:56:37 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c6703d1e0d 
					 
					
						
						
							
							[MISC] Remove unused variableds in C++ ( #19609 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lu Fang <lufang@fb.com > 
						
						
					 
					
						2025-06-15 20:05:28 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a5e7242d5f 
					 
					
						
						
							
							[Misc] Remove duplicate multiproc method setting for CPU platform ( #19649 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <2037008807@qq.com > 
						
						
					 
					
						2025-06-16 02:26:58 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						91b2c17a55 
					 
					
						
						
							
							[CI/Build] Fix torch nightly CI dependencies part 2 ( #19589 )  
						
						 
						
						
						
						
					 
					
						2025-06-15 20:01:10 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						055915e6ce 
					 
					
						
						
							
							Enable prefix caching with full cuda graphs ( #19617 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-06-15 01:05:05 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3d330c4c09 
					 
					
						
						
							
							[Benchmark] Refactor benchmark script for fp8 & int8 ( #19627 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yewentao256 <zhyanwentao@126.com > 
						
						
					 
					
						2025-06-15 15:15:37 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0b73736a0d 
					 
					
						
						
							
							[Kernel] Raise verbose error and consolidate num_heads/num_kv_heads divisibility check ( #19339 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: 22quinn <33176974+22quinn@users.noreply.github.com > 
						
						
					 
					
						2025-06-15 13:43:48 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ee1531bc38 
					 
					
						
						
							
							[Bugfix][2/n] Fix speculative decoding CI - Fix test_ngram_e2e_greedy_correctness ( #19644 )  
						
						 
						
						
						
						
					 
					
						2025-06-14 21:15:41 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e13945f9dd 
					 
					
						
						
							
							[Perf] Further tunings for SM100 FP8 CUTLASS kernel ( #19566 )  
						
						 
						
						
						
						
					 
					
						2025-06-14 17:25:10 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						08500011d3 
					 
					
						
						
							
							[Fix] Convert kv_transfer_config from dict to KVTransferConfig ( #19262 )  
						
						 
						
						
						
						
					 
					
						2025-06-14 12:32:07 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						861a0a0a39 
					 
					
						
						
							
							[Bugfix] Don't attempt to use triton if no driver is active ( #19561 )  
						
						 
						
						
						
						
					 
					
						2025-06-14 12:30:54 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						bc956b38d0 
					 
					
						
						
							
							Only build CUTLASS MoE kernels on Hopper ( #19648 )  
						
						 
						
						
						
						
					 
					
						2025-06-14 11:44:15 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						294fc1e2c9 
					 
					
						
						
							
							[Hardware][NVIDIA][kernel] Fp4 MOE quant kernel optimization ( #19500 )  
						
						 
						
						
						
						
					 
					
						2025-06-14 09:34:28 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2db9044ab6 
					 
					
						
						
							
							[Bugfix] Fix auto dtype casting for BatchFeature ( #19316 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <2037008807@qq.com >
Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn > 
						
						
					 
					
						2025-06-14 15:13:08 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6fa718a460 
					 
					
						
						
							
							[Misc] Modularize CLI Argument Parsing in Benchmark Scripts ( #19593 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-06-14 16:54:52 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						06be858828 
					 
					
						
						
							
							[Bugfix] Fix the speculative decoding test by setting the target dtype ( #19633 )  
						
						 
						
						
						
						
					 
					
						2025-06-13 20:57:32 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d1e34cc9ac 
					 
					
						
						
							
							[V1][Metrics] Deprecate metrics with gpu_ prefix for non GPU specific metrics. ( #18354 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Saheli Bhattacharjee <saheli@krai.ai > 
						
						
					 
					
						2025-06-14 11:07:36 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						bd517eb9fe 
					 
					
						
						
							
							[BugFix] Fix DP Coordinator incorrect debug log message ( #19624 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-06-14 00:18:03 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d65668b4e8 
					 
					
						
						
							
							Adding "AMD: Multi-step Tests" to amdproduction. ( #19508 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Yida Wu <yidawu@alumni.cmu.edu >
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com > 
						
						
					 
					
						2025-06-13 17:08:51 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						aafbbd981f 
					 
					
						
						
							
							[torch.compile] Use custom ops when use_inductor=False ( #19618 )  
						
						 
						
						
						
						
					 
					
						2025-06-13 15:05:54 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0f0874515a 
					 
					
						
						
							
							[Doc] Add troubleshooting section to k8s deployment ( #19377 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Anna Pendleton <pendleton@google.com > 
						
						
					 
					
						2025-06-13 21:47:51 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3597b06a4f 
					 
					
						
						
							
							[CUDA] Enable full cudagraph for FlashMLA ( #18581 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: luka <luka@neuralmagic.com > 
						
						
					 
					
						2025-06-13 18:12:26 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1015296b79 
					 
					
						
						
							
							[doc][mkdocs] fix the  duplicate Supported features sections in GPU docs ( #19606 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-06-13 16:25:08 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ce9dc02c93 
					 
					
						
						
							
							[Refactor] Remove unused variables in moe_permute_unpermute_kernel.inl ( #19573 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yewentao256 <zhyanwentao@126.com > 
						
						
					 
					
						2025-06-13 06:12:15 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a24cb91600 
					 
					
						
						
							
							[Model] Fix minimax model cache & lm_head precision ( #19592 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: qingjun <qingjun@minimaxi.com > 
						
						
					 
					
						2025-06-13 12:08:20 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7e8d97dd3f 
					 
					
						
						
							
							[BugFix] Honor enable_caching in connector-delayed kvcache load case ( #19435 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-06-13 09:46:32 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d70bc7c029 
					 
					
						
						
							
							[torch.compile] reorganize the cache directory to support compiling multiple models ( #19064 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: youkaichao <youkaichao@gmail.com > 
						
						
					 
					
						2025-06-13 15:23:25 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ce688ad46e 
					 
					
						
						
							
							use base version for version comparison ( #19587 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Boyuan Feng <boyuan@meta.com > 
						
						
					 
					
						2025-06-13 15:09:34 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						cefdb9962d 
					 
					
						
						
							
							[Fix] The zip function in Python 3.9 does not have the strict argument ( #19549 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: 汪志鹏 <wangzhipeng628@gmail.com > 
						
						
					 
					
						2025-06-13 14:57:48 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ace5cdaff0 
					 
					
						
						
							
							[Fix] bump mistral common to support magistral ( #19533 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: 汪志鹏 <wangzhipeng628@gmail.com > 
						
						
					 
					
						2025-06-12 22:28:12 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6458721108 
					 
					
						
						
							
							[CPU] Refine default config for the CPU backend ( #19539 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: jiang1.li <jiang1.li@intel.com > 
						
						
					 
					
						2025-06-13 13:27:39 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						bb4a0decef 
					 
					
						
						
							
							[Misc] Correct broken docs link ( #19553 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Zerohertz <ohg3417@gmail.com > 
						
						
					 
					
						2025-06-12 22:27:13 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c707cfc12e 
					 
					
						
						
							
							[doc] fix incorrect link ( #19586 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-06-13 04:26:09 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7b3c9ff91d 
					 
					
						
						
							
							[Doc] uses absolute links for structured outputs ( #19582 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Aaron Pham <contact@aarnphm.xyz > 
						
						
					 
					
						2025-06-13 03:35:17 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c68698b326 
					 
					
						
						
							
							[Bugfix] Fix EAGLE vocab embedding for multimodal target model ( #19570 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: qizixi <qizixi@meta.com > 
						
						
					 
					
						2025-06-12 23:09:19 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e3b12667d4 
					 
					
						
						
							
							[BugFix] : Fix Batched DeepGemm Experts ( #19515 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Varun Sundar Rabindranath <vsundarr@redhat.com >
Co-authored-by: Varun Sundar Rabindranath <vsundarr@redhat.com > 
						
						
					 
					
						2025-06-12 20:43:02 -06:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e6aab5de29 
					 
					
						
						
							
							Revert "[Build/CI] Add tracing deps to vllm container image ( #15224 )" ( #19378 )  
						
						 
						
						
						
						
					 
					
						2025-06-12 17:26:40 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c57bb199b3 
					 
					
						
						
							
							[V1] Resolve failed concurrent structured output requests ( #19565 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Russell Bryant <rbryant@redhat.com > 
						
						
					 
					
						2025-06-12 23:30:09 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						dba68f9159 
					 
					
						
						
							
							[Doc] Unify structured outputs examples ( #18196 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Aaron Pham <contact@aarnphm.xyz > 
						
						
					 
					
						2025-06-12 22:50:31 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a3319f4f04 
					 
					
						
						
							
							[Bugfix] Enforce contiguous input for dynamic_per_token FP8/INT8 quant ( #19452 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-06-12 15:39:15 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9d880f594d 
					 
					
						
						
							
							[Misc] Turn MOE_DP_CHUNK_SIZE into an env var ( #19506 )  
						
						 
						
						
						
						
					 
					
						2025-06-12 18:01:16 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						017ef648e9 
					 
					
						
						
							
							[Spec Decode][Benchmark] Generalize spec decode offline benchmark to more methods and datasets ( #18847 )  
						
						 
						
						
						
						
					 
					
						2025-06-12 10:30:56 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4b25ab14e2 
					 
					
						
						
							
							[doc] Make top navigation sticky ( #19540 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-06-12 15:48:11 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f98548b9da 
					 
					
						
						
							
							[torch.compile][ROCm] Fuse quantization onto attention using a torch.compile pass ( #16756 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Luka Govedič <lgovedic@redhat.com >
Co-authored-by: Sage Moore <sage@neuralmagic.com > 
						
						
					 
					
						2025-06-12 08:31:04 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						96846bb360 
					 
					
						
						
							
							Fix TorchAOConfig skip layers ( #19265 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mobicham <hicham@mobiuslabs.com > 
						
						
					 
					
						2025-06-12 22:22:53 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b6efafd9e4 
					 
					
						
						
							
							[Perf] Vectorize static / dynamic INT8 quant kernels ( #19233 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yewentao256 <zhyanwentao@126.com > 
						
						
					 
					
						2025-06-12 06:51:41 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1129e2b1ab 
					 
					
						
						
							
							[V1][NixlConnector] Drop num_blocks check  ( #19532 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: NickLucche <nlucches@redhat.com > 
						
						
					 
					
						2025-06-12 12:36:14 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c742438f8b 
					 
					
						
						
							
							[Doc] Add V1 column to supported models list ( #19523 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-06-12 19:16:44 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						73e2e0118f 
					 
					
						
						
							
							[Quantization] Improve AWQ logic ( #19431 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-06-12 11:02:11 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c9280e6346 
					 
					
						
						
							
							[Bugfix] Respect num-gpu-blocks-override in v1 ( #19503 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jon Swenson <jmswen@gmail.com > 
						
						
					 
					
						2025-06-12 11:00:23 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						af09b3f0a0 
					 
					
						
						
							
							[Bugfix][V1] Allow manual FlashAttention for Blackwell ( #19492 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-06-12 10:40:24 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4f6c42fa0a 
					 
					
						
						
							
							[Security] Prevent new imports of (cloud)pickle ( #18018 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Russell Bryant <rbryant@redhat.com >
Co-authored-by: Aaron Pham <Aaronpham0103@gmail.com > 
						
						
					 
					
						2025-06-12 10:30:17 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						dff680001d 
					 
					
						
						
							
							Fix typo ( #19525 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: 2niuhe <carlton2tang@gmail.com > 
						
						
					 
					
						2025-06-12 09:24:45 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2e090bd5df 
					 
					
						
						
							
							[AMD][Kernel][BugFix] fix test_rocm_compressed_tensors_w8a8 for rocm ( #19509 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Randall Smith <Randall.Smith@amd.com > 
						
						
					 
					
						2025-06-12 07:14:24 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1b0b065eb5 
					 
					
						
						
							
							[BugFix] Handle missing sep_token for Qwen3-Reranker in Score API ( #19522 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: strutive07 <strutive07@gmail.com > 
						
						
					 
					
						2025-06-12 07:00:47 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d5bdf899e4 
					 
					
						
						
							
							[BugFix] Work-around incremental detokenization edge case error ( #19449 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-06-12 06:43:20 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7e3e74c97c 
					 
					
						
						
							
							[Frontend] Improve error message in tool_choice validation ( #19239 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: 22quinn <33176974+22quinn@users.noreply.github.com > 
						
						
					 
					
						2025-06-12 01:13:00 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3f6341bf7f 
					 
					
						
						
							
							Add Triton Fused MoE kernel config for E=16 on B200 ( #19518 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Brayden Zhong <b8zhong@uwaterloo.ca > 
						
						
					 
					
						2025-06-12 04:31:51 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e5d35d62f5 
					 
					
						
						
							
							[BugFix] Force registration of w8a8_block_fp8_matmul_deepgemm via lazy import ( #19514 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Varun Sundar Rabindranath <vsundarr@redhat.com >
Co-authored-by: Varun Sundar Rabindranath <vsundarr@redhat.com > 
						
						
					 
					
						2025-06-12 04:28:12 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2f1c19b245 
					 
					
						
						
							
							[CI] change spell checker from codespell to typos ( #18711 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Andy Xie <andy.xning@gmail.com > 
						
						
					 
					
						2025-06-11 19:57:10 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						42f52cc95b 
					 
					
						
						
							
							[CI/Build] Fix torch nightly CI dependencies ( #19505 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Richard Zou <zou3519@gmail.com > 
						
						
					 
					
						2025-06-11 14:40:42 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						97a9465bbc 
					 
					
						
						
							
							[UX] Add Feedback During CUDAGraph Capture ( #19501 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: rshaw@neuralmagic.com  <robertgshaw2@gmail.com > 
						
						
					 
					
						2025-06-11 21:09:05 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c7ea0b56cd 
					 
					
						
						
							
							[AMD] [Quantization] Add override flag for attention dtype instead of using kv_cache_dtype trigger ( #17331 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Randall Smith <Randall.Smith@amd.com > 
						
						
					 
					
						2025-06-11 15:53:28 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						29fa5cac1c 
					 
					
						
						
							
							[Kernels] Add activation chunking logic to FusedMoEModularKernel ( #19168 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Bill Nell <bnell@redhat.com > 
						
						
					 
					
						2025-06-11 12:53:10 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b2d9be6f7d 
					 
					
						
						
							
							[Docs] Remove WIP features in V1 guide ( #19498 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-06-11 09:15:03 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						04a55612dd 
					 
					
						
						
							
							[Misc] Fix  misleading ROCm warning ( #19486 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-06-12 00:12:10 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						89b0f84e17 
					 
					
						
						
							
							[doc] fix "Other AI accelerators" getting started page ( #19457 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: David Xia <david@davidxia.com > 
						
						
					 
					
						2025-06-11 16:11:17 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						497a91e9f7 
					 
					
						
						
							
							[CI] Update FlashInfer to 0.2.6.post1 ( #19297 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-06-11 22:57:28 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						943ffa5703 
					 
					
						
						
							
							[Bugfix] Update the example code, make it work with the latest lmcache ( #19453 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Runzhen Wang <wangrunzhen@gmail.com > 
						
						
					 
					
						2025-06-11 12:42:20 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5c8d34a42c 
					 
					
						
						
							
							Support no privileged mode on CPU for docker and kubernetes deployments ( #19241 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Tsai, Louie <louie.tsai@intel.com > 
						
						
					 
					
						2025-06-11 04:11:47 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3c8694eabe 
					 
					
						
						
							
							Fix some typo ( #19475 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: ximing.wxm <ximing.wxm@antgroup.com >
Co-authored-by: ximing.wxm <ximing.wxm@antgroup.com > 
						
						
					 
					
						2025-06-11 10:36:04 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7484e1fce2 
					 
					
						
						
							
							Add cache to cuda get_device_capability ( #19436 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-06-11 17:37:05 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a2142f0196 
					 
					
						
						
							
							Support non-string values in JSON keys from CLI ( #19471 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-06-11 09:34:04 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						871d6b7c74 
					 
					
						
						
							
							[Misc] Reduce warning message introduced in env_override ( #19476 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lu Fang <lufang@fb.com > 
						
						
					 
					
						2025-06-11 17:29:54 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						29a38f0352 
					 
					
						
						
							
							[Doc] Support "important" and "announcement" admonitions ( #19479 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-06-11 01:39:58 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a5115f4ff5 
					 
					
						
						
							
							[Doc] Fix quantization link titles ( #19478 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-06-11 01:27:22 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						68b4a26149 
					 
					
						
						
							
							[Doc] Update V1 User Guide for Hardware and Models ( #19474 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-06-11 00:49:06 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b8e809a057 
					 
					
						
						
							
							[Kernel] Support deep_gemm for linear methods ( #19085 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: artetaout <lulala341@gmail.com > 
						
						
					 
					
						2025-06-11 15:14:45 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5039ec2336 
					 
					
						
						
							
							[ROCm] Add rules to automatically label ROCm related PRs ( #19405 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lu Fang <lufang@fb.com > 
						
						
					 
					
						2025-06-11 15:09:18 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7c644ab6d5 
					 
					
						
						
							
							Fix Typo in Documentation and Function Name ( #19442 )  
						
						 
						
						
						
						
					 
					
						2025-06-10 22:44:11 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2d40665fe8 
					 
					
						
						
							
							Add fused MOE config for Qwen3 30B A3B on B200 ( #19455 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Junhao Li <junhao@ubicloud.com > 
						
						
					 
					
						2025-06-11 13:43:46 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						96ada386b7 
					 
					
						
						
							
							[Misc] Remove unused MultiModalHasher.hash_prompt_mm_data ( #19422 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lukas Geiger <lukas.geiger94@gmail.com > 
						
						
					 
					
						2025-06-11 05:18:57 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1e473b3010 
					 
					
						
						
							
							[CI] Disable failing GGUF model test ( #19454 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-06-11 05:12:38 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2b1e2111b0 
					 
					
						
						
							
							Fix test_max_model_len in tests/entrypoints/llm/test_generate.py ( #19451 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lu Fang <lufang@fb.com > 
						
						
					 
					
						2025-06-11 12:54:59 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a45b979d9f 
					 
					
						
						
							
							[BugFix] Fix docker build cpu-dev image error ( #19394 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: niu_he <carlton2tang@gmail.com > 
						
						
					 
					
						2025-06-10 20:56:40 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3952731e8f 
					 
					
						
						
							
							[New Model]: Support Qwen3 Embedding & Reranker  ( #19260 )  
						
						 
						
						
						
						
					 
					
						2025-06-10 20:07:30 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						77f0d465d0 
					 
					
						
						
							
							[BugFix] Allow use_cudagraph to work with dynamic VLLM_USE_V1 ( #19390 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: rzou <zou3519@gmail.com > 
						
						
					 
					
						2025-06-11 07:54:41 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						22c3c0aa4a 
					 
					
						
						
							
							Add H20-3e fused MoE kernel tuning configs for Qwen3-235B-A22B-FP8 ( #19401 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: 许文卿 <xwq391974@alibaba-inc.com > 
						
						
					 
					
						2025-06-11 07:23:57 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						33f8dba7c6 
					 
					
						
						
							
							[Model] use AutoWeightsLoader for commandr ( #19399 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: py-andy-c <pychen1017@gmail.com > 
						
						
					 
					
						2025-06-10 22:42:21 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5241ca50d6 
					 
					
						
						
							
							[ROCm][V1] Adding ROCm to the list of plaforms using V1 by default ( #19440 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Gregory Shtrasberg <Gregory.Shtrasberg@amd.com > 
						
						
					 
					
						2025-06-10 22:06:15 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						da9b523ce1 
					 
					
						
						
							
							[Docs] Note that alternative structured output backends are supported ( #19426 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Russell Bryant <rbryant@redhat.com > 
						
						
					 
					
						2025-06-10 16:20:00 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b6553be1bc 
					 
					
						
						
							
							[Misc] Slight improvement of the BNB  ( #19418 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com >
Co-authored-by: Isotr0py <2037008807@qq.com >
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> 
						
						
					 
					
						2025-06-10 13:51:49 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						64a9af5afa 
					 
					
						
						
							
							Simplify ep kernels installation ( #19412 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: youkaichao <youkaichao@gmail.com > 
						
						
					 
					
						2025-06-10 20:06:08 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e4248849ec 
					 
					
						
						
							
							[BugFix][CPU] Fix CPU CI by ignore collecting test_pixtral ( #19411 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: jiang.li <jiang1.li@intel.com > 
						
						
					 
					
						2025-06-10 12:02:40 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						467bef18a3 
					 
					
						
						
							
							[BugFix][FlashInfer] Fix attention backend interface mismatch with unexpected keyword use_irope ( #19134 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Yunqiu Guo <guorachel@meta.com > 
						
						
					 
					
						2025-06-10 16:48:51 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5f1ac1e1d1 
					 
					
						
						
							
							Revert "[v1] Add fp32 support to v1 engine through flex attn" ( #19404 )  
						
						 
						
						
						
						
					 
					
						2025-06-10 01:30:20 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9368cc90b2 
					 
					
						
						
							
							Automatically bind CPU OMP Threads of a rank to CPU ids of a NUMA node. ( #17930 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Tsai, Louie <louie.tsai@intel.com >
Co-authored-by: Li, Jiang <bigpyj64@gmail.com > 
						
						
					 
					
						2025-06-10 06:22:05 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						32b3946bb4 
					 
					
						
						
							
							Add clear documentation around the impact of debugging flag ( #19369 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Anna Pendleton <pendleton@google.com > 
						
						
					 
					
						2025-06-10 06:16:09 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6b1391ca7e 
					 
					
						
						
							
							[Misc] refactor neuron_multimodal and profiling ( #19397 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-06-10 06:12:42 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a3f66e75d1 
					 
					
						
						
							
							Add security warning to bug report template ( #19365 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Russell Bryant <rbryant@redhat.com >
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com > 
						
						
					 
					
						2025-06-10 06:06:36 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						319cb1e351 
					 
					
						
						
							
							[Core] Batch multi modal input using pinned memory ( #19169 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lukas Geiger <lukas.geiger94@gmail.com > 
						
						
					 
					
						2025-06-10 13:44:59 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1efef71645 
					 
					
						
						
							
							[Bugfix] Fix modelscope token passed in ( #19389 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: wangli <wangli858794774@gmail.com >
Signed-off-by: Jee Jee Li <pandaleefree@gmail.com >
Co-authored-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-06-10 13:39:37 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						646d62f636 
					 
					
						
						
							
							[Core] Use tuple for kv cache group block ids ( #19175 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-06-10 07:01:17 +02:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6cd4ae8acd 
					 
					
						
						
							
							[Frontend] Add tqdm_leave_pbar to control progress bar visibility ( #19357 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-06-10 04:55:09 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c016047ed7 
					 
					
						
						
							
							Fix docs/mkdocs/hooks/remove_announcement.py ( #19382 )  
						
						 
						
						
						
						
					 
					
						2025-06-09 21:36:54 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9af6d22e4c 
					 
					
						
						
							
							Use xla flag to improve the quantized model performance ( #19303 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Xiongfei Wei <isaacwxf23@gmail.com > 
						
						
					 
					
						2025-06-10 01:28:45 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4589b94032 
					 
					
						
						
							
							[Bugfix] Fix benchmark_moe.py ( #19016 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Tianyu Guo <guoty9@mail2.sysu.edu.cn > 
						
						
					 
					
						2025-06-09 18:04:36 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						cc867be19c 
					 
					
						
						
							
							[V1] Reuse V0's memory_profiling util for gpu worker memory profiling ( #19312 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Ye (Charlotte) Qi <yeq@meta.com > 
						
						
					 
					
						2025-06-10 08:40:01 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3a7cd627a8 
					 
					
						
						
							
							[Misc] Fix a config typo in disable_hybrid_kv_cache_manager configuration ( #19383 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Siyuan Liu <lsiyuan@google.com > 
						
						
					 
					
						2025-06-09 16:41:51 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8058c91108 
					 
					
						
						
							
							[HOT-FIX] Add kv_sharing_target_layer_name argument to cutlass_mla backend ( #19374 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Pavani Majety <pmajety@nvidia.com > 
						
						
					 
					
						2025-06-09 19:00:07 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7d44c469fe 
					 
					
						
						
							
							[TPU]Fix KV cache sharing tests ( #19371 )  
						
						 
						
						
						
						
					 
					
						2025-06-09 18:38:15 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						31f58be96a 
					 
					
						
						
							
							[Frontend] Make TIMEOUT_KEEP_ALIVE configurable through env var ( #18472 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: liusiqian <liusiqian@tal.com > 
						
						
					 
					
						2025-06-09 21:41:21 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ebb2f383b8 
					 
					
						
						
							
							[Quantization] Bump compressed-tensors version ( #19295 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Kyle Sayers <kylesayrs@gmail.com > 
						
						
					 
					
						2025-06-09 14:33:15 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c1c7dbbeeb 
					 
					
						
						
							
							[Bugfix][Core] Prevent token lengths exceeding max_model_len in V0 ( #19348 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: 22quinn <33176974+22quinn@users.noreply.github.com > 
						
						
					 
					
						2025-06-09 23:01:29 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5cf2daea9a 
					 
					
						
						
							
							[Misc] Fixes and Optimizations for DeepEP + DeepGEMM combination. ( #19298 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Varun <vsundarr@redhat.com >
Co-authored-by: Varun <vsundarr@redhat.com > 
						
						
					 
					
						2025-06-09 10:50:39 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b8089195b4 
					 
					
						
						
							
							[v1] Add fp32 support to v1 engine through flex attn ( #19319 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <2037008807@qq.com >
Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn > 
						
						
					 
					
						2025-06-09 22:10:44 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						770e5dcdb8 
					 
					
						
						
							
							[full_graph] Fix query_start_loc padding ( #19321 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Yinghai Lu <yinghai@thinkingmachines.ai > 
						
						
					 
					
						2025-06-09 21:32:56 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c57c9415b1 
					 
					
						
						
							
							[Docs] Fix a bullet list in usage/security.md ( #19358 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: windsonsea <haifeng.yao@daocloud.io > 
						
						
					 
					
						2025-06-09 13:28:51 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						01810f9236 
					 
					
						
						
							
							[CI] Introduce rules for llama auto-label ( #19323 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lu Fang <lufang@fb.com > 
						
						
					 
					
						2025-06-09 20:05:42 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						59abbd84f9 
					 
					
						
						
							
							[Fix] Allow kernel compilation for CUDA capability 8.7 ( #19328 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Conroy Cheers <conroy@corncheese.org > 
						
						
					 
					
						2025-06-09 02:57:23 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						95a6568b5c 
					 
					
						
						
							
							[CI/Build] Fix LoRA test ( #19350 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-06-09 09:52:10 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0eca5eacd0 
					 
					
						
						
							
							[Doc] Fix description in the Automatic Prefix Caching design doc ( #19333 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: cr7258 <chengzw258@163.com > 
						
						
					 
					
						2025-06-09 17:30:02 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						12e5829221 
					 
					
						
						
							
							[doc] improve ci doc ( #19307 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-06-09 07:26:12 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3a4d417707 
					 
					
						
						
							
							[Misc] Cleanup compilation tests ( #19343 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: rzou <zou3519@gmail.com > 
						
						
					 
					
						2025-06-09 15:05:44 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8335667c22 
					 
					
						
						
							
							[Frontend] Remove unreachable code from llm.py ( #19288 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: KsuParkhamchuk <k.parkhamchuk@gmail.com > 
						
						
					 
					
						2025-06-09 10:22:10 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e1c4380d4c 
					 
					
						
						
							
							[Misc] Add documentation update reminder to PR template ( #19289 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <2037008807@qq.com > 
						
						
					 
					
						2025-06-09 10:20:53 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e31ae3de36 
					 
					
						
						
							
							[Deprecation] Remove inputs arg fallback in Engine classes ( #18799 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-06-09 10:19:56 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2ffb9b6e07 
					 
					
						
						
							
							[Bugfix] model_max_length should consider max_model_len in tokenizer_config ( #19201 )  
						
						 
						
						
						
						
					 
					
						2025-06-08 07:17:53 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						cda10fa3e2 
					 
					
						
						
							
							[Multi Modal] Add an env var for message queue max chunk bytes  ( #19242 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yZhen <yZhen@fb.com >
Co-authored-by: yZhen <yZhen@fb.com > 
						
						
					 
					
						2025-06-08 21:39:12 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c123bc33f9 
					 
					
						
						
							
							[Quantization] Add compressed-tensors NVFP4 support ( #18312 )  
						
						 
						
						
						
						
					 
					
						2025-06-08 09:05:55 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b9a1791e2c 
					 
					
						
						
							
							[Hardware][POWER] Add IBM POWER11 Support to CPU Extension Detection ( #19082 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Akash Kaothalkar <akash.kaothalkar@ibm.com >
Co-authored-by: Akash Kaothalkar <akash.kaothalkar@ibm.com > 
						
						
					 
					
						2025-06-08 09:17:14 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						989dcee981 
					 
					
						
						
							
							Add H20-3e fused MoE kernel tuning configs for Qwen3-235B-A22B ( #19315 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Xu Wenqing <xuwq1993@qq.com > 
						
						
					 
					
						2025-06-08 16:07:02 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3d64d366e0 
					 
					
						
						
							
							[Misc] Change tests/compile to use VLLM_V1 by default ( #19302 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: rzou <zou3519@gmail.com > 
						
						
					 
					
						2025-06-08 16:06:48 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						eaa2e51088 
					 
					
						
						
							
							[Bugfix] Re-enable use_cudagraph in vLLM v1 ( #19299 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Richard Zou <zou3519@gmail.com > 
						
						
					 
					
						2025-06-08 08:56:12 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d77f7fb871 
					 
					
						
						
							
							[Bugfix]: Fix TypeError: 'float' object cannot be interpreted as an integer ( #19283 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com > 
						
						
					 
					
						2025-06-08 08:16:31 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2d8476e465 
					 
					
						
						
							
							[BugFix][V1] Fix memory profiling bug ( #18974 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: luka <luka@neuralmagic.com > 
						
						
					 
					
						2025-06-07 10:34:51 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						88be823d57 
					 
					
						
						
							
							[AMD] Update compatible packaging version ( #19309 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: pramkuma <Pramendra.Kumar@amd.com > 
						
						
					 
					
						2025-06-07 20:55:09 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4e4f63ad45 
					 
					
						
						
							
							[Nit][Benchmark]Fix example in benchmark_serving_structured_output.py ( #19311 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lifan Shen <lifans@meta.com > 
						
						
					 
					
						2025-06-07 18:25:38 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d2f0e7e615 
					 
					
						
						
							
							[CI/Build] Improve Llama GGUF test robustness ( #19287 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <2037008807@qq.com > 
						
						
					 
					
						2025-06-07 17:23:28 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						122cdca5f6 
					 
					
						
						
							
							[Misc] refactor context extension ( #19246 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-06-07 05:13:21 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						cf02f9b283 
					 
					
						
						
							
							Add FlexAttention to V1 ( #16078 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: drisspg <drisspguessous@gmail.com > 
						
						
					 
					
						2025-06-06 21:58:55 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c4296b1a27 
					 
					
						
						
							
							[CI][PowerPC] Use a more appropriate way to select testcase in tests/models/language/pooling/test_embedding.py ( #19253 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Aaruni Aggarwal <aaruniagg@gmail.com > 
						
						
					 
					
						2025-06-07 11:52:52 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						66c508b137 
					 
					
						
						
							
							[TPU][Test] Add script to run benchmark on TPU for buildkite ( #19039 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Qiliang Cui <derrhein@gmail.com > 
						
						
					 
					
						2025-06-06 20:10:24 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						84166fee97 
					 
					
						
						
							
							[Kernel] Integrate CUTLASS MoE kernel with PPLX ( #18762 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: ElizaWszola <ewszola@redhat.com >
Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com >
Co-authored-by: Tyler Michael Smith <tyler@neuralmagic.com > 
						
						
					 
					
						2025-06-06 18:26:11 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6e0cd10f72 
					 
					
						
						
							
							[Easy][Test] Simplify test_function_tool_use with multiple parametrizes ( #19269 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lu Fang <lufang@fb.com > 
						
						
					 
					
						2025-06-07 09:19:09 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e010688f50 
					 
					
						
						
							
							[Build][ROCm] Update Dockerfile.rocm ( #19296 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Alexei V. Ivanov <alexei.ivanov@amd.com > 
						
						
					 
					
						2025-06-06 19:35:16 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						441b65d8c7 
					 
					
						
						
							
							[Misc][Tools][Benchmark] Fix and improve auto tune script ( #19163 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chenyaaang <chenyangli@google.com > 
						
						
					 
					
						2025-06-06 23:31:19 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						46ecc57973 
					 
					
						
						
							
							[BugFix] Fix tpu_model_runner block_id concatenation ( #19228 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-06-06 16:28:17 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b6a3a9f76d 
					 
					
						
						
							
							[Core] Fix abrupt request abort ( #18485 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: nicklucche <nlucches@redhat.com >
Signed-off-by: Nick Hill <nhill@redhat.com >
Co-authored-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-06-06 16:27:59 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ca27f0f9c1 
					 
					
						
						
							
							[Bugfix][Core] Update cancellation logic in generate() to handle Generator exits ( #19225 )  
						
						 
						
						... 
						
						
						
						Co-authored-by: Adolfo Victoria <adovi@meta.com > 
						
						
					 
					
						2025-06-06 20:17:54 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						aad30bd306 
					 
					
						
						
							
							[BugFix] Fix MultiConnector test after HMA changes ( #19291 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-06-06 20:16:24 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						94ecee6282 
					 
					
						
						
							
							Fixed ppc build when it runs on non-RHEL based linux distros ( #18422 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nishidha Panpaliya <nishidha.panpaliya@partner.ibm.com >
Signed-off-by: Md. Shafi Hussain <Md.Shafi.Hussain@ibm.com >
Signed-off-by: npanpaliya <nishidha.panpaliya@partner.ibm.com >
Co-authored-by: Md. Shafi Hussain <Md.Shafi.Hussain@ibm.com > 
						
						
					 
					
						2025-06-06 11:54:26 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8267f9916f 
					 
					
						
						
							
							improve logits bias ( #19041 )  
						
						 
						
						
						
						
					 
					
						2025-06-06 19:59:25 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7353492a47 
					 
					
						
						
							
							[Core] Raise when non-multi-instance DP clients target a DP rank ( #19227 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jon Swenson <jmswen@gmail.com > 
						
						
					 
					
						2025-06-06 19:03:01 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7661e92ef8 
					 
					
						
						
							
							[Model] Optimize nemotron_h implementation ( #19249 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-06-06 10:05:14 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f168b85725 
					 
					
						
						
							
							Unit Test for run_dp_sharded_vision_model ( #19103 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Siqi Yan <siqi@meta.com >
Co-authored-by: Siqi Yan <siqi@meta.com > 
						
						
					 
					
						2025-06-06 16:24:02 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						da511d54d8 
					 
					
						
						
							
							Fix CompilationConfig repr ( #19091 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: rzou <zou3519@gmail.com > 
						
						
					 
					
						2025-06-06 16:23:35 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						65c69444b1 
					 
					
						
						
							
							[Docs] Improve V1 KVConnector interface documentation ( #19172 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-06-06 16:22:45 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						94870359cd 
					 
					
						
						
							
							[Quantization] Bump compressed-tensors version; update NVFP4A16 test model ( #19224 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Dipika Sikka <dipikasikka1@gmail.com > 
						
						
					 
					
						2025-06-06 01:21:54 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0d49483ea9 
					 
					
						
						
							
							[TPU] fix kv cache dtype in model runner ( #19244 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chengji Yao <chengjiyao@google.com > 
						
						
					 
					
						2025-06-06 16:20:16 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						90b78ec5f9 
					 
					
						
						
							
							[v1][P/D] Fix a edge case in kv cache schedule ( #19182 )  
						
						 
						
						... 
						
						
						
						Co-authored-by: jinghui <jinghui@fb.com > 
						
						
					 
					
						2025-06-05 23:32:55 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						91a2ef98ea 
					 
					
						
						
							
							[Chore] update CODEOWNERS ( #19247 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Aaron Pham <contact@aarnphm.xyz > 
						
						
					 
					
						2025-06-06 06:09:43 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3da2313d78 
					 
					
						
						
							
							Support allowed_token_ids in ChatCompletionRequest ( #19143 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Xu Song <xusong.vip@gmail.com > 
						
						
					 
					
						2025-06-06 05:06:48 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b61dc5f972 
					 
					
						
						
							
							[TPU] update torch_xla pin ( #19231 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chengji Yao <chengjiyao@google.com > 
						
						
					 
					
						2025-06-06 04:27:38 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f8a1a2d108 
					 
					
						
						
							
							[v1] Hybrid Memory Allocator ( #17996 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chen Zhang <zhangch99@outlook.com > 
						
						
					 
					
						2025-06-05 20:47:09 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3465b87ef8 
					 
					
						
						
							
							[Bugfix] Fix EAGLE vocab embedding construction for Llama 70B ( #19033 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Benjamin Chislett <benjamin.chislett@centml.ai > 
						
						
					 
					
						2025-06-05 19:10:08 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c8134bea15 
					 
					
						
						
							
							Fix AOPerModuleConfig name changes ( #18869 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jerry Zhang <jerryzh168@gmail.com > 
						
						
					 
					
						2025-06-05 18:51:32 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						cb6d572e85 
					 
					
						
						
							
							[Model] NemotronH support ( #18863 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Luis Vega <2478335+vegaluisjose@users.noreply.github.com >
Co-authored-by: Luis Vega <2478335+vegaluisjose@users.noreply.github.com > 
						
						
					 
					
						2025-06-05 21:29:28 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						87360308b7 
					 
					
						
						
							
							[V1] Use FlashInfer by default on Blackwell GPUs ( #19118 )  
						
						 
						
						
						
						
					 
					
						2025-06-05 15:40:39 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						aa49f14832 
					 
					
						
						
							
							[Quantization] Skip Fp4 Test for compressed-tensors ( #19217 )  
						
						 
						
						
						
						
					 
					
						2025-06-05 18:21:53 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9ef9173cfa 
					 
					
						
						
							
							[P/D][NixlConnector] Enable FlashInfer backend ( #19090 )  
						
						 
						
						
						
						
					 
					
						2025-06-05 17:10:15 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						85e2b7bb13 
					 
					
						
						
							
							[MISC][Bugfix] Use less CPU when message queue has been empty for some time ( #16226 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Povilas Kanapickas <povilas@radix.lt > 
						
						
					 
					
						2025-06-05 16:53:08 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						61059bee40 
					 
					
						
						
							
							[Hardware][NVIDIA] FP4 MoE kernel optimization ( #19110 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chiyue Wei <chiyuew@nvidia.com >
Co-authored-by: Chiyue Wei <chiyuew@nvidia.com > 
						
						
					 
					
						2025-06-05 09:48:26 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ec89524f50 
					 
					
						
						
							
							Add H20-3e fused MoE kernel tuning configs for DeepSeek-R1/V3 ( #19205 )  
						
						 
						
						
						
						
					 
					
						2025-06-05 16:38:54 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f20f9f063b 
					 
					
						
						
							
							[mistral_common] Add v11 tokenizer ( #19193 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Patrick von Platen <patrick.v.platen@gmail.com > 
						
						
					 
					
						2025-06-05 08:27:41 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9bc8bb07cf 
					 
					
						
						
							
							[Bugfix] properly catch PIL-related errors for vision models when incorrect data urls are provided ( #19202 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Guillaume Calmettes <gcalmettes@scaleway.com > 
						
						
					 
					
						2025-06-05 12:59:28 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1aeb925f34 
					 
					
						
						
							
							[Frontend] improve vllm run-batch --help display ( #19187 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-06-05 11:16:25 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						188a4590d8 
					 
					
						
						
							
							[Misc] Do not override NCCL_CUMEM_ENABLE if set explicitly ( #19105 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: 22quinn <33176974+22quinn@users.noreply.github.com > 
						
						
					 
					
						2025-06-05 11:14:32 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						18093084be 
					 
					
						
						
							
							[Misc] Remove unnecessary fallback to prefill-decode attention ( #19138 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: vllmellm <vllm.ellm@embeddedllm.com > 
						
						
					 
					
						2025-06-05 16:08:26 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						da40380214 
					 
					
						
						
							
							[Build] Annotate wheel and container path for release workflow ( #19162 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: simon-mo <simon.mo@hey.com >
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> 
						
						
					 
					
						2025-06-04 23:24:56 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8fc57501d3 
					 
					
						
						
							
							[Bugfix]: Fix the incompatibility issue with stream when Thinking is disabled ( #19135 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com > 
						
						
					 
					
						2025-06-05 06:24:24 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						af7fc84fd2 
					 
					
						
						
							
							[BugFix][Minor] Fix full cuda graph bug when max_num_seqs < 512 ( #19171 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-06-05 13:41:25 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0678b52251 
					 
					
						
						
							
							Handle non-serializable objects when dumping benchmark results ( #19114 )  
						
						 
						
						
						
						
					 
					
						2025-06-04 22:40:04 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						25b918eee6 
					 
					
						
						
							
							[Torch Nightly]add missing dependency ( #18770 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Yang Wang <elainewy@meta.com > 
						
						
					 
					
						2025-06-04 21:56:12 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a408820f2f 
					 
					
						
						
							
							[Bugfix] Fix port handling in make_zmq_path ( #19117 )  
						
						 
						
						
						
						
					 
					
						2025-06-04 21:00:59 -06:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c56ed8bb0e 
					 
					
						
						
							
							[Bugfix][Nixl] Fix full prefix cache hit bug ( #18632 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: rshaw@neuralmagic.com  <robertgshaw2@gmail.com >
Signed-off-by: Nick Hill <nhill@redhat.com >
Co-authored-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-06-05 02:07:32 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						78dcf56cb3 
					 
					
						
						
							
							[doc] small fix ( #19167 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-06-05 09:13:50 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b2fac67130 
					 
					
						
						
							
							[P/D] Heterogeneous TP ( #18833 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: nicklucche <nlucches@redhat.com > 
						
						
					 
					
						2025-06-04 23:25:34 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						23027e2daf 
					 
					
						
						
							
							[Misc] refactor: simplify EngineCoreClient.make_async_mp_client in AsyncLLM ( #18817 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: googs1025 <googs1025@gmail.com > 
						
						
					 
					
						2025-06-04 15:37:25 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c3fd4d669a 
					 
					
						
						
							
							[Kernel] Integrate batched/masked deepgemm kernel ( #19111 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Varun <vsundarr@redhat.com >
Co-authored-by: Varun <vsundarr@redhat.com > 
						
						
					 
					
						2025-06-04 21:59:18 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ef3f98b59f 
					 
					
						
						
							
							[Bugfix] fix v1 cpu worker fails on macOS ( #19121 )  
						
						 
						
						
						
						
					 
					
						2025-06-04 20:17:38 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7ee2590478 
					 
					
						
						
							
							[TPU] Update dynamo dump file name in compilation test ( #19108 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Siyuan Liu <lsiyuan@google.com > 
						
						
					 
					
						2025-06-04 16:13:43 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						53a5a0ce30 
					 
					
						
						
							
							[Perf] Tunings for SM100 FP8 CUTLASS kernel ( #18778 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-06-04 10:46:28 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d459fae0a2 
					 
					
						
						
							
							[Bugfix][EP+DP] Fix internode check ( #19112 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Tyler Michael Smith <tysmith@redhat.com > 
						
						
					 
					
						2025-06-04 23:39:23 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c8dcc15921 
					 
					
						
						
							
							Allow AsyncLLMEngine.generate to target a specific DP rank ( #19102 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jon Swenson <jmswen@gmail.com > 
						
						
					 
					
						2025-06-04 08:26:47 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8f4ffbd373 
					 
					
						
						
							
							[Doc] Update V1 Guide for embedding models ( #19141 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-06-04 22:57:55 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5f2cd251d2 
					 
					
						
						
							
							Sm100 blockwise fp8 swap ab ( #18564 )  
						
						 
						
						
						
						
					 
					
						2025-06-04 07:48:45 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						02658c2dfe 
					 
					
						
						
							
							Add DeepSeek-R1-0528 function call chat template ( #18874 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: 许文卿 <xwq391974@alibaba-inc.com > 
						
						
					 
					
						2025-06-04 13:24:18 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						01dc9a76db 
					 
					
						
						
							
							[CI/Build][Bugfix] Ensure compatibility with transformers 4.52 ( #18678 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-06-04 04:49:20 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						35cf32df30 
					 
					
						
						
							
							Improve the output precision of embedding models ( #19092 )  
						
						 
						
						
						
						
					 
					
						2025-06-04 11:48:57 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8711bc5e68 
					 
					
						
						
							
							[Misc] Add packages for benchmark as extra dependency ( #19089 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <2037008807@qq.com > 
						
						
					 
					
						2025-06-04 04:18:48 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2669a0d7b5 
					 
					
						
						
							
							Fix ValueError: Missing value for tag key(s): model_name,engine. ( #19113 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Seiji Eicher <seiji@anyscale.com > 
						
						
					 
					
						2025-06-04 17:10:45 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8e972d9c44 
					 
					
						
						
							
							[TPU] Skip hanging tests ( #19115 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Siyuan Liu <lsiyuan@google.com > 
						
						
					 
					
						2025-06-04 01:43:00 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3336c8cfbe 
					 
					
						
						
							
							Fix   #19130  ( #19132 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: 汪志鹏 <wangzhipeng628@gmail.com > 
						
						
					 
					
						2025-06-04 01:42:06 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b124e1085b 
					 
					
						
						
							
							[Bugfix] Fix FA3 full cuda graph correctness ( #19106 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-06-03 23:10:15 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						41aa578428 
					 
					
						
						
							
							[NVIDIA] Add Cutlass MLA backend ( #17625 )  
						
						 
						
						
						
						
					 
					
						2025-06-03 21:40:26 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8d646c2e53 
					 
					
						
						
							
							[Cleanup][v1]:remote guided-decoding-backend for example ( #19059 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: calvin chen <120380290@qq.com > 
						
						
					 
					
						2025-06-04 04:23:26 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5d6d1adf15 
					 
					
						
						
							
							[KERNEL] Sampler. CUDA kernel for applying repetition penalty ( #18437 )  
						
						 
						
						
						
						
					 
					
						2025-06-03 21:13:01 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1409ef9134 
					 
					
						
						
							
							[Core] Cast multimodal input in hf processor ( #18862 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lukas Geiger <lukas.geiger94@gmail.com > 
						
						
					 
					
						2025-06-03 20:24:56 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4555143ea7 
					 
					
						
						
							
							[CPU] V1 support for the CPU backend ( #16441 )  
						
						 
						
						
						
						
					 
					
						2025-06-03 18:43:01 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						52dceb172d 
					 
					
						
						
							
							[Docs] Add developer doc about CI failures ( #18782 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Russell Bryant <rbryant@redhat.com >
Co-authored-by: Mark McLoughlin <markmc@redhat.com >
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com > 
						
						
					 
					
						2025-06-04 01:09:13 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						abd7df2fca 
					 
					
						
						
							
							[Misc] Fix path and python alias errors in disagg_prefill exmaples ( #18919 )  
						
						 
						
						
						
						
					 
					
						2025-06-03 17:15:18 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b712be98c7 
					 
					
						
						
							
							feat: add data parallel rank to KVEventBatch ( #18925 )  
						
						 
						
						
						
						
					 
					
						2025-06-03 17:14:20 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a8da78eac9 
					 
					
						
						
							
							[Bugfix] Max concurrency estimation and check_enough_kv_cache_memory for models with sliding window layers ( #19029 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chen Zhang <zhangch99@outlook.com > 
						
						
					 
					
						2025-06-04 00:14:06 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5d96533e22 
					 
					
						
						
							
							[Bugfix][P/D] Fix Prefix Cache Bug ( #18411 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: nicklucche <nlucches@redhat.com >
Co-authored-by: Robert Shaw <114415538+robertgshaw2-redhat@users.noreply.github.com > 
						
						
					 
					
						2025-06-03 23:53:16 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4de790fcad 
					 
					
						
						
							
							[Bugfix]: Fix the incompatibility issue with tool_choice 'required' when Thinking is enabled ( #19075 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com > 
						
						
					 
					
						2025-06-03 23:27:24 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b5fd9506c1 
					 
					
						
						
							
							[Bugfix] get_num_blocks_to_allocate with null_block ( #19031 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chen Zhang <zhangch99@outlook.com > 
						
						
					 
					
						2025-06-03 15:30:55 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						135cf55cd1 
					 
					
						
						
							
							[V1][Spec Decode][Ngram] 1.35x gain -> 1.95x gain on InstructCoder with prompt fix ( #18971 )  
						
						 
						
						
						
						
					 
					
						2025-06-03 15:26:33 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6cac54f4d1 
					 
					
						
						
							
							[v1] Re-init input batch for multiple kv cache groups ( #18654 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chen Zhang <zhangch99@outlook.com > 
						
						
					 
					
						2025-06-03 21:41:36 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6865fe0074 
					 
					
						
						
							
							Fix interaction between Optional and Annotated in CLI typing ( #19093 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com >
Co-authored-by: Yikun Jiang <yikun@apache.org > 
						
						
					 
					
						2025-06-03 21:07:19 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e31446b6c8 
					 
					
						
						
							
							[Perf] Tune scaled_fp8_quant by increasing vectorization ( #18844 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-06-03 13:48:25 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						bdf13965ab 
					 
					
						
						
							
							[V1] Support cross-layer KV sharing ( #18212 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Yong Hoon Shin <yhshin@meta.com > 
						
						
					 
					
						2025-06-03 20:33:07 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						fa98d77773 
					 
					
						
						
							
							[Kernel] DeepEP dispatch-combine kernel integration ( #18434 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Varun <vsundarr@redhat.com >
Co-authored-by: Varun Sundar Rabindranath <vsundarr@redhat.com > 
						
						
					 
					
						2025-06-03 12:30:02 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						01eee40536 
					 
					
						
						
							
							[doc] update docker version ( #19074 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-06-03 19:08:21 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						19bdaf32b1 
					 
					
						
						
							
							[Doc] Readme standardization ( #18695 )  
						
						 
						
						... 
						
						
						
						Co-authored-by: Soren Dreano <soren@numind.ai > 
						
						
					 
					
						2025-06-03 11:50:55 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						02f0c7b220 
					 
					
						
						
							
							[Misc] Add SPDX-FileCopyrightText  ( #19100 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: simon-mo <simon.mo@hey.com > 
						
						
					 
					
						2025-06-03 11:20:17 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d054da1992 
					 
					
						
						
							
							[Misc] fix: add miss best_of param validation ( #18555 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: googs1025 <googs1025@gmail.com > 
						
						
					 
					
						2025-06-03 11:02:07 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4b7817c119 
					 
					
						
						
							
							[Misc] Add missing _Backend enums ( #19081 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: nicklucche <nlucches@redhat.com > 
						
						
					 
					
						2025-06-03 16:15:16 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d00dd65cd4 
					 
					
						
						
							
							[Doc] Improve the Pull Request template with key components ( #19086 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lu Fang <lufang@fb.com > 
						
						
					 
					
						2025-06-03 23:44:34 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d81edded69 
					 
					
						
						
							
							[Bugfix] disable processor cache  ( #19068 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: raushan <raushan@huggingface.co > 
						
						
					 
					
						2025-06-03 15:06:04 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						476844d44c 
					 
					
						
						
							
							Fix underscores in dict keys passed via CLI ( #19030 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-06-03 14:39:24 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4e68ae5e59 
					 
					
						
						
							
							[CI/Build] Remove V0 LoRA test ( #19066 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-06-03 14:30:18 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4e88723f32 
					 
					
						
						
							
							[doc] clarify windows support ( #19088 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: youkaichao <youkaichao@gmail.com > 
						
						
					 
					
						2025-06-03 21:42:17 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						118ff92111 
					 
					
						
						
							
							[Doc] Update V1 user guide for embedding and enc-dec models ( #19060 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-06-03 02:29:41 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ec2dcd80bc 
					 
					
						
						
							
							[Misc] Update WeightsMapper for qwen2-vl/qwen2.5-vl ( #19054 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <2037008807@qq.com > 
						
						
					 
					
						2025-06-03 09:08:20 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						42243fbda0 
					 
					
						
						
							
							[Doc] Add InternVL LoRA support  ( #19055 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-06-03 09:08:03 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6d18ed2a2e 
					 
					
						
						
							
							Update docker docs with ARM CUDA cross-compile ( #19037 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <michael@neuralmagic.com > 
						
						
					 
					
						2025-06-03 08:21:53 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f32fcd9444 
					 
					
						
						
							
							[v1][KVCacheManager] Rename BlockHashType to BlockHash ( #19015 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chen Zhang <zhangch99@outlook.com > 
						
						
					 
					
						2025-06-03 08:01:48 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d32aa2e670 
					 
					
						
						
							
							[Bugfix] Use cmake 3.26.1 instead of 3.26 to avoid build failure ( #19019 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lu Fang <lufang@fb.com > 
						
						
					 
					
						2025-06-03 00:16:17 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						cc977286e7 
					 
					
						
						
							
							Reduce logs in CLI scripts and plugin loader ( #18970 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-06-03 06:00:45 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						17430e3653 
					 
					
						
						
							
							[bugfix] small fix logic issue ( #18999 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-06-03 05:35:12 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1282bd812e 
					 
					
						
						
							
							Add tarsier model support ( #18985 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: 汪志鹏 <wangzhipeng628@gmail.com > 
						
						
					 
					
						2025-06-03 13:13:13 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						bdce64f236 
					 
					
						
						
							
							[V1] Support DP with Ray ( #18779 )  
						
						 
						
						
						
						
					 
					
						2025-06-02 21:15:13 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9e6f61e8c3 
					 
					
						
						
							
							[ROCm][Build] Clean up the ROCm build ( #19040 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Gregory Shtrasberg <Gregory.Shtrasberg@amd.com > 
						
						
					 
					
						2025-06-02 20:47:47 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8655f47f37 
					 
					
						
						
							
							[CPU][CI] Re-enable the CPU CI tests ( #19046 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: jiang.li <jiang1.li@intel.com > 
						
						
					 
					
						2025-06-02 20:46:47 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4ce42f9204 
					 
					
						
						
							
							Adding "LoRA Test %N" to AMD production tests ( #18929 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Yida Wu <yidawu@alumni.cmu.edu > 
						
						
					 
					
						2025-06-02 20:46:44 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8a57872b2a 
					 
					
						
						
							
							[Bugfix][EP+DP] Use pplx-kernel internode instead of intranode ( #19034 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Tyler Michael Smith <tysmith@redhat.com >
Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com > 
						
						
					 
					
						2025-06-03 11:36:51 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5bc1ad6cee 
					 
					
						
						
							
							[Doc] Remove duplicate TOCs during MkDocs migration ( #19021 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Zerohertz <ohg3417@gmail.com > 
						
						
					 
					
						2025-06-02 19:49:48 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9112b443a0 
					 
					
						
						
							
							[Hardware][TPU] Initial support of model parallelism with single worker using SPMD ( #18011 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Siyuan Liu <lsiyuan@google.com >
Co-authored-by: Hossein Sarshar <hossein.sarshar@gmail.com >
Co-authored-by: Chengji Yao <chengjiyao@google.com > 
						
						
					 
					
						2025-06-03 00:06:20 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c57d577e8d 
					 
					
						
						
							
							add an absolute path for run.sh ( #18258 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: calvin chen <120380290@qq.com > 
						
						
					 
					
						2025-06-02 19:38:23 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ca2f6b9c30 
					 
					
						
						
							
							[Bugfix][Model] Attempt to fix eagle in V0. ( #18978 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Gregory Shtrasberg <Gregory.Shtrasberg@amd.com > 
						
						
					 
					
						2025-06-02 08:15:53 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						20133cfee2 
					 
					
						
						
							
							[Frontend] enable custom logging for the uvicorn server (OpenAI API server) ( #18403 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: François Paupier <francois.paupier@gmail.com >
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com > 
						
						
					 
					
						2025-06-02 15:04:23 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ebb1ec9318 
					 
					
						
						
							
							[Model] enable data parallel for Llama4 vision encoder ( #18368 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yzhen <yzhen@devgpu093.cco2.facebook.com >
Co-authored-by: yZhen <yZhen@fb.com >
Co-authored-by: yzhen <yzhen@devgpu093.cco2.facebook.com > 
						
						
					 
					
						2025-06-02 19:22:54 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5b168b6d7a 
					 
					
						
						
							
							[doc] add pytest tips ( #19010 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-06-02 11:07:26 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9760fd8f6a 
					 
					
						
						
							
							[Core] Support inplace model weights loading ( #18745 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: 22quinn <33176974+22quinn@users.noreply.github.com > 
						
						
					 
					
						2025-06-02 17:38:50 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b9f61e1387 
					 
					
						
						
							
							[Bugfix][Nixl] Fix DP Metadata Handshake ( #19008 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: rshaw@neuralmagic.com  <robertgshaw2@gmail.com > 
						
						
					 
					
						2025-06-02 03:30:41 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d6fd3a33b8 
					 
					
						
						
							
							[Misc] reuse num_tokens_across_dp of get_dp_padding to avoid unnecessary dp all reduce in set_forward_context ( #18935 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Tyler Michael Smith <tysmith@redhat.com >
Co-authored-by: zhuhaoran <zhuhaoran.zhr@alibaba-inc.com >
Co-authored-by: Tyler Michael Smith <tysmith@redhat.com > 
						
						
					 
					
						2025-06-01 19:41:18 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						432ec9926e 
					 
					
						
						
							
							[doc] wrong output ( #19000 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-06-01 11:26:14 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2b102d51ad 
					 
					
						
						
							
							[BugFix] Fix incorrect metrics shutdown error log message ( #18992 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-06-01 11:42:23 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						aa54a7bf7b 
					 
					
						
						
							
							[BugFix] fix data parallel construct ipv6 url addres ( #18991 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: rongfu.leng <rongfu.leng@daocloud.io > 
						
						
					 
					
						2025-06-01 11:42:10 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2ad6194a02 
					 
					
						
						
							
							Let max_num_batched_tokens use human_readable_int for large numbers ( #18968 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-06-01 11:41:29 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c594cbf565 
					 
					
						
						
							
							[doc] small fix -  mkdocs ( #18996 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-05-31 20:23:43 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a35ca765a5 
					 
					
						
						
							
							[LoRA] Support dynamically initialize packed_modules_mapping for VLM with arbitrary components ( #18987 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: isotr0py <2037008807@qq.com >
Signed-off-by: Isotr0py <2037008807@qq.com > 
						
						
					 
					
						2025-06-01 11:06:57 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6aa8f9a4e7 
					 
					
						
						
							
							[Core] Rework dtype resolution ( #18751 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-06-01 11:04:23 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1bc86a3da1 
					 
					
						
						
							
							[Bugfix] Fix EAGLE3 broken logits ( #18909 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Benjamin Chislett <benjamin.chislett@centml.ai > 
						
						
					 
					
						2025-05-31 19:58:07 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						bbfa0c61d1 
					 
					
						
						
							
							[Misc][Benchmark] Add support for CustomDataset ( #18511 )  
						
						 
						
						
						
						
					 
					
						2025-05-31 19:07:38 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						20079c6e36 
					 
					
						
						
							
							[Misc] add return token strs for tokenize ( #18941 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-05-31 18:00:11 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9a1b9b99d7 
					 
					
						
						
							
							[BugFix] Fix multi-node offline data-parallel ( #18981 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com >
Co-authored-by: Yizhou Liu <liu_yizhou@outlook.com > 
						
						
					 
					
						2025-05-31 08:34:52 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8bf507d766 
					 
					
						
						
							
							[P/D] NixlConnector use cache device index for memory registration ( #18969 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Piotr Tarasiewicz <ptarasiewicz@nvidia.com > 
						
						
					 
					
						2025-05-31 11:19:18 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						306d60401d 
					 
					
						
						
							
							[ROCm][Kernel] Add gfx950 support for skinny gemms ( #18010 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: charlifu <charlifu@amd.com > 
						
						
					 
					
						2025-05-31 07:40:05 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f2c3f66d59 
					 
					
						
						
							
							[Bugfix] Fix for issue 17396 ( #18773 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Fred Reiss <frreiss@us.ibm.com > 
						
						
					 
					
						2025-05-31 11:58:17 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0f5e0d567e 
					 
					
						
						
							
							[FEAT][ROCm] Add AITER grouped topk for DeepSeekV2 ( #18825 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: vllmellm <vllm.ellm@embeddedllm.com > 
						
						
					 
					
						2025-05-31 03:39:31 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c55d804672 
					 
					
						
						
							
							[BugFix] Pydantic part 2 ( #18911 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: luka <luka@neuralmagic.com > 
						
						
					 
					
						2025-05-31 03:39:28 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						749f5bdd38 
					 
					
						
						
							
							[doc] fix the list rendering issue - security.md ( #18982 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-05-31 10:39:21 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2a50ef5760 
					 
					
						
						
							
							[Neuron] Add Multi-Modal model support for Neuron ( #18921 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Satyajith Chilappagari <satchill@amazon.com >
Co-authored-by: Ashraf Mahgoub <ashymahg@amazon.com >
Co-authored-by: Rohith Nallamaddi <nalrohit@amazon.com >
Co-authored-by: FeliciaLuo <luof@amazon.com >
Co-authored-by: Elaine Zhao <elaineyz@amazon.com > 
						
						
					 
					
						2025-05-31 10:39:11 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b8b904795d 
					 
					
						
						
							
							fix security issue of logging llm output ( #18980 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lu Fang <fanglu@fb.com >
Co-authored-by: Lucia (Lu) Fang <fanglu@meta.com > 
						
						
					 
					
						2025-05-31 10:38:56 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ba5111f237 
					 
					
						
						
							
							[Bugfix]: Fix the incompatibility issue with Structured Outputs when Thinking is disabled ( #18879 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com > 
						
						
					 
					
						2025-05-31 09:20:54 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1e123529d7 
					 
					
						
						
							
							[Misc] Fix estimated max model len msg ( #18966 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Yong Hoon Shin <yhshin@meta.com > 
						
						
					 
					
						2025-05-31 16:43:44 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						dff80b0e42 
					 
					
						
						
							
							[Frontend] Add rerank support to run_batch endpoint ( #16278 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Pooya Davoodi <pooya.davoodi@parasail.io > 
						
						
					 
					
						2025-05-31 07:40:01 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7782464a17 
					 
					
						
						
							
							create util function for batched arange ( #18937 )  
						
						 
						
						
						
						
					 
					
						2025-05-31 13:50:38 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0f71e24034 
					 
					
						
						
							
							[Docs] Correct multiprocessing design doc ( #18964 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lukas Geiger <lukas.geiger94@gmail.com > 
						
						
					 
					
						2025-05-31 01:30:15 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1dab4d5718 
					 
					
						
						
							
							Tool parser regex timeout handling ( #18960 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Will Eaton <weaton@redhat.com > 
						
						
					 
					
						2025-05-30 21:02:54 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7f21e8052b 
					 
					
						
						
							
							[Misc] add group_size is -1 in awq quantization ( #18910 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: rongfu.leng <rongfu.leng@daocloud.io > 
						
						
					 
					
						2025-05-30 17:34:22 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5a8641638a 
					 
					
						
						
							
							[VLM] Add PP support and fix GPTQ inference for Ovis models ( #18958 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: isotr0py <2037008807@qq.com >
Signed-off-by: Isotr0py <2037008807@qq.com > 
						
						
					 
					
						2025-05-30 17:11:44 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f49239cb45 
					 
					
						
						
							
							Benchmark script for fp8 vs bf16 gemm ( #17126 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-05-30 10:56:11 -06:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2dbe8c0774 
					 
					
						
						
							
							[Perf] API-server scaleout with many-to-many server-engine comms  ( #17546 )  
						
						 
						
						
						
						
					 
					
						2025-05-30 08:17:00 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						84ec470fca 
					 
					
						
						
							
							Improve "failed to get the hash of the compiled graph" error ( #18956 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: rzou <zou3519@gmail.com > 
						
						
					 
					
						2025-05-30 15:00:54 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b29ca5c4d5 
					 
					
						
						
							
							[Docs] Update SECURITY.md with link to our security guide ( #18961 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Russell Bryant <rbryant@redhat.com > 
						
						
					 
					
						2025-05-30 07:37:27 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ec6833c5e9 
					 
					
						
						
							
							[doc] show the count for fork and watch ( #18950 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-05-30 06:45:59 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e1fadf1197 
					 
					
						
						
							
							[Feature] minicpm eagle support ( #18943 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: huangyuxiang03 <huangyx0321@gmail.com >
Co-authored-by: huangyuxiang03 <huangyx0321@gmail.com > 
						
						
					 
					
						2025-05-30 06:45:56 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						43ff405b90 
					 
					
						
						
							
							[CI/Build] remove regex from build dependencies ( #18945 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Daniele Trifirò <dtrifiro@redhat.com >
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com > 
						
						
					 
					
						2025-05-30 04:02:50 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						fba02e3bd1 
					 
					
						
						
							
							[Bugfix][TPU] Fix tpu model runner testcase failure ( #18810 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Carol Zheng <cazheng@google.com > 
						
						
					 
					
						2025-05-30 18:04:03 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4577fc9abb 
					 
					
						
						
							
							[Misc]Fix typo ( #18947 )  
						
						 
						
						
						
						
					 
					
						2025-05-30 02:21:35 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5f1d0c8118 
					 
					
						
						
							
							[Bugfix][Failing Test] Fix test_vllm_port.py ( #18618 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: rabi <ramishra@redhat.com > 
						
						
					 
					
						2025-05-30 17:13:47 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c3bb9f2331 
					 
					
						
						
							
							[Model] Use in-place adds in SigLIP ( #18922 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lukas Geiger <lukas.geiger94@gmail.com > 
						
						
					 
					
						2025-05-30 17:12:59 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8f8900cee9 
					 
					
						
						
							
							[doc] add mkdocs doc ( #18930 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-05-30 07:58:44 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6acb7a6285 
					 
					
						
						
							
							[Misc]Fix benchmarks/README.md for speculative decoding ( #18897 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: rabi <ramishra@redhat.com > 
						
						
					 
					
						2025-05-30 07:58:04 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4f4a6b844a 
					 
					
						
						
							
							[Deprecation] Remove mean pooling default for Qwen2EmbeddingModel ( #18913 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-05-30 06:53:37 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4d0a1541be 
					 
					
						
						
							
							[Bugfix] Remove NVFP4 scales assertions to fix load_format=dummy ( #18861 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-05-30 13:37:36 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						77b6e74fe2 
					 
					
						
						
							
							[ROCm] Remove unnecessary assertion of max_model_len in ROCM_AITER_MLA attention backend. ( #18938 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: vllmellm <vllm.ellm@embeddedllm.com > 
						
						
					 
					
						2025-05-29 22:33:17 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5acf828d99 
					 
					
						
						
							
							[docs] fix: fix markdown syntax ( #18927 )  
						
						 
						
						
						
						
					 
					
						2025-05-30 05:20:48 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3987e2ae96 
					 
					
						
						
							
							[Model] Use AutoWeightsLoader for mamba2 ( #18918 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: iLeGend <824040212@qq.com > 
						
						
					 
					
						2025-05-30 04:50:10 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						77164dad5e 
					 
					
						
						
							
							[Bugfix] Consistent ascii handling in tool parsers ( #18883 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com > 
						
						
					 
					
						2025-05-30 04:44:43 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3de3eadf5b 
					 
					
						
						
							
							improve the robustness of parsing vlms config in AutoRound ( #18894 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: wenhuach21 <wenhua.cheng@intel.com > 
						
						
					 
					
						2025-05-29 19:24:47 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3132290a14 
					 
					
						
						
							
							[TPU][CI/CD] Clean up docker for TPU tests. ( #18926 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Carol Zheng <cazheng@google.com > 
						
						
					 
					
						2025-05-30 10:24:19 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1aa2f81b43 
					 
					
						
						
							
							[Misc] Update type annotation for rotary embedding base ( #18914 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-05-30 10:17:01 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d54af615d5 
					 
					
						
						
							
							[Bugfix] Fix PP default fallback behavior for V1 ( #18915 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-05-30 10:13:17 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a1cc9f33a3 
					 
					
						
						
							
							[TPU] remove transpose ops in moe kernel ( #18923 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chengji Yao <chengjiyao@google.com > 
						
						
					 
					
						2025-05-29 23:00:11 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a521ef06e5 
					 
					
						
						
							
							Use standalone_compile by default in torch >= 2.8.0 ( #18846 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: rzou <zou3519@gmail.com > 
						
						
					 
					
						2025-05-30 06:41:58 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						64eaf5fe05 
					 
					
						
						
							
							[P/D] NixlConnector DP fixes ( #18903 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Will Eaton <weaton@redhat.com > 
						
						
					 
					
						2025-05-29 18:08:40 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d1d61f3351 
					 
					
						
						
							
							[BugFix] Make DP work with connector-delayed new requests ( #18559 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com >
Co-authored-by: Will Eaton <weaton@redhat.com > 
						
						
					 
					
						2025-05-29 18:04:18 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						32ce3cf7c9 
					 
					
						
						
							
							[V1] Allocate kv_cache with stride order for V1 ( #18775 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: nicklucche <nlucches@redhat.com > 
						
						
					 
					
						2025-05-29 17:54:16 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d58f9c7f7a 
					 
					
						
						
							
							[Misc] Remove duplicate init for self.vllm_config ( #18896 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: googs1025 <googs1025@gmail.com > 
						
						
					 
					
						2025-05-29 17:26:07 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c29034037d 
					 
					
						
						
							
							[Deprecation] Disallow pos-args other than model when initializing LLM ( #18802 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-05-29 09:36:58 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1b7cfd5a36 
					 
					
						
						
							
							[ROCm][V0][Attention] Revert to the previous FA triton kernel ( #18226 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Gregory Shtrasberg <Gregory.Shtrasberg@amd.com > 
						
						
					 
					
						2025-05-29 12:13:18 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						da4b69d0b4 
					 
					
						
						
							
							[Attention][V1] Toggle for v1 attention backend ( #18275 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Gregory Shtrasberg <Gregory.Shtrasberg@amd.com > 
						
						
					 
					
						2025-05-29 10:48:24 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c9479b2920 
					 
					
						
						
							
							[Bugfix] Fix the failing gte embedding test ( #18720 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <2037008807@qq.com > 
						
						
					 
					
						2025-05-29 07:39:25 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6f2909405e 
					 
					
						
						
							
							[Doc]  Fix codeblocks formatting in LoRA adapters documentation ( #18907 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Zerohertz <ohg3417@gmail.com > 
						
						
					 
					
						2025-05-29 07:38:55 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b169d5f7b6 
					 
					
						
						
							
							[Misc][Tools][Benchmark] Add benchmark_serving supports for llama.cpp.  ( #18692 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Duyi-Wang <duyi.wang@intel.com > 
						
						
					 
					
						2025-05-29 20:02:08 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f8977c233f 
					 
					
						
						
							
							Fix an error in dummy weight loading for quantization models ( #18855 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chenyaaang <chenyangli@google.com > 
						
						
					 
					
						2025-05-29 03:07:20 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f274581f44 
					 
					
						
						
							
							[BugFix] Update pydantic to fix error on python 3.10 ( #18852 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: luka <luka@neuralmagic.com > 
						
						
					 
					
						2025-05-29 03:05:46 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0b1447f890 
					 
					
						
						
							
							[Bugfix] Ensure tensors are contiguous during serialisation ( #18860 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lukas Geiger <lukas.geiger94@gmail.com > 
						
						
					 
					
						2025-05-29 03:05:20 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						24d0ef8970 
					 
					
						
						
							
							[Misc] Replace TODO in serving transcription ( #18895 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: NickLucche <nlucches@redhat.com > 
						
						
					 
					
						2025-05-29 02:58:14 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7fcfd954ff 
					 
					
						
						
							
							[Bugfix] Fix misleading information in the documentation ( #18845 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-05-29 02:54:14 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e740d07f07 
					 
					
						
						
							
							[doc] add CLI doc ( #18871 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-05-29 09:51:36 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a652e71dd0 
					 
					
						
						
							
							[Doc] Remove redundant spaces from compatibility_matrix.md ( #18891 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: windsonsea <haifeng.yao@daocloud.io > 
						
						
					 
					
						2025-05-29 02:51:20 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						34d6c447c4 
					 
					
						
						
							
							[LoRA] Add LoRA support for InternVL  ( #18842 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-05-29 08:46:24 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						972eddf7c9 
					 
					
						
						
							
							[Neuron] Add multi-LoRA support for Neuron. ( #18284 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Satyajith Chilappagari <satchill@amazon.com > 
						
						
					 
					
						2025-05-29 16:41:22 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						fd7bb88d72 
					 
					
						
						
							
							Fixes a dead link in nightly benchmark readme ( #18856 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Brent Salisbury <bsalisbu@redhat.com > 
						
						
					 
					
						2025-05-29 04:41:39 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3c49dbdd03 
					 
					
						
						
							
							Skip device and quant Pydantic validation to make plugin device work ( #18843 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Yikun Jiang <yikunkero@gmail.com > 
						
						
					 
					
						2025-05-28 20:12:30 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1661a9c28f 
					 
					
						
						
							
							[Doc][Neuron] Update documentation for Neuron ( #18868 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Elaine Zhao <elaineyz@amazon.com > 
						
						
					 
					
						2025-05-28 19:44:01 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8e882ffdc0 
					 
					
						
						
							
							[Bugfix][TPU] fix moe custom kernel import ( #18853 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chengji Yao <chengjiyao@google.com > 
						
						
					 
					
						2025-05-28 19:34:19 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						26b4fa45be 
					 
					
						
						
							
							Add ability to use CUDAGraphs with use_inductor=False ( #17345 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: rzou <zou3519@gmail.com > 
						
						
					 
					
						2025-05-29 10:16:52 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						515b413ebf 
					 
					
						
						
							
							Prevent the cross-encoder logic from being applied to classification tasks ( #18838 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Max de Bayser <mbayser@br.ibm.com >
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com > 
						
						
					 
					
						2025-05-28 19:16:17 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						269d901734 
					 
					
						
						
							
							[Bugfix][ROCm] fix the power of 2 exception from triton_unified_attention.py when running llama4 models and unit test fix ( #18100 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Hongxia Yang <hongxia.yang@amd.com >
Signed-off-by: tjtanaa <tunjian.tan@embeddedllm.com >
Co-authored-by: tjtanaa <tunjian.tan@embeddedllm.com > 
						
						
					 
					
						2025-05-29 07:21:46 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7951d78738 
					 
					
						
						
							
							[Core] Enable CUDA graphs for DP + All2All kernels  ( #18724 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Varun Sundar Rabindranath <varun@neuralmagic.com >
Co-authored-by: Varun Sundar Rabindranath <varun@neuralmagic.com > 
						
						
					 
					
						2025-05-28 22:55:30 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6dbe5b5c93 
					 
					
						
						
							
							Remove checks for None for fields which should never be None ( #17985 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-05-28 21:32:19 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						643622ba46 
					 
					
						
						
							
							[Hardware][TPU][V1] Multi-LoRA Optimisations for the V1 TPU backend ( #15655 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Akshat Tripathi <akshat@krai.ai >
Signed-off-by: Chengji Yao <chengjiyao@google.com >
Signed-off-by: xihajun <junfan@krai.ai >
Signed-off-by: Jorge de Freitas <jorge.de-freitas22@imperial.ac.uk >
Signed-off-by: Jorge de Freitas <jorge@krai.ai >
Co-authored-by: Chengji Yao <chengjiyao@google.com >
Co-authored-by: xihajun <junfan@krai.ai >
Co-authored-by: Jorge de Freitas <jorge.de-freitas22@imperial.ac.uk >
Co-authored-by: Jorge de Freitas <jorge@krai.ai > 
						
						
					 
					
						2025-05-28 19:59:09 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a09c7ca9f2 
					 
					
						
						
							
							[Chore][Spec Decode] Update check NoneType instead of assigning variables ( #18836 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Aaron Pham <contact@aarnphm.xyz > 
						
						
					 
					
						2025-05-28 18:57:19 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0e98964e94 
					 
					
						
						
							
							[V1][Metrics] Remove metrics that were deprecated in 0.8 ( #18837 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Mark McLoughlin <markmc@redhat.com > 
						
						
					 
					
						2025-05-28 18:54:12 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c68b5c63eb 
					 
					
						
						
							
							[Misc] fix olmoe model layer can't laod in tp gt 1 ( #18828 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: rongfu.leng <rongfu.leng@daocloud.io > 
						
						
					 
					
						2025-05-28 17:36:21 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						fced756923 
					 
					
						
						
							
							[Chore] update ty configuration ( #18839 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Aaron Pham <contact@aarnphm.xyz > 
						
						
					 
					
						2025-05-28 08:59:11 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						321331b8ae 
					 
					
						
						
							
							[Core] Add Lora Support to Beam Search ( #18346 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Alex-Brooks <Alex.Brooks@ibm.com > 
						
						
					 
					
						2025-05-28 08:58:24 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6e4cea1cc5 
					 
					
						
						
							
							decrement server_load on listen for disconnect ( #18784 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Daniel Salib <danielsalib@meta.com > 
						
						
					 
					
						2025-05-28 22:15:12 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						435fa95444 
					 
					
						
						
							
							[Frontend] add run batch to CLI ( #18804 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-05-28 07:08:57 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4c2b38ce9e 
					 
					
						
						
							
							Enable Pydantic mypy checks and convert configs to Pydantic dataclasses ( #17599 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-05-28 12:46:04 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d781930f90 
					 
					
						
						
							
							[Platform][Dist] Make torch distributed process group extendable ( #18763 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Mengqing Cao <cmq0113@163.com > 
						
						
					 
					
						2025-05-28 10:52:34 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ce75efeecb 
					 
					
						
						
							
							[BugFix] FA2 MLA Accuracy Issue ( #18807 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: LucasWilkinson <lwilkinson@neuralmagic.com > 
						
						
					 
					
						2025-05-28 08:59:39 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						aa42561e40 
					 
					
						
						
							
							Fix PiecewiseCompileInterpreter ( #17338 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: rzou <zou3519@gmail.com > 
						
						
					 
					
						2025-05-28 08:40:53 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						de65fc8e1e 
					 
					
						
						
							
							[CI] improve embed testing ( #18747 )  
						
						 
						
						
						
						
					 
					
						2025-05-28 00:16:35 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0c492b7824 
					 
					
						
						
							
							[Deprecation] Remove fallbacks for Embeddings API ( #18795 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-05-28 15:09:04 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0f0926b43f 
					 
					
						
						
							
							[Deprecation] Remove unused sync methods in async_timeout ( #18792 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-05-28 15:08:48 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7f2c1a87e9 
					 
					
						
						
							
							[Deprecation] Require overriding get_dummy_text and get_dummy_mm_data ( #18796 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-05-28 15:08:35 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b78f844a67 
					 
					
						
						
							
							[Bugfix][FailingTest]Fix test_model_load_with_params.py ( #18758 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: rabi <ramishra@redhat.com > 
						
						
					 
					
						2025-05-28 05:42:54 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5e13c07d00 
					 
					
						
						
							
							[V1] [Bugfix] eagle bugfix and enable correct lm_head for multimodal (2) ( #18781 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Ronald Xu <ronaldxu@amazon.com > 
						
						
					 
					
						2025-05-28 05:09:14 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						774c5fde30 
					 
					
						
						
							
							[V1] fix torch profiling for V1 offline scenarios ( #18445 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Divakar Verma <divakar.verma@amd.com > 
						
						
					 
					
						2025-05-28 04:16:30 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9a21e331ff 
					 
					
						
						
							
							[Bugfix]: correctly propagate errors message caught at the chat_templating step to the client ( #18769 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Guillaume Calmettes <gcalmettes@scaleway.com > 
						
						
					 
					
						2025-05-28 03:35:43 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3e9ce609bd 
					 
					
						
						
							
							[Bugfix] Fix nomic max_model_len ( #18755 )  
						
						 
						
						
						
						
					 
					
						2025-05-27 20:29:53 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						794ae1f551 
					 
					
						
						
							
							[rocm] Fix wrong attention log ( #18764 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Felix Marty <felmarty@amd.com > 
						
						
					 
					
						2025-05-27 19:45:41 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d73a9457a5 
					 
					
						
						
							
							[Core] Improve Tensor serialisation ( #18774 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lukas Geiger <lukas.geiger94@gmail.com > 
						
						
					 
					
						2025-05-28 09:46:21 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a3896c7f02 
					 
					
						
						
							
							[Build] Fixes for CMake install ( #18570 )  
						
						 
						
						
						
						
					 
					
						2025-05-27 20:49:24 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						51e98e4ffd 
					 
					
						
						
							
							[Bugfix] Disable prefix caching by default for benchmark ( #18771 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: cascade812 <cascade812@outlook.com > 
						
						
					 
					
						2025-05-28 08:18:09 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e56f44d9ec 
					 
					
						
						
							
							Support datasets in vllm bench serve and sync with benchmark_[serving,datasets].py ( #18566 )  
						
						 
						
						
						
						
					 
					
						2025-05-27 19:59:48 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e0cbad4e30 
					 
					
						
						
							
							[Neuron] Support quantization on neuron ( #18283 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Satyajith Chilappagari <satchill@amazon.com > 
						
						
					 
					
						2025-05-27 22:10:33 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b48d5cca16 
					 
					
						
						
							
							[CI/Build] [TPU] Fix TPU CI exit code ( #18282 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Carol Zheng <cazheng@google.com > 
						
						
					 
					
						2025-05-27 14:54:59 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5873877241 
					 
					
						
						
							
							[Bugfix] Mistral tool calling when content is list ( #18729 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-05-27 09:05:37 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						696259ca01 
					 
					
						
						
							
							[Core] Automatically cast multi-modal input dtype ( #18756 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-05-27 23:45:48 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6b6d496114 
					 
					
						
						
							
							optimize get_kv_cache_torch_dtype ( #18531 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: idellzheng <idellzheng@tencent.com > 
						
						
					 
					
						2025-05-27 13:08:44 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						aaa4ac1c95 
					 
					
						
						
							
							Disable prefix cache by default for benchmark ( #18639 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: cascade812 <cascade812@outlook.com > 
						
						
					 
					
						2025-05-27 20:06:34 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						06a0338015 
					 
					
						
						
							
							[V1][Metrics] Add API for accessing in-memory Prometheus metrics ( #17010 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Mark McLoughlin <markmc@redhat.com > 
						
						
					 
					
						2025-05-27 09:37:06 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4318c0559d 
					 
					
						
						
							
							[CI/Build] Remove imports of built-in re ( #18750 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-05-27 09:19:18 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a68e293cb9 
					 
					
						
						
							
							[Doc]  Convert Sphinx directives ( {class}, {meth}, {attr}, ...) to MkDocs format for better documentation linking ( #18663 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Zerohertz <ohg3417@gmail.com > 
						
						
					 
					
						2025-05-27 01:44:20 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6881107948 
					 
					
						
						
							
							[BUG FIX] minicpm ( #18739 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: huangyuxiang03 <huangyx0321@gmail.com >
Co-authored-by: huangyuxiang03 <huangyx0321@gmail.com > 
						
						
					 
					
						2025-05-27 01:04:49 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e0f0ff87b8 
					 
					
						
						
							
							[Build] fix cpu build missing libtbbmalloc.so ( #18744 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Kebe <mail@kebe7jun.com > 
						
						
					 
					
						2025-05-27 01:03:56 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c24b1572ac 
					 
					
						
						
							
							Minor fix about MooncakeStoreConnector ( #18721 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: baoloongmao <baoloongmao@tencent.com > 
						
						
					 
					
						2025-05-27 08:02:28 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4693a3438c 
					 
					
						
						
							
							[Doc] cleanup deprecated flag for doc ( #18715 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: calvin chen <120380290@qq.com > 
						
						
					 
					
						2025-05-27 07:12:02 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						bbd9a84dc5 
					 
					
						
						
							
							[Hardware][Intel-Gaudi] [CI/Build] Fix multiple containers using the same name in run-hpu-test.sh ( #18752 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lukasz Durejko <ldurejko@habana.ai > 
						
						
					 
					
						2025-05-27 00:10:26 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a547aeb828 
					 
					
						
						
							
							feat(rocm-support): support mamba2 on rocm ( #18565 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Islam Almersawi <islam.almersawi@openinnovation.ai >
Co-authored-by: Islam Almersawi <islam.almersawi@openinnovation.ai > 
						
						
					 
					
						2025-05-27 00:07:53 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						fc6d0c290f 
					 
					
						
						
							
							[Misc] improve docs ( #18734 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-05-27 07:07:01 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						753944fa9b 
					 
					
						
						
							
							[Doc] Update reproducibility doc and example ( #18741 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-05-27 07:03:13 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						25a817f202 
					 
					
						
						
							
							[Doc] Update OOT model docs ( #18742 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-05-27 06:30:31 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d260f799a9 
					 
					
						
						
							
							[FEAT] [ROCm] Upgrade AITER Fused MoE kernels. ( #18271 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: vllmellm <vllm.ellm@embeddedllm.com > 
						
						
					 
					
						2025-05-26 23:14:07 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b50602d5f0 
					 
					
						
						
							
							[Model][Gemma3] Cast image pixel values already on CPU ( #18732 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lukas Geiger <lukas.geiger94@gmail.com > 
						
						
					 
					
						2025-05-27 05:42:54 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1f1b1bc03b 
					 
					
						
						
							
							[V1][Quantization] Add CUDA graph compatible v1 GGUF support ( #18646 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn >
Signed-off-by: Isotr0py <2037008807@qq.com > 
						
						
					 
					
						2025-05-27 04:40:28 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1f88dbd2bb 
					 
					
						
						
							
							[Misc] improve web section group title display ( #18684 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-05-27 04:35:16 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0eebd74842 
					 
					
						
						
							
							[Model][Gemma3] Simplify image input validation ( #18710 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lukas Geiger <lukas.geiger94@gmail.com > 
						
						
					 
					
						2025-05-27 11:13:37 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						27bebcd897 
					 
					
						
						
							
							Convert examples to ruff-format ( #18400 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-05-26 16:57:54 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e7523c2e03 
					 
					
						
						
							
							[V1][Sampler] Improve performance of FlashInfer sampling by sampling logits instead of probs ( #18608 )  
						
						 
						
						
						
						
					 
					
						2025-05-26 11:49:36 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a869baca73 
					 
					
						
						
							
							[Bugfix] Fix Llama GGUF initialization ( #18717 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-05-26 07:49:22 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						82e2339b06 
					 
					
						
						
							
							[Doc] Move examples and further reorganize user guide ( #18666 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-05-26 07:38:04 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9553fdb41e 
					 
					
						
						
							
							[Doc] Improve API docs ( #18713 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-05-26 07:33:34 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						243eb9199f 
					 
					
						
						
							
							[Bugfix]: handle hf-xet CAS error when loading Qwen3 weights in vLLM ( #18701 )  
						
						 
						
						
						
						
					 
					
						2025-05-26 07:10:56 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0665e29998 
					 
					
						
						
							
							[Misc] add AutoGen integration ( #18712 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com > 
						
						
					 
					
						2025-05-26 13:56:18 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e76be06550 
					 
					
						
						
							
							[Hardware][Intel-Gaudi] [CI/Build] Add tensor parallel size = 2 test to HPU CI ( #18709 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lukasz Durejko <ldurejko@habana.ai > 
						
						
					 
					
						2025-05-26 05:26:07 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0877750029 
					 
					
						
						
							
							[CI/Build] Split pooling and generation extended language models tests in CI ( #18705 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <2037008807@qq.com > 
						
						
					 
					
						2025-05-26 04:00:08 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6d68030f1c 
					 
					
						
						
							
							[Model] Add support for YARN in NemotronNAS models ( #18427 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nave Assaf <nassaf@nvidia.com > 
						
						
					 
					
						2025-05-26 10:31:49 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5a2c76cbe1 
					 
					
						
						
							
							[CI] fix dump_input for str type ( #18697 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Andy Xie <andy.xning@gmail.com > 
						
						
					 
					
						2025-05-26 18:23:35 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						38b13dfe78 
					 
					
						
						
							
							[CI/Build] Replace math.isclose with pytest.approx ( #18703 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-05-26 02:05:17 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						61a45e7a72 
					 
					
						
						
							
							[Bugfix] Fix Mistral-format models with sliding window ( #18693 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-05-26 01:44:04 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						65523a0995 
					 
					
						
						
							
							[Doc] Fix issue template format ( #18699 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-05-26 00:45:39 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4b7740a105 
					 
					
						
						
							
							[GH] Add issue template for reporting CI failures ( #18696 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-05-26 00:42:04 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4ea62c0ea0 
					 
					
						
						
							
							[CI] add missing argument ( #18694 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Andy Xie <andy.xning@gmail.com > 
						
						
					 
					
						2025-05-26 00:22:04 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						561b77a0d6 
					 
					
						
						
							
							[Bugfix] Fix the lm_head in gpt_bigcode in lora mode ( #6357 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Max de Bayser <mbayser@br.ibm.com >
Signed-off-by: Max de Bayser <maxdebayser@gmail.com > 
						
						
					 
					
						2025-05-26 14:52:25 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						abd4030d94 
					 
					
						
						
							
							refactor: simplify request handler, use positive condition check for handler assignment ( #18690 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: googs1025 <googs1025@gmail.com > 
						
						
					 
					
						2025-05-26 06:32:28 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8820821b59 
					 
					
						
						
							
							[Misc] Fixed the abnormally high TTFT issue in the PD disaggregation example ( #18644 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: zhaohaidao <zhaohaidao2008@hotmail.com >
Signed-off-by: zhaohaiyuan <zhaohaiyuan@xiaohongshu.com >
Co-authored-by: zhaohaiyuan <zhaohaiyuan@xiaohongshu.com > 
						
						
					 
					
						2025-05-26 13:51:27 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						fba0642704 
					 
					
						
						
							
							[CI/Build][Doc] Update gte-Qwen2-1.5B-instruct usage ( #18683 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk >
Signed-off-by: Isotr0py <2037008807@qq.com >
Co-authored-by: Isotr0py <2037008807@qq.com > 
						
						
					 
					
						2025-05-25 20:27:50 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6071e989df 
					 
					
						
						
							
							[Core][Multimodal] Convert PIL Image to array without data copy when hashing ( #18682 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lukas Geiger <lukas.geiger94@gmail.com > 
						
						
					 
					
						2025-05-25 17:33:35 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						57fd13a707 
					 
					
						
						
							
							[Bugfix] Fix profiling dummy data for Pixtral ( #18677 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-05-25 14:05:30 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3a886bd58c 
					 
					
						
						
							
							[Misc] small improve ( #18680 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-05-25 06:05:38 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						35be8fad62 
					 
					
						
						
							
							[CI/build] fix no regex ( #18676 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-05-25 10:10:51 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f2faac745d 
					 
					
						
						
							
							[Bugfix] Fix cpu usage and cache hit stats reporting on cpu environment ( #18674 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: zzzyq <zhangyuqi94@gmail.com >
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com > 
						
						
					 
					
						2025-05-25 02:36:06 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						279f854519 
					 
					
						
						
							
							[doc] improve readability ( #18675 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-05-25 01:40:31 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						624b77a2b3 
					 
					
						
						
							
							[doc] fix broken links ( #18671 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-05-25 01:36:33 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						503f8487c2 
					 
					
						
						
							
							[Misc] Reduce logs on startup ( #18649 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-05-24 23:03:53 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						44073a7ac3 
					 
					
						
						
							
							[BUGFIX] catch subclass first for try...except ( #18672 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Andy Xie <andy.xning@gmail.com > 
						
						
					 
					
						2025-05-25 05:34:24 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						63934543a0 
					 
					
						
						
							
							Speed up the kernels/quantization/ tests ( #18669 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-05-25 05:02:59 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						75f81750f3 
					 
					
						
						
							
							[VLM] Initialize video input support for InternVL models ( #18499 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <2037008807@qq.com >
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com > 
						
						
					 
					
						2025-05-25 04:51:25 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6ab681bcbe 
					 
					
						
						
							
							[Misc][ModelScope] Change to use runtime VLLM_USE_MODELSCOPE ( #18655 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Mengqing Cao <cmq0113@163.com >
Signed-off-by: Isotr0py <2037008807@qq.com >
Co-authored-by: Isotr0py <2037008807@qq.com > 
						
						
					 
					
						2025-05-25 04:51:21 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						cebc22f3b6 
					 
					
						
						
							
							[Misc]Replace cuda hard code with current_platform in Ray ( #14668 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: noemotiovon <757486878@qq.com > 
						
						
					 
					
						2025-05-24 20:26:31 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6c6dcd8611 
					 
					
						
						
							
							[MISC] correct signature for LoaderFunction ( #18670 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Andy Xie <andy.xning@gmail.com > 
						
						
					 
					
						2025-05-24 20:17:47 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7891fdf0c6 
					 
					
						
						
							
							[V1] Fix _pickle.PicklingError: Can't pickle <class 'transformers_modules.deepseek-ai.DeepSeek-V2-Lite... ( #18640 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Seiji Eicher <seiji@anyscale.com > 
						
						
					 
					
						2025-05-24 20:07:20 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6825d9a998 
					 
					
						
						
							
							[BugFix][Spec Decode] Improve Prefix Caching Logic in Speculative Decoding ( #18668 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-05-24 17:33:46 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b554ab736e 
					 
					
						
						
							
							[CI/Build] fix permission denied issue ( #18645 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-05-24 16:09:10 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9ea7f1abf3 
					 
					
						
						
							
							fix(regression): clone from reference items ( #18662 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Aaron Pham <contact@aarnphm.xyz > 
						
						
					 
					
						2025-05-24 15:25:20 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2807271c86 
					 
					
						
						
							
							[CI] enforce import regex instead of re ( #18665 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Aaron Pham <contact@aarnphm.xyz > 
						
						
					 
					
						2025-05-24 08:04:14 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b9018a3f9f 
					 
					
						
						
							
							[BugFix] Fix import error for fused_moe ( #18642 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com > 
						
						
					 
					
						2025-05-24 07:53:36 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4ceafb6299 
					 
					
						
						
							
							[MISC] typo fix and clean import ( #18664 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Andy Xie <andy.xning@gmail.com > 
						
						
					 
					
						2025-05-24 07:52:09 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2e6705784f 
					 
					
						
						
							
							[CI/Build] chmod +x to cleanup_pr_body.sh ( #18650 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-05-24 07:26:45 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1cb194a018 
					 
					
						
						
							
							[Doc] Reorganize user guide ( #18661 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-05-24 07:25:33 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2cd4d58df4 
					 
					
						
						
							
							[Model] use AutoWeightsLoader for gpt2 ( #18625 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: zt2370 <ztang2370@gmail.com > 
						
						
					 
					
						2025-05-24 13:36:13 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6d166a8d35 
					 
					
						
						
							
							[Doc] Add community links ( #18657 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-05-24 06:06:38 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ef1dd6870f 
					 
					
						
						
							
							[Doc] Fix indentation problems in V0 Paged Attention docs ( #18659 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-05-24 06:06:35 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e77dc4bad8 
					 
					
						
						
							
							[MISC][pre-commit] Add pre-commit check for triton import ( #17716 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Mengqing Cao <cmq0113@163.com > 
						
						
					 
					
						2025-05-24 20:09:15 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						07458a51ce 
					 
					
						
						
							
							[Doc] Update README links, mark external links ( #18635 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-05-24 09:57:15 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c1e4a4052d 
					 
					
						
						
							
							[V1][Spec Decode] Support multi-layer eagle draft model ( #18030 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: qizixi <qizixi@meta.com > 
						
						
					 
					
						2025-05-24 09:45:34 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a859320575 
					 
					
						
						
							
							[Model] Add support for Qwen2.5-Omni-7B-AWQ (Qwen2_5OmniForConditionalGeneration) ( #18647 )  
						
						 
						
						
						
						
					 
					
						2025-05-24 09:15:36 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						441dc63ac7 
					 
					
						
						
							
							[Frontend] improve vllm serve --help display ( #18643 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-05-24 07:53:22 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d55e446d13 
					 
					
						
						
							
							[V1][Spec Decode] Small refactors to improve eagle bookkeeping performance ( #18424 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: qizixi <qizixi@meta.com > 
						
						
					 
					
						2025-05-24 06:51:22 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ec82c3e388 
					 
					
						
						
							
							FIX MOE issue in AutoRound format ( #18586 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: wenhuach21 <wenhua.cheng@intel.com > 
						
						
					 
					
						2025-05-23 22:01:40 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						45ab403a1f 
					 
					
						
						
							
							config.py: Clarify that only local GGUF checkpoints are supported. ( #18623 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Mathieu Bordere <mathieu@letmetweakit.com > 
						
						
					 
					
						2025-05-24 08:46:34 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2b10ba7491 
					 
					
						
						
							
							[Bugfix][Nixl] Fix Preemption Bug ( #18631 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: rshaw@neuralmagic.com  <robertgshaw2@gmail.com > 
						
						
					 
					
						2025-05-23 23:30:16 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4fc1bf813a 
					 
					
						
						
							
							[Bugfix] Migrate to REGEX Library to prevent catastrophic backtracking ( #18454 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Crucifixion-Fxl <xmufxl@gmail.com >
Co-authored-by: Crucifixion-Fxl <xmufxl@gmail.com > 
						
						
					 
					
						2025-05-23 16:16:26 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f2036734fb 
					 
					
						
						
							
							[ModelOpt] Introduce VLLM_MAX_TOKENS_PER_EXPERT_FP4_MOE env var to control blockscale tensor allocation ( #18160 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Pavani Majety <pmajety@nvidia.com > 
						
						
					 
					
						2025-05-23 15:52:20 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7d9216495c 
					 
					
						
						
							
							[Doc] Update references to doc files ( #18637 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-05-23 15:49:21 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0ddf88e16e 
					 
					
						
						
							
							[CI] Enable test_initialization to run on V1 ( #16736 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-05-23 15:09:44 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1645b60196 
					 
					
						
						
							
							Use prebuilt FlashInfer x86_64 PyTorch 2.7 CUDA 12.8 wheel for CI ( #18537 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Huy Do <huydhn@gmail.com > 
						
						
					 
					
						2025-05-23 21:17:16 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2628a69e35 
					 
					
						
						
							
							[V1] Support Deepseek MTP ( #18435 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Rui Qiao <ruisearch42@gmail.com >
Signed-off-by: YaoJiayi <120040070@link.cuhk.edu.cn >
Co-authored-by: Rui Qiao <ruisearch42@gmail.com > 
						
						
					 
					
						2025-05-23 10:26:28 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						371f7e4ca2 
					 
					
						
						
							
							[Doc] Fix broken links and unlinked docs, add shortcuts to home sidebar ( #18627 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-05-23 10:22:40 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						15b45ffb9a 
					 
					
						
						
							
							[Doc] Avoid documenting dynamic / internal modules ( #18626 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-05-23 09:58:02 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						273cb3b4d9 
					 
					
						
						
							
							[Doc] Fix top-level API links/docs ( #18621 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-05-23 09:46:56 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8ddd1cf26a 
					 
					
						
						
							
							[Doc] fix list formatting ( #18624 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: David Xia <david@davidxia.com > 
						
						
					 
					
						2025-05-23 09:41:17 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6550114c9c 
					 
					
						
						
							
							[v1] Redo "Support multiple KV cache groups in GPU model runner ( #17945 )" ( #18593 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chen Zhang <zhangch99@outlook.com > 
						
						
					 
					
						2025-05-23 09:39:47 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9520a989df 
					 
					
						
						
							
							[Docs] Change mkdocs to not use directory urls ( #18622 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-05-23 09:33:21 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3d28ad343f 
					 
					
						
						
							
							Fix figures in design doc ( #18612 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-05-23 09:09:54 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6a7988c55b 
					 
					
						
						
							
							Refactor pplx init logic to make it modular (prepare for deepep) ( #18200 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: youkaichao <youkaichao@gmail.com > 
						
						
					 
					
						2025-05-23 23:43:43 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						022d8abe29 
					 
					
						
						
							
							[Doc] Use a different color for the announcement ( #18616 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-05-23 08:25:03 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5221815a00 
					 
					
						
						
							
							[Doc] Fix markdown list indentation for MkDocs rendering ( #18620 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Zerohertz <ohg3417@gmail.com > 
						
						
					 
					
						2025-05-23 08:23:21 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1068556b2c 
					 
					
						
						
							
							[Bugfix][Build/CI] Fixup CUDA compiler version check for CUDA_SUPPORTED_ARCHS ( #18579 )  
						
						 
						
						
						
						
					 
					
						2025-05-23 07:43:58 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2cd1fa4556 
					 
					
						
						
							
							[Misc] add Haystack integration ( #18601 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-05-23 06:21:19 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d4c2919760 
					 
					
						
						
							
							Include private attributes in API documentation ( #18614 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-05-23 06:18:31 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6220f3c6b0 
					 
					
						
						
							
							[Bugfix] Fix transformers model impl ignored for mixtral quant ( #18602 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Tristan Leclercq <tristanleclercq@gmail.com > 
						
						
					 
					
						2025-05-23 05:54:13 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						52fb23f47e 
					 
					
						
						
							
							Fix examples with code blocks in docs ( #18609 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-05-23 05:53:44 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6dd51c7ef1 
					 
					
						
						
							
							[CI/Build] Fix V1 flag being set in entrypoints tests ( #18598 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-05-23 05:51:53 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2edb533af2 
					 
					
						
						
							
							Replace {func} with mkdocs style links ( #18610 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-05-23 05:51:38 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						38a95cb4a8 
					 
					
						
						
							
							[Doc] Fix indent of contributing to vllm ( #18611 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Zerohertz <ohg3417@gmail.com > 
						
						
					 
					
						2025-05-23 05:50:07 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						cd821ea5d2 
					 
					
						
						
							
							[CI] fix kv_cache_type argument ( #18594 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Andy Xie <andy.xning@gmail.com > 
						
						
					 
					
						2025-05-23 04:49:18 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7ab056c273 
					 
					
						
						
							
							[Hardware][CPU] Update intel_extension_for_pytorch 2.7.0 and move to requirements/cpu.txt  ( #18542 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Kay Yan <kay.yan@daocloud.io > 
						
						
					 
					
						2025-05-23 04:38:42 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6526e05111 
					 
					
						
						
							
							Add myself as docs code owner ( #18605 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-05-23 04:08:31 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e493e48524 
					 
					
						
						
							
							[V0][Bugfix] Fix parallel sampling performance regression when guided decoding is enabled ( #17731 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Madeesh Kannan <shadeMe@users.noreply.github.com >
Co-authored-by: Russell Bryant <rbryant@redhat.com > 
						
						
					 
					
						2025-05-23 03:38:23 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4ce64e2df4 
					 
					
						
						
							
							[Bugfix][Model] Fix baichuan model loader for tp ( #18597 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Mengqing Cao <cmq0113@163.com > 
						
						
					 
					
						2025-05-23 02:39:05 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						fbb13a2c15 
					 
					
						
						
							
							Revert "[V1] [Bugfix] eagle bugfix and enable correct lm_head for multimodal ( #18034 )" ( #18600 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-05-23 02:18:22 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a1fe24d961 
					 
					
						
						
							
							Migrate docs from Sphinx to MkDocs ( #18145 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-05-23 02:09:53 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d0bc2f810b 
					 
					
						
						
							
							[Bugfix] Add half type support in reshape_and_cache_cpu_impl on x86 cpu platform ( #18430 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Yuqi Zhang <yuqizhang@google.com >
Co-authored-by: Yuqi Zhang <yuqizhang@google.com > 
						
						
					 
					
						2025-05-23 01:41:37 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b046cf792d 
					 
					
						
						
							
							[Feature][V1]: suupports cached_tokens in response usage ( #18149 )  
						
						 
						
						... 
						
						
						
						Co-authored-by: simon-mo <xmo@berkeley.edu > 
						
						
					 
					
						2025-05-23 01:41:03 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						54af915949 
					 
					
						
						
							
							[Doc] Update quickstart and install for cu128 using --torch-backend=auto ( #18505 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-05-23 08:36:37 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						71ea614d4a 
					 
					
						
						
							
							[Feature]Add async tensor parallelism using compilation pass ( #17882 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: cascade812 <cascade812@outlook.com > 
						
						
					 
					
						2025-05-23 01:03:34 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4c611348a7 
					 
					
						
						
							
							[V1] [Bugfix] eagle bugfix and enable correct lm_head for multimodal ( #18034 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Ronald Xu <ronaldxu@amazon.com > 
						
						
					 
					
						2025-05-23 00:37:18 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						60cad94b86 
					 
					
						
						
							
							[Hardware] correct method signatures for HPU,ROCm,XPU ( #18551 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Andy Xie <andy.xning@gmail.com > 
						
						
					 
					
						2025-05-22 22:31:59 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9c1baa5bc6 
					 
					
						
						
							
							[Misc] Replace cuda hard code with current_platform ( #16983 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: shen-shanshan <467638484@qq.com > 
						
						
					 
					
						2025-05-23 04:38:50 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4be2255c81 
					 
					
						
						
							
							[Bugfix][Benchmarks] Fix a benchmark of deepspeed-mii backend to use api_key ( #17291 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Teruaki Ishizaki <teruaki.ishizaki@ntt.com > 
						
						
					 
					
						2025-05-23 12:30:47 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ed5d408255 
					 
					
						
						
							
							[Neuron] Remove bypass on EAGLEConfig and add a test ( #18514 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Elaine Zhao <elaineyz@amazon.com > 
						
						
					 
					
						2025-05-22 21:26:32 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						583507d130 
					 
					
						
						
							
							[Spec Decode] Make EAGLE3 draft token ID mapping optional ( #18488 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Benjamin Chislett <benjamin.chislett@centml.ai >
Co-authored-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-05-22 20:17:39 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e44d8ce8c7 
					 
					
						
						
							
							[Bugfix] Set KVTransferConfig.engine_id in post_init ( #18576 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Linkun Chen <github@lkchen.net > 
						
						
					 
					
						2025-05-23 02:54:42 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						93ecb8139c 
					 
					
						
						
							
							[BugFix] Increase TP execute_model timeout ( #18558 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-05-23 10:22:11 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						fae453f8ce 
					 
					
						
						
							
							[Misc] refactor: simplify input validation and num_requests handling in _convert_v1_inputs ( #18482 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: googs1025 <googs1025@gmail.com > 
						
						
					 
					
						2025-05-23 10:15:32 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4b0da7b60e 
					 
					
						
						
							
							Enable hybrid attention models for Transformers backend ( #18494 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-05-23 10:12:08 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c6b636f9fb 
					 
					
						
						
							
							[V1][Spec Decoding] Use model_loader.get_model() to load models ( #18273 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Mark McLoughlin <markmc@redhat.com > 
						
						
					 
					
						2025-05-23 02:05:44 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						04eb88dc80 
					 
					
						
						
							
							Re-submit: Fix: Proper RGBA -> RGB conversion for PIL images. ( #18569 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chenheli Hua <huachenheli@outlook.com > 
						
						
					 
					
						2025-05-23 01:59:18 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						46791e1b4b 
					 
					
						
						
							
							[AMD] [P/D] Compute num gpus for ROCm correctly in run_accuracy_test.sh ( #18568 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Randall Smith <Randall.Smith@amd.com > 
						
						
					 
					
						2025-05-22 18:45:35 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c32e249a23 
					 
					
						
						
							
							[Frontend] [Core] Add Tensorizer support for V1, LoRA adapter serialization and deserialization ( #17926 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Sanger Steel <sangersteel@gmail.com > 
						
						
					 
					
						2025-05-22 18:44:18 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c91fe7b1b9 
					 
					
						
						
							
							[Frontend][Bug Fix] Update llama4 pythonic jinja template and llama4_pythonic parser ( #17917 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Kai Wu <kaiwu@meta.com > 
						
						
					 
					
						2025-05-22 16:44:08 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a04720bc36 
					 
					
						
						
							
							[V1][Spec Decode][Bugfix] Load quantize weights for EAGLE ( #18290 )  
						
						 
						
						
						
						
					 
					
						2025-05-22 15:17:33 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7b9d832c80 
					 
					
						
						
							
							[Tool] Add NIXL installation script ( #18172 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Linkun <github@lkchen.net > 
						
						
					 
					
						2025-05-22 14:33:16 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6e588da0f4 
					 
					
						
						
							
							[Build/CI] Fix CUDA 11.8 build ( #17679 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com >
Signed-off-by: Lucas Wilkinson <lwilkinson@neuralmagic.com >
Signed-off-by: Tyler Michael Smith <tysmith@redhat.com >
Co-authored-by: Lucas Wilkinson <lwilkinson@neuralmagic.com > 
						
						
					 
					
						2025-05-22 12:13:54 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f8d2cc5f55 
					 
					
						
						
							
							[Compile][Platform] Make PiecewiseBackend pluggable and extendable ( #18076 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Mengqing Cao <cmq0113@163.com >
Co-authored-by: youkaichao <youkaichao@gmail.com > 
						
						
					 
					
						2025-05-22 12:11:53 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						721fb9b181 
					 
					
						
						
							
							[Platform] Move platform check to right place ( #18470 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com > 
						
						
					 
					
						2025-05-22 12:11:28 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1f3a1200e4 
					 
					
						
						
							
							[Bugfix] make test_openai_schema.py pass ( #18224 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: David Xia <david@davidxia.com >
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-05-22 18:34:06 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						54631f8262 
					 
					
						
						
							
							[Misc] Call ndarray.tobytes() directly instead of ndarray.data.tobytes() ( #18347 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lukas Geiger <lukas.geiger94@gmail.com > 
						
						
					 
					
						2025-05-22 09:00:13 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						cb506ecb5a 
					 
					
						
						
							
							[Misc] improve Automatic Prefix Caching example ( #18554 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-05-22 14:50:46 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						93f71673ce 
					 
					
						
						
							
							[BugFix][CPU] Fix x86 SHM distributed module initialization ( #18536 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: jiang.li <jiang1.li@intel.com > 
						
						
					 
					
						2025-05-22 07:35:00 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3f505233fd 
					 
					
						
						
							
							[Doc] Add stream flag for chat completion example ( #18524 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: calvin chen <120380290@qq.com > 
						
						
					 
					
						2025-05-22 14:07:10 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4e04eceb58 
					 
					
						
						
							
							[Bugfix] Use random hidden states in dummy sampler run ( #18543 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Bowen Wang <abmfy@icloud.com > 
						
						
					 
					
						2025-05-22 06:48:56 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						71075029f2 
					 
					
						
						
							
							[Doc] Support --stream arg in openai_completion_client.py script ( #18388 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: googs1025 <googs1025@gmail.com > 
						
						
					 
					
						2025-05-22 13:20:17 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ca86a7cf6e 
					 
					
						
						
							
							[CI/Build] Update bamba test model location ( #18544 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-05-22 06:01:07 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a35a494745 
					 
					
						
						
							
							[Bugfix] Add kwargs to RequestOutput __init__ to be forward compatible ( #18513 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Linkun <github@lkchen.net > 
						
						
					 
					
						2025-05-22 05:24:43 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f6037d1907 
					 
					
						
						
							
							[Bugfix] Fix MRoPE Errors in the Qwen-VL Model When Processing Pure Text ( #18526 )  
						
						 
						
						... 
						
						
						
						Co-authored-by: 松灵 <wpf272043@alibaba-inc.com >
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com >
Co-authored-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-05-22 05:22:53 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						fa72f9a812 
					 
					
						
						
							
							Order sequence ids + config update to support specifying custom quantization layers ( #18279 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Elaine Zhao <elaineyz@amazon.com >
Co-authored-by: Tailin Pan <tailinpa@amazon.com >
Co-authored-by: Rishabh Rajesh <rishyraj@amazon.com >
Co-authored-by: Yishan McNabb <yishanm@amazon.com >
Co-authored-by: Patrick Lange <patlange@amazon.com >
Co-authored-by: Maxwell Goldberg <mgld@amazon.com >
Co-authored-by: Aakash Shetty <sheaak@amazon.com > 
						
						
					 
					
						2025-05-22 02:20:36 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ebed81fbf5 
					 
					
						
						
							
							Update default neuron config for speculation ( #18274 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Elaine Zhao <elaineyz@amazon.com >
Co-authored-by: Shashwat Srijan <sssrijan@amazon.com >
Co-authored-by: Aakash Shetty <sheaak@amazon.com > 
						
						
					 
					
						2025-05-22 02:18:55 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e2d7d31244 
					 
					
						
						
							
							[Neuron] Update Dockerfile.neuron to use latest neuron release (2.23) ( #18512 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Satyajith Chilappagari <satchill@amazon.com > 
						
						
					 
					
						2025-05-22 02:17:34 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						23b67b37b2 
					 
					
						
						
							
							[Doc] Fix invalid JSON in example args ( #18527 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-05-22 07:11:46 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						db5a29ba19 
					 
					
						
						
							
							[Bugfix] Fix LoRA test ( #18518 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-05-21 21:48:53 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						51797775c3 
					 
					
						
						
							
							[Bugfix][Model] Make Olmo2Model weight loading return loaded weights ( #18504 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Shane A <shanea@allenai.org > 
						
						
					 
					
						2025-05-21 21:17:03 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						cf5984b2fe 
					 
					
						
						
							
							[BugFix][DP] Send DP wave completion only from dp_rank==0 ( #18502 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com >
Co-authored-by: kourosh hakhamaneshi <kourosh@anyscale.com > 
						
						
					 
					
						2025-05-21 20:25:25 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d022115cc6 
					 
					
						
						
							
							[Bugfix] Inconsistent token calculation compared to HF in llava family ( #18479 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: jaycha <jaycha@ncsoft.com > 
						
						
					 
					
						2025-05-21 20:21:47 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						acb54ca8e1 
					 
					
						
						
							
							Intialize io_thread_pool attribute in the beginning. ( #18331 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: rabi <ramishra@redhat.com > 
						
						
					 
					
						2025-05-21 20:21:14 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6e0fd34d3c 
					 
					
						
						
							
							[CI] Fix race condition with StatelessProcessGroup.barrier ( #18506 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Russell Bryant <rbryant@redhat.com > 
						
						
					 
					
						2025-05-21 20:19:13 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						176d62e4ea 
					 
					
						
						
							
							[MISC] update project urls in pyproject.toml ( #18519 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Andy Xie <andy.xning@gmail.com > 
						
						
					 
					
						2025-05-21 20:17:34 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						20bd6f4d2e 
					 
					
						
						
							
							[FalconH1] Fix output dtype in RMSNorm fallback path for Falcon-H1 (e.g. 0.5B) ( #18500 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: dhia.rhaiem <dhia.rhaiem@tii.ae >
Co-authored-by: younesbelkada <younesbelkada@gmail.com >
Co-authored-by: Ilyas Chahed <ilyas.chahed@tii.ae >
Co-authored-by: Jingwei Zuo <jingwei.zuo@tii.ae > 
						
						
					 
					
						2025-05-21 19:23:59 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1f079540db 
					 
					
						
						
							
							[Bugfix] Consistent ascii handling in tool parsers ( #17704 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Sebastian Schönnenbeck <sebastian.schoennenbeck@comma-soft.com > 
						
						
					 
					
						2025-05-21 20:41:23 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						94d8ec8d2b 
					 
					
						
						
							
							[FEAT][ROCm] Upgrade AITER MLA v1 backend ( #18338 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: vllmellm <vllm.ellm@embeddedllm.com >
Co-authored-by: Luka Govedič <ProExpertProg@users.noreply.github.com > 
						
						
					 
					
						2025-05-21 10:34:28 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						bb0a311213 
					 
					
						
						
							
							Revert "[v1] Support multiple KV cache groups in GPU model runner ( #17945 ) ( #18459 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Mark McLoughlin <markmc@redhat.com > 
						
						
					 
					
						2025-05-21 10:25:23 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						dd5fa7e04f 
					 
					
						
						
							
							[ROCm][Kernel][V1] Enable AMD Radeon GPU Custom Paged Attention on v1 ( #17004 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Hosang Yoon <hosang.yoon@amd.com > 
						
						
					 
					
						2025-05-21 08:35:00 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2b16104557 
					 
					
						
						
							
							[Misc] Update deprecation message for --enable-reasoning ( #18404 )  
						
						 
						
						
						
						
					 
					
						2025-05-21 07:33:11 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						371376f996 
					 
					
						
						
							
							[Build] fix Dockerfile shell ( #18402 )  
						
						 
						
						
						
						
					 
					
						2025-05-21 07:32:06 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c6c10ca920 
					 
					
						
						
							
							[Bugfix] Reduce moe_sum test size to avoid OOM ( #18484 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Bill Nell <bnell@redhat.com > 
						
						
					 
					
						2025-05-21 06:46:39 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c154d89306 
					 
					
						
						
							
							[Doc] fix arg docstring in linear layers ( #18410 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: giantcroc <1204449533@qq.com > 
						
						
					 
					
						2025-05-21 06:45:57 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						eca18691d2 
					 
					
						
						
							
							[MODEL] FalconH1 ( #18406 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: dhia.rhaiem <dhia.rhaiem@tii.ae >
Co-authored-by: younesbelkada <younesbelkada@gmail.com >
Co-authored-by: Ilyas Chahed <ilyas.chahed@tii.ae >
Co-authored-by: Jingwei Zuo <jingwei.zuo@tii.ae > 
						
						
					 
					
						2025-05-21 04:59:06 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						61acfc45bc 
					 
					
						
						
							
							[Bugfix][Failing Test] Fix test_events.py ( #18460 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: rabi <ramishra@redhat.com > 
						
						
					 
					
						2025-05-21 04:57:28 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						107f5fc4cb 
					 
					
						
						
							
							[Misc] refactor disaggregated-prefill-v1 example ( #18474 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-05-21 11:10:14 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						907f935de9 
					 
					
						
						
							
							[V1] Fix general plugins not loaded in engine for multiproc ( #18326 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Yong Hoon Shin <yhshin@meta.com > 
						
						
					 
					
						2025-05-21 01:21:49 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5d7f545204 
					 
					
						
						
							
							[Frontend] deprecate --device arg ( #18399 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Kebe <mail@kebe7jun.com > 
						
						
					 
					
						2025-05-21 01:21:17 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						cd8dfc6dfc 
					 
					
						
						
							
							[Misc] MultiConnector._connectors type ( #18423 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: nicklucche <nlucches@redhat.com > 
						
						
					 
					
						2025-05-20 22:48:43 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d06dd72ba9 
					 
					
						
						
							
							[Bugfix][Failing Test] Fix nixl connector test when promt size < block size ( #18429 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: wwl2755 <wangwenlong2755@gmail.com > 
						
						
					 
					
						2025-05-20 22:41:44 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ad0012a0ac 
					 
					
						
						
							
							Revert "[Bugfix] Fix MRoPE Errors in the Qwen-VL Model When Processing Pure Text ( #18407 )" ( #18456 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-05-20 22:39:22 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						92247c522e 
					 
					
						
						
							
							[Bug] Fix moe_sum signature ( #18440 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Bill Nell <bnell@redhat.com > 
						
						
					 
					
						2025-05-20 22:37:08 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0c15c2e486 
					 
					
						
						
							
							[Bugfix] config.head_dim is now explicitly set to None ( #18432 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Gregory Shtrasberg <Gregory.Shtrasberg@amd.com > 
						
						
					 
					
						2025-05-20 21:04:33 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3b17ea26e4 
					 
					
						
						
							
							[TPU] Re-enable the Pallas MoE kernel ( #18025 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Michael Goin <mgoin64@gmail.com > 
						
						
					 
					
						2025-05-20 19:52:27 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						23baa2180b 
					 
					
						
						
							
							fix:Build torch wheel inline rather than picking from nightly ( #18351 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Dilip Gowda Bhagavan <dilip.bhagavan@ibm.com > 
						
						
					 
					
						2025-05-20 22:22:24 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						980a172474 
					 
					
						
						
							
							[Kernel] update comment for KV shape in unified triton attn ( #18099 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: haochengxia <xhc_1007@163.com > 
						
						
					 
					
						2025-05-20 11:19:34 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e1f5a71ed7 
					 
					
						
						
							
							[Model] use AutoWeightsLoader for bloom ( #18300 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: calvin chen <120380290@qq.com > 
						
						
					 
					
						2025-05-20 09:40:05 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f4a8a37465 
					 
					
						
						
							
							[Minor] Rename quantization nvfp4 to modelopt_fp4 ( #18356 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-05-20 09:08:37 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8f55962a7f 
					 
					
						
						
							
							[Misc] refactor prompt embedding examples ( #18405 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-05-20 15:26:12 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						be48360c1f 
					 
					
						
						
							
							[Bugfix] Fix MRoPE Errors in the Qwen-VL Model When Processing Pure Text ( #18407 )  
						
						 
						
						... 
						
						
						
						Co-authored-by: 松灵 <wpf272043@alibaba-inc.com > 
						
						
					 
					
						2025-05-20 06:59:48 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						86847700d7 
					 
					
						
						
							
							[CI] Add mteb testing to test the accuracy of the embedding model ( #17175 )  
						
						 
						
						
						
						
					 
					
						2025-05-20 06:51:12 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d6c86d09ae 
					 
					
						
						
							
							Update cpu.txt ( #18398 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: 汪志鹏 <wangzhipeng628@gmail.com > 
						
						
					 
					
						2025-05-20 10:53:23 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6b35cb10a0 
					 
					
						
						
							
							[Misc] Add LoRA code owner ( #18387 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-05-20 03:27:30 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1b1e8e05ff 
					 
					
						
						
							
							[doc] update env variable export ( #18391 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-05-20 08:53:27 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						bca55b556f 
					 
					
						
						
							
							[Bugfix] fix adding bias twice in ipex GPTQ quantization ( #18363 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: rand-fly <randfly@outlook.com > 
						
						
					 
					
						2025-05-20 00:54:33 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d981396778 
					 
					
						
						
							
							[release] Change dockerhub username for TPU release ( #18389 )  
						
						 
						
						
						
						
					 
					
						2025-05-19 23:49:23 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9609327fa4 
					 
					
						
						
							
							[Core] [Bugfix]: tensor parallel with prompt embeds ( #18171 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nan2018 <nan@protopia.ai >
Co-authored-by: Andrew Sansom <andrew@protopia.ai > 
						
						
					 
					
						2025-05-19 20:21:27 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f07a673eb2 
					 
					
						
						
							
							[Misc] Allow AutoWeightsLoader to skip loading weights with specific substr in name ( #18358 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <2037008807@qq.com > 
						
						
					 
					
						2025-05-19 20:20:12 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d565e0976f 
					 
					
						
						
							
							[neuron] fix authorization issue ( #18364 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Liangfu Chen <liangfc@amazon.com > 
						
						
					 
					
						2025-05-19 23:30:32 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						258bf621d5 
					 
					
						
						
							
							fix CUDA_check redefinition in  #17918  ( #18287 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lucia Fang <fanglu@fb.com >
Co-authored-by: Lucia (Lu) Fang <fanglu@meta.com > 
						
						
					 
					
						2025-05-19 13:42:35 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						dc1440cf9f 
					 
					
						
						
							
							Neuron up mistral ( #18222 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Satyajith Chilappagari <satchill@amazon.com > 
						
						
					 
					
						2025-05-19 09:54:47 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8171221834 
					 
					
						
						
							
							[Misc] Fix typo ( #18330 )  
						
						 
						
						
						
						
					 
					
						2025-05-19 09:51:01 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7937c2fd52 
					 
					
						
						
							
							Add files via uploadAdd fused MoE kernel tuning configs (fp8_w8a8) for DeepSeek V3/R1 on a single-node 8x NVIDIA H20 96GB setup ( #18337 )  
						
						 
						
						
						
						
					 
					
						2025-05-19 09:49:57 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e2ee1e8e9e 
					 
					
						
						
							
							[Feature]Add support for models quantized with AutoRound ( #17850 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: wenhuach21 <wenhua.cheng@intel.com > 
						
						
					 
					
						2025-05-19 09:38:53 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						20d8ce81eb 
					 
					
						
						
							
							[Frontend] add --quick option for vllm chat/complete ( #18297 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-05-19 09:36:13 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						84ab4feb7e 
					 
					
						
						
							
							[Doc] Fix typo ( #18355 )  
						
						 
						
						
						
						
					 
					
						2025-05-19 16:05:16 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6781af5608 
					 
					
						
						
							
							[Quantization] Pool model support bitsandbytes ( #18087 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-05-19 09:03:43 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1b15df2546 
					 
					
						
						
							
							[BugFix] Fix handling of num_computed_tokens with connector ( #18232 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com >
Co-authored-by: Nicolò Lucchesi <nicolo.lucchesi@gmail.com > 
						
						
					 
					
						2025-05-19 09:03:25 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						43b5f61dce 
					 
					
						
						
							
							[Doc] Move input-related docs to Features ( #18353 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-05-19 15:08:39 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c5bb0ebdc6 
					 
					
						
						
							
							[Doc] Fix prompt embedding examples ( #18350 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: wangli <wangli858794774@gmail.com > 
						
						
					 
					
						2025-05-19 06:48:16 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d637b96099 
					 
					
						
						
							
							[BugFix] [Vul] Add missing usedforsecurity=False in MD5 hashing to enable FIPS ( #18319 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: cascade812 <cascade812@outlook.com >
Signed-off-by: shaoyuyoung <shaoyuyoung@gmail.com >
Co-authored-by: cascade <cascade812@outlook.com > 
						
						
					 
					
						2025-05-19 01:31:23 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						275c5daeb0 
					 
					
						
						
							
							fix: Add type specifications for CLI arguments in tensorizer options ( #18314 )  
						
						 
						
						
						
						
					 
					
						2025-05-18 23:42:17 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						47fda6d089 
					 
					
						
						
							
							[Build] Supports CUDA 12.6 and 11.8 after Blackwell Update ( #18316 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: simon-mo <simon.mo@hey.com > 
						
						
					 
					
						2025-05-18 23:19:33 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						27d0952600 
					 
					
						
						
							
							[Misc] extract parser.parse_args() ( #18323 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-05-19 04:06:26 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						221cfc2fea 
					 
					
						
						
							
							Feature/vllm/input embedding completion api ( #17590 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Andrew Sansom <andrew@protopia.ai >
Signed-off-by: Nan2018 <nan@protopia.ai >
Co-authored-by: 临景 <linjing.yx@alibaba-inc.com >
Co-authored-by: Bryce1010 <bryceyx@gmail.com >
Co-authored-by: Andrew Sansom <andrew@protopia.ai >
Co-authored-by: Andrew Sansom <qthequartermasterman@gmail.com >
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com > 
						
						
					 
					
						2025-05-18 20:18:05 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9da1095daf 
					 
					
						
						
							
							[Spec Decode][V0] Fix spec decode correctness test in V0 eagle/medusa ( #18175 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: wwl2755 <wangwenlong2755@gmail.com > 
						
						
					 
					
						2025-05-18 19:49:46 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d1211f8794 
					 
					
						
						
							
							[Doc] Add doc to explain the usage of Qwen3 thinking ( #18291 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: WangErXiao <863579016@qq.com > 
						
						
					 
					
						2025-05-18 23:04:07 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b6a6e7a529 
					 
					
						
						
							
							[Misc] add litellm integration ( #18320 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-05-18 15:32:30 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4fb349f66a 
					 
					
						
						
							
							Fix copy-paste error in phi4mm image processing ( #18315 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lifu Huang <lifu.hlf@gmail.com > 
						
						
					 
					
						2025-05-18 07:00:12 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						908733aca7 
					 
					
						
						
							
							[Model] Use sigmoid for single-label classification ( #18313 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: 22quinn <33176974+22quinn@users.noreply.github.com > 
						
						
					 
					
						2025-05-18 07:00:09 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1a8f68bb90 
					 
					
						
						
							
							[doc] update reasoning doc ( #18306 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-05-18 06:59:14 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9ab2c02ff8 
					 
					
						
						
							
							Support sequence parallelism combined with pipeline parallelism ( #18243 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: cascade812 <cascade812@outlook.com > 
						
						
					 
					
						2025-05-17 22:47:25 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						66e63e86ec 
					 
					
						
						
							
							[MISC] fix typo ( #18305 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Andy Xie <andy.xning@gmail.com > 
						
						
					 
					
						2025-05-17 10:52:09 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9214e60631 
					 
					
						
						
							
							[Model] use AutoWeightsLoader for solar ( #18113 )  
						
						 
						
						
						
						
					 
					
						2025-05-17 00:24:17 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f880d42582 
					 
					
						
						
							
							Fixed build on ppc64le due to openssl conflicts ( #18262 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nishidha Panpaliya <nishidha.panpaliya@partner.ibm.com > 
						
						
					 
					
						2025-05-17 00:23:46 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						dcfe95234c 
					 
					
						
						
							
							Update Dockerfile to build for Blackwell ( #18095 )  
						
						 
						
						
						
						
					 
					
						2025-05-17 00:23:25 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						48ac2bed5b 
					 
					
						
						
							
							[Hardware][TPU] Optionally import for TPU backend ( #18269 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Siyuan Liu <lsiyuan@google.com >
Signed-off-by: Jade Zheng <zheng.shoujian@outlook.com >
Co-authored-by: Carol Zheng <cazheng@google.com >
Co-authored-by: Jade Zheng <zheng.shoujian@outlook.com >
Co-authored-by: Hongmin Fan <fanhongmin@google.com > 
						
						
					 
					
						2025-05-17 15:23:12 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3e0d435027 
					 
					
						
						
							
							[P/D][V1] Support dynamic loading of external KV connector implementations ( #18142 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: David Ben-David <davidb@pliops.com >
Co-authored-by: David Ben-David <davidb@pliops.com > 
						
						
					 
					
						2025-05-17 06:40:39 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4ee4826ede 
					 
					
						
						
							
							[BugFix] Correct max_model_len derivation from config.json for Mistral format  ( #17937 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: 汪志鹏 <wangzhipeng628@gmail.com >
Co-authored-by: tracelogfb <48808670+tracelogfb@users.noreply.github.com >
Co-authored-by: Stephen Chen <tracelog@meta.com > 
						
						
					 
					
						2025-05-17 04:20:13 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						60017dc841 
					 
					
						
						
							
							[Misc] reformat the collect-env output ( #18285 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-05-16 19:46:18 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						55f1a468d9 
					 
					
						
						
							
							Move cli args docs to its own page ( #18228 ) ( #18264 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Trevor Royer <troyer@redhat.com > 
						
						
					 
					
						2025-05-16 19:43:45 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						fd195b194e 
					 
					
						
						
							
							[V1][P/D] Local attention optimization for NIXL ( #18170 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-05-16 21:16:33 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						fabe89bbc4 
					 
					
						
						
							
							[Spec Decode] Don't fall back to V0 when spec decoding is enabled ( #18265 )  
						
						 
						
						
						
						
					 
					
						2025-05-16 16:10:27 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e73b7dfd69 
					 
					
						
						
							
							[Bugfix] fix an illegal memory access was encountered of marlin kernel + act_order  ( #18245 )  
						
						 
						
						
						
						
					 
					
						2025-05-16 16:02:44 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7fdfa01530 
					 
					
						
						
							
							[Sampler] Adapt to FlashInfer 0.2.3 sampler API ( #15777 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Bowen Wang <abmfy@icloud.com >
Co-authored-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-05-16 15:14:03 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						aef94c6d07 
					 
					
						
						
							
							[CI] Assign reviewer to mergify with changes to Tensorizer files ( #18278 )  
						
						 
						
						
						
						
					 
					
						2025-05-16 12:04:14 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0ceaebf87b 
					 
					
						
						
							
							[BugFix] Fix ordering of KVConnector finished send/rcv sets ( #18211 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-05-16 09:20:54 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1db4f47f81 
					 
					
						
						
							
							[BugFix] Fix multi async save in MultiConnector ( #18246 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-05-16 08:13:47 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d3d91b6f71 
					 
					
						
						
							
							[Misc][MacOS] fix bfloat16 error ( #18249 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-05-16 15:05:59 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						87d871470d 
					 
					
						
						
							
							[Model] Use autoweightloader for dbrx ( #18251 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: learner0810 <zhongjun.li@daocloud.io > 
						
						
					 
					
						2025-05-16 07:54:13 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a5f8c111c2 
					 
					
						
						
							
							[Fix] Fix typo in resolve_hf_chat_template ( #18259 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Felix Marty <felmarty@amd.com > 
						
						
					 
					
						2025-05-16 14:52:41 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e23564cb70 
					 
					
						
						
							
							use ceil_div in cutlass block scaling shape check ( #17918 )  
						
						 
						
						
						
						
					 
					
						2025-05-16 03:02:58 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						390ec88905 
					 
					
						
						
							
							[Misc] Consolidate Audio tests into multimodal common generation tests ( #18214 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <2037008807@qq.com > 
						
						
					 
					
						2025-05-16 09:18:08 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						541817670c 
					 
					
						
						
							
							[Misc] Add Ray Prometheus logger to V1 ( #17925 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Seiji Eicher <seiji@anyscale.com > 
						
						
					 
					
						2025-05-16 01:02:42 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						67da5720d4 
					 
					
						
						
							
							[PERF] Speed up Qwen2.5-VL model by speed up rotary position embedding ( #17973 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Vadim Gimpelson <vadim.gimpelson@centml.ai > 
						
						
					 
					
						2025-05-15 23:31:02 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5c04bb8b86 
					 
					
						
						
							
							[doc] fix multimodal example script ( #18089 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: David Xia <david@davidxia.com > 
						
						
					 
					
						2025-05-16 06:05:34 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3d2779c29a 
					 
					
						
						
							
							[Feature] Support Pipeline Parallism in torchrun SPMD offline inference for V1 ( #17827 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lucia Fang <fanglu@fb.com > 
						
						
					 
					
						2025-05-15 22:28:27 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6b31c84aff 
					 
					
						
						
							
							Throw better error for when running into k8s service discovery issue ( #18209 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Will Eaton <weaton@redhat.com > 
						
						
					 
					
						2025-05-15 21:07:28 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b18201fe06 
					 
					
						
						
							
							Allow users to pass arbitrary JSON keys from CLI ( #18208 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-05-15 21:05:34 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f4937a51c1 
					 
					
						
						
							
							[Model] vLLM v1 supports Medusa ( #17956 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: lisiqi23 <lisiqi23@xiaomi.com >
Signed-off-by: skylee-01 <497627264@qq.com >
Co-authored-by: lisiqi23 <lisiqi23@xiaomi.com > 
						
						
					 
					
						2025-05-15 21:05:31 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ee659e3b60 
					 
					
						
						
							
							[Bugfix][ROCm] Use chunked_prefill_paged_decode as fallback for V1 attention on ROCm ( #18093 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: kf <kuanfu.liu@embeddedllm.com > 
						
						
					 
					
						2025-05-15 19:30:17 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4e1c6a0264 
					 
					
						
						
							
							[Bugfix] fix rotary embedding test for _get_padded_tensor_shape ( #18229 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lucas Wilkinson <lwilkinson@neuralmagic.com > 
						
						
					 
					
						2025-05-16 01:32:45 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c7852a6d9b 
					 
					
						
						
							
							[Build] Allow shipping PTX on a per-file basis ( #18155 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lucas Wilkinson <lwilkinson@neuralmagic.com > 
						
						
					 
					
						2025-05-15 16:41:55 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8795eb9975 
					 
					
						
						
							
							[Bugfix] Fix test_eagle test ( #18223 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lucia Fang <fanglu@fb.com > 
						
						
					 
					
						2025-05-15 15:59:42 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0b34593017 
					 
					
						
						
							
							Adding "AMD: Tensorizer Test" to amdproduction. ( #18216 )  
						
						 
						
						
						
						
					 
					
						2025-05-15 11:01:25 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e3f3aee6f4 
					 
					
						
						
							
							[Misc] Avoid cuda graph log when sizes still match ( #18202 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: NickLucche <nlucches@redhat.com > 
						
						
					 
					
						2025-05-15 09:59:38 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						92540529c0 
					 
					
						
						
							
							[Bugfix] [ROCm]: Remove assertion logic when using AITER fused moe in unquantizedMethod to reenable LLama4 BF16 ( #18205 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: tjtanaa <tunjian.tan@embeddedllm.com > 
						
						
					 
					
						2025-05-15 09:53:18 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						fadb8d5c2d 
					 
					
						
						
							
							[Bugfix]Change the exception thrown by call_hf_processor from RuntimeError to ValueError ( #18181 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Abatom <abzhonghua@gmail.com > 
						
						
					 
					
						2025-05-15 09:01:47 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2aa5470ac5 
					 
					
						
						
							
							[Frontend] Fix chat template content format detection ( #18190 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Sebastian Schönnenbeck <sebastian.schoennenbeck@comma-soft.com > 
						
						
					 
					
						2025-05-15 09:00:21 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						51ff154639 
					 
					
						
						
							
							Improve examples rendering in docs and GitHub ( #18203 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-05-15 15:57:49 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						566ec04c3d 
					 
					
						
						
							
							Adding "Basic Models Test" and "Multi-Modal Models Test (Extended) 3" in AMD Pipeline ( #18106 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Alexei V. Ivanov <alexei.ivanov@amd.com >
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com > 
						
						
					 
					
						2025-05-15 08:49:23 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						01c22335ba 
					 
					
						
						
							
							[Kernel] [V1] Fix performance regression for triton unified attention ( #18161 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Thomas Parnell <tpa@zurich.ibm.com >
Co-authored-by: Lucas Wilkinson <lwilkinson@neuralmagic.com > 
						
						
					 
					
						2025-05-15 06:39:00 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						451da4bcbd 
					 
					
						
						
							
							add tools into TokenizeChatRequest ( #18187 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yangxia <yangxiast@gmail.com > 
						
						
					 
					
						2025-05-15 04:01:49 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						07ad27121f 
					 
					
						
						
							
							Update deprecated type hinting in model_loader ( #18130 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-05-15 04:00:21 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a9944aabfa 
					 
					
						
						
							
							fix: typos ( #18151 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: omahs <73983677+omahs@users.noreply.github.com > 
						
						
					 
					
						2025-05-15 02:16:15 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a8f5aec20a 
					 
					
						
						
							
							[V1] Update zmq socket creation in nixl connector ( #18148 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Russell Bryant <rbryant@redhat.com > 
						
						
					 
					
						2025-05-14 23:17:57 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						de71fec81b 
					 
					
						
						
							
							[CI] don't skip fixed test_kv_cache_events() ( #18183 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: David Xia <david@davidxia.com > 
						
						
					 
					
						2025-05-14 23:17:16 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						70f8b96724 
					 
					
						
						
							
							[Bugfix] Fix FusedMoEPrepareAndFinalize for cuda-disalike backends ( #18178 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Mengqing Cao <cmq0113@163.com > 
						
						
					 
					
						2025-05-14 23:16:31 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						dd2a94596a 
					 
					
						
						
							
							[Model] Allow the use of sliding window in Qwen2  ( #17772 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: inkcherry <mingzhi.liu@intel.com > 
						
						
					 
					
						2025-05-14 22:29:38 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						420caf7557 
					 
					
						
						
							
							[UT] Add ut for none hash ( #17892 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Andy Xie <andy.xning@gmail.com > 
						
						
					 
					
						2025-05-15 13:28:11 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4f07a64075 
					 
					
						
						
							
							Support custom implementations of VideoLoader backends. ( #18091 )  
						
						 
						
						
						
						
					 
					
						2025-05-15 13:26:49 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e6b8e65d2d 
					 
					
						
						
							
							[Bugfix] Fix fp8 tests for triton_unified_attention for Triton 3.3 ( #18013 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Thomas Parnell <tpa@zurich.ibm.com >
Co-authored-by: Lucas Wilkinson <lwilkinson@neuralmagic.com > 
						
						
					 
					
						2025-05-15 13:26:34 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						26d0419309 
					 
					
						
						
							
							Update deprecated type hinting in models ( #18132 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-05-14 22:06:50 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						83f74c698f 
					 
					
						
						
							
							[Fix][ROCm] Enforce eager for all encoder-decoder models on ROCm ( #18154 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Luka Govedič <lgovedic@redhat.com > 
						
						
					 
					
						2025-05-14 22:04:43 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2dff093574 
					 
					
						
						
							
							[Misc] add lobe-chat support ( #18177 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-05-15 05:02:23 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						afe3236e90 
					 
					
						
						
							
							[Chore] astral's ty ( #18116 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Aaron Pham <contact@aarnphm.xyz > 
						
						
					 
					
						2025-05-15 05:00:43 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						65334ef3b9 
					 
					
						
						
							
							[V1][Metrics] Remove unused code ( #18158 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Mark McLoughlin <markmc@redhat.com > 
						
						
					 
					
						2025-05-14 20:13:17 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e60f550b38 
					 
					
						
						
							
							[v1] Support multiple KV cache groups in GPU model runner ( #17945 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chen Zhang <zhangch99@outlook.com > 
						
						
					 
					
						2025-05-14 18:54:54 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f25e0d1125 
					 
					
						
						
							
							[Bugfix]: make most of test_openai_schema.py pass ( #17664 )  
						
						 
						
						
						
						
					 
					
						2025-05-14 17:04:35 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						09f106a91e 
					 
					
						
						
							
							Upload vllm index for the rc builds ( #18173 )  
						
						 
						
						
						
						
					 
					
						2025-05-14 16:35:56 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2142035b51 
					 
					
						
						
							
							[V1] Support multiple kv connectors ( #17564 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com >
Signed-off-by: Nick Hill <nhill@redhat.com >
Co-authored-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-05-14 16:28:02 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						78aa341d12 
					 
					
						
						
							
							[CI] Fix race condition in test_kv_cache_events test ( #18169 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Russell Bryant <rbryant@redhat.com > 
						
						
					 
					
						2025-05-14 16:27:48 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7974736740 
					 
					
						
						
							
							Add support for loading torchao models with AOPerModuleConfig ( #17826 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jerry Zhang <jerryzh168@gmail.com > 
						
						
					 
					
						2025-05-14 16:24:59 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2fc9075b82 
					 
					
						
						
							
							[V1] Structured Outputs + Thinking compatibility ( #16577 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Aaron Pham <contact@aarnphm.xyz >
Co-authored-by: Russell Bryant <rbryant@redhat.com > 
						
						
					 
					
						2025-05-14 15:45:24 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d93c976a0d 
					 
					
						
						
							
							[Kernel] Have rotary embeddings support tensors ( #18046 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lucas Wilkinson <lwilkinson@neuralmagic.com > 
						
						
					 
					
						2025-05-14 15:43:55 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						749f792553 
					 
					
						
						
							
							[Frontend] decrease import time of vllm.multimodal ( #18031 )  
						
						 
						
						... 
						
						
						
						Co-authored-by: Aaron Pham <Aaronpham0103@gmail.com > 
						
						
					 
					
						2025-05-14 15:43:32 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						856865008e 
					 
					
						
						
							
							[CI] Disable Failing Tests ( #18165 )  
						
						 
						
						
						
						
					 
					
						2025-05-14 13:49:56 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f9c069c85e 
					 
					
						
						
							
							Modularize fused experts and integrate PPLX kernels ( #15956 )  
						
						 
						
						
						
						
					 
					
						2025-05-14 13:11:54 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						418d2f8bfb 
					 
					
						
						
							
							[V1][Spec Decode] Share input embedding of target model with EAGLE draft model to free ~1GB for llama 3 model ( #17326 )  
						
						 
						
						... 
						
						
						
						Co-authored-by: root <root@ekagra-8xh100.us-east5-a .c.serving-efficiency-poc.internal>
Co-authored-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-05-14 12:31:46 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						964472b966 
					 
					
						
						
							
							[Doc] Update prefix cache metrics to counting tokens ( #18138 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chen Zhang <zhangch99@outlook.com > 
						
						
					 
					
						2025-05-14 15:23:30 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						59dd311cf5 
					 
					
						
						
							
							[KVConnector] Keep KVTransferParams as a dict ( #18033 )  
						
						 
						
						
						
						
					 
					
						2025-05-14 08:05:57 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d066e52013 
					 
					
						
						
							
							[Bugfix] Fix chat utils tests ( #18139 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-05-14 05:38:21 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c8ea982d9b 
					 
					
						
						
							
							Update deprecated type hinting in platform, plugins, triton_utils, vllm_flash_attn ( #18129 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-05-14 05:28:16 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						dc372b9c8a 
					 
					
						
						
							
							Update deprecated type hinting in vllm/device_allocator and vllm/distributed ( #18126 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-05-14 04:07:57 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9b5b39b650 
					 
					
						
						
							
							Update deprecated type hinting in vllm/lora ( #18128 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-05-14 03:57:59 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9ccc6ded42 
					 
					
						
						
							
							[doc] add missing import ( #18133 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-05-14 10:57:34 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d62a076e84 
					 
					
						
						
							
							[Model] GritLM supports other attention backends ( #18109 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-05-14 03:33:19 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						259127f8b8 
					 
					
						
						
							
							[Bugfix] Fix LoRA test ( #18123 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-05-14 10:25:47 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						612c2edb4f 
					 
					
						
						
							
							[FEAT] [ROCm]: Add AITER CK 2 Stages MoE support ( #17110 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: tjtanaa <tunjian.tan@embeddedllm.com >
Co-authored-by: Gregory Shtrasberg <Gregory.Shtrasberg@amd.com > 
						
						
					 
					
						2025-05-14 03:03:11 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						38fe728d60 
					 
					
						
						
							
							[Bugfix] Fix QKVCrossParallelLinear::sync_weight_attrs for PyTorch compile ( #17844 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Andrzej Kotłowski <akotlowski@habana.ai > 
						
						
					 
					
						2025-05-14 09:39:51 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						82e7f9bb03 
					 
					
						
						
							
							[Misc] replace does not exist model ( #18119 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: rongfu.leng <rongfu.leng@daocloud.io > 
						
						
					 
					
						2025-05-14 02:13:47 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						63dc3426e0 
					 
					
						
						
							
							[Model] Add packed_modules_mapping for Qwen3-MOE ( #18118 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-05-14 02:13:19 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8f5dc41481 
					 
					
						
						
							
							[Bugfix] Fix entrypoints audio test failure ( #18111 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-05-14 09:08:07 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						63ad622233 
					 
					
						
						
							
							[New Model]: support GTE NewModel ( #17986 )  
						
						 
						
						
						
						
					 
					
						2025-05-14 01:31:31 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e7ef61c1f0 
					 
					
						
						
							
							[Bugfix][Example] make lmcache v0 work. ( #18051 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Ma, Jianpeng <jianpeng.ma@intel.com > 
						
						
					 
					
						2025-05-13 23:43:44 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d4154c35a2 
					 
					
						
						
							
							[Bugfix] fix moe marlin topk_weight loading ( #18080 )  
						
						 
						
						... 
						
						
						
						Co-authored-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-05-13 23:31:57 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6685890d11 
					 
					
						
						
							
							[Fix] Move "model_config" as keyword args in chat_utils.py ( #18098 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Linkun <github@lkchen.net > 
						
						
					 
					
						2025-05-13 23:27:26 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						33011318c2 
					 
					
						
						
							
							Fix broken example: examples/offline_inference/profiling at scheduler_config  ( #18117 )  
						
						 
						
						
						
						
					 
					
						2025-05-13 23:19:14 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4f8b373225 
					 
					
						
						
							
							[BugFix][AMD] Compatible patch for AITER lib after 04/20 ( #17912 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Qiang Li <qiang.li2@amd.com > 
						
						
					 
					
						2025-05-13 23:05:20 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7b2f28deba 
					 
					
						
						
							
							[AMD][torch.compile] Enable silu+fp8_quant fusion for rocm ( #18082 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: charlifu <charlifu@amd.com > 
						
						
					 
					
						2025-05-13 22:13:56 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2d912fb66f 
					 
					
						
						
							
							[FEAT] [ROCm] [V1]: Add AITER biased group topk for DeepSeekV3 ( #17955 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: tjtanaa <tunjian.tan@embeddedllm.com >
Co-authored-by: tjtanaa <tunjian.tan@embeddedllm.com > 
						
						
					 
					
						2025-05-13 22:03:47 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						12e6c0b41c 
					 
					
						
						
							
							[Bugfix][V1] Fix FlashInfer V1 backend using the wrong VllmConfig ( #18086 )  
						
						 
						
						
						
						
					 
					
						2025-05-13 20:36:17 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9a2a6357de 
					 
					
						
						
							
							[Bugfix] Fix FP8 Marlin MoE and enable for compressed-tensors models ( #18026 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-05-13 19:48:33 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6266c57bae 
					 
					
						
						
							
							[core][distributed] add ep group and all2all interface ( #18077 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: youkaichao <youkaichao@gmail.com > 
						
						
					 
					
						2025-05-14 10:46:49 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						754b699cbe 
					 
					
						
						
							
							[Bug]: Fix S3 model/tokenizer path resolution ( #18083 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jon Gill <jon@yurts.ai > 
						
						
					 
					
						2025-05-13 19:34:17 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6e27c6d86b 
					 
					
						
						
							
							[Misc] Remove unused numpy tensor ( #18084 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Roger Wang <hey@rogerw.me > 
						
						
					 
					
						2025-05-13 19:33:40 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d5af47a149 
					 
					
						
						
							
							[P/D] Add some more debug logs to NixlConnector ( #18102 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-05-13 19:33:03 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						65f0f74b66 
					 
					
						
						
							
							[Hardware/NVIDIA/Modelopt] Fix modelopt forward method for v1 torch.compile ( #18101 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Pavani Majety <pmajety@nvidia.com > 
						
						
					 
					
						2025-05-13 19:33:00 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						176a95c670 
					 
					
						
						
							
							[Fix] Support CUDAGraph capture for encoder-decoder on ROCm ( #18104 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Luka Govedič <lgovedic@redhat.com > 
						
						
					 
					
						2025-05-13 19:31:42 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f2ae883b67 
					 
					
						
						
							
							[v1][KVCacheManager] pass num_new_computed_tokens to kv cache manager ( #18001 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chen Zhang <zhangch99@outlook.com > 
						
						
					 
					
						2025-05-13 19:09:39 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						40de1ef455 
					 
					
						
						
							
							[FEAT] [ROCm]: Add AITER Block-Scaled GEMM Feature ( #14968 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: vllmellm <vllm.ellm@embeddedllm.com >
Signed-off-by: tjtanaa <tunjian.tan@embeddedllm.com >
Co-authored-by: tjtanaa <tunjian.tan@embeddedllm.com > 
						
						
					 
					
						2025-05-13 19:08:20 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0189a65a2e 
					 
					
						
						
							
							[Docs] Expand security doc with firewall info ( #18081 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Russell Bryant <rbryant@redhat.com > 
						
						
					 
					
						2025-05-13 19:36:00 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						55aa7af994 
					 
					
						
						
							
							[V1] DP scale-out (2/N): Decouple engine process management and comms ( #15977 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-05-13 10:48:21 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0b217da646 
					 
					
						
						
							
							Update deprecated type hinting in vllm/adapter_commons ( #18073 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-05-13 08:32:51 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						19324d660c 
					 
					
						
						
							
							Update deprecated type hinting in vllm/compilation ( #18072 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-05-13 08:32:48 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						fc407a1425 
					 
					
						
						
							
							Give auto-merge label workflow permission to add labels to issues ( #18078 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-05-13 07:53:13 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						009d9e7590 
					 
					
						
						
							
							Convert benchmarks to ruff format ( #18068 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-05-13 13:43:29 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b922c2ebd2 
					 
					
						
						
							
							[Bugfix] Fix entrypoints metrics tests ( #18063 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-05-13 06:42:43 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						00b14e0f16 
					 
					
						
						
							
							[CI] set token permissions for pre-commit CI job ( #17729 )  
						
						 
						
						... 
						
						
						
						Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com> 
						
						
					 
					
						2025-05-13 13:38:30 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						54e467e6f8 
					 
					
						
						
							
							[CI] Add token permissions for add-ready-label CI job ( #17730 )  
						
						 
						
						... 
						
						
						
						Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com> 
						
						
					 
					
						2025-05-13 13:38:13 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						79a1d25bbd 
					 
					
						
						
							
							[CI] Add workflow permissions for helm CI job ( #17727 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Russell Bryant <rbryant@redhat.com >
Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com> 
						
						
					 
					
						2025-05-13 12:49:07 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9944011b30 
					 
					
						
						
							
							[CI] Set token permissions for reminder comment CI job ( #17728 )  
						
						 
						
						... 
						
						
						
						Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com> 
						
						
					 
					
						2025-05-13 12:46:58 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8c946cecca 
					 
					
						
						
							
							Update deprecated type hinting in vllm/transformers_utils ( #18058 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-05-13 04:34:37 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ff334ca1cd 
					 
					
						
						
							
							Update deprecated type hinting in vllm/profiler ( #18057 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-05-13 04:34:34 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6223dd8114 
					 
					
						
						
							
							Update deprecated type hinting in model_executor/layers ( #18056 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-05-13 04:17:23 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						906f0598fc 
					 
					
						
						
							
							[doc] add download/list/delete HF model CLI usage ( #17940 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-05-13 11:15:51 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						cb528d0585 
					 
					
						
						
							
							[Fix] check to make sure processor has chat templates ( #18047 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Aaron Pham <contact@aarnphm.xyz > 
						
						
					 
					
						2025-05-13 03:04:10 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						98fcba1575 
					 
					
						
						
							
							Convert .buildkite to ruff format ( #17656 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-05-13 09:28:31 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						23b3134eb5 
					 
					
						
						
							
							[Benchmarks] Refactor run_structured_output_benchmarks.sh ( #17722 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Russell Bryant <rbryant@redhat.com > 
						
						
					 
					
						2025-05-13 01:47:29 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ea6ae8cb45 
					 
					
						
						
							
							[Bugfix] Fix marlin moe fallback logic for llama4 ( #18042 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-05-13 07:53:28 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2ff297dce9 
					 
					
						
						
							
							[BugFix] Set default random seed to 0 for V1 ( #17929 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-05-13 07:52:19 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8dd0671bac 
					 
					
						
						
							
							[Bugfix][V1] Only get input embeddings w/ multi-modal models if first PP ( #17916 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jin Huang <jinhun@amazon.com >
Co-authored-by: Jin Huang <jinhun@amazon.com > 
						
						
					 
					
						2025-05-13 15:10:07 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f0d610a8ae 
					 
					
						
						
							
							[v1][KVCacheManager] Avoid full cache hit by controlling max_length ( #17999 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chen Zhang <zhangch99@outlook.com >
Co-authored-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-05-13 06:50:38 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e57e4d6e9e 
					 
					
						
						
							
							Fix Broken macro for cutlass moe ( #18049 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: drisspg <drisspguessous@gmail.com > 
						
						
					 
					
						2025-05-12 23:31:06 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ee5be834e7 
					 
					
						
						
							
							[BugFix] Fix 4-GPU RLHF tests ( #18007 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-05-12 23:03:55 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						48545728d8 
					 
					
						
						
							
							cleanup invalid prints ( #18050 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: calvin chen <120380290@qq.com > 
						
						
					 
					
						2025-05-12 23:01:57 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						dc1a821768 
					 
					
						
						
							
							[Feature][V1]  Support tool_choice: required when using Xgrammar as the StructuredOutputBackend. ( #17845 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com > 
						
						
					 
					
						2025-05-12 23:01:31 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						61e0a506a3 
					 
					
						
						
							
							[Bugfix] Avoid repeatedly creating dummy data during engine startup ( #17935 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-05-12 22:40:19 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1df491c522 
					 
					
						
						
							
							[Bugfix] Fixes for new marlin moe usage ( #18017 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-05-13 03:50:04 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d8487ef557 
					 
					
						
						
							
							[ROCm]: Fix build from source failure with gcc14 and ROCm 6.3 ( #13779 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Arjun Kathuria <arjun.kathuria8@gmail.com > 
						
						
					 
					
						2025-05-12 20:36:33 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c06af9a959 
					 
					
						
						
							
							[Misc] Slight spelling modification ( #18039 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-05-12 20:36:27 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						60f7624334 
					 
					
						
						
							
							Implements dual-chunk-flash-attn backend for dual chunk attention with sparse attention support ( #11844 )  
						
						 
						
						
						
						
					 
					
						2025-05-12 19:52:47 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f6518b2b48 
					 
					
						
						
							
							[ROCm] Skip tests for quantizations incompatible with ROCm ( #17905 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Hissu Hyvarinen <hissu.hyvarinen@amd.com > 
						
						
					 
					
						2025-05-12 18:39:28 -06:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d67085c2c8 
					 
					
						
						
							
							Remove noisy warnings from SchedulerConfig ( #17995 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-05-13 00:33:45 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						307939f299 
					 
					
						
						
							
							Use NVFP4 Marlin for CompressedTensorsW4A16Fp4 ( #18000 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com >
Signed-off-by: Dipika <dipikasikka1@gmail.com >
Co-authored-by: Dipika <dipikasikka1@gmail.com > 
						
						
					 
					
						2025-05-12 18:07:34 -06:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9d7ea9dbbf 
					 
					
						
						
							
							Update some more deprecated type hinting ( #17998 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-05-12 23:49:33 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						acee8f48aa 
					 
					
						
						
							
							[Model] Support MiMo-7B inference with MTP ( #17433 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: wp-alpha <wangpeng66@xiaomi.com >
Co-authored-by: wangpeng66 <wangpeng66@xiaomi.com > 
						
						
					 
					
						2025-05-12 23:25:33 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f065de4e88 
					 
					
						
						
							
							Fix FBGEMM integration ( #18002 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-05-12 23:02:07 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						dc9905368d 
					 
					
						
						
							
							[V1][Spec Decode] Eagle unit tests ( #17350 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: wwl2755 <wangwenlong2755@gmail.com > 
						
						
					 
					
						2025-05-12 23:01:17 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ebab1ac37c 
					 
					
						
						
							
							[CI] Make JSON output tests less likely to fail ( #17859 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Russell Bryant <rbryant@redhat.com > 
						
						
					 
					
						2025-05-12 22:31:54 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2b0db9b0e2 
					 
					
						
						
							
							Enable standard language model for torhc nightly ( #18004 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Yang Wang <elainewy@meta.com > 
						
						
					 
					
						2025-05-12 14:00:04 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						195adb47c0 
					 
					
						
						
							
							[Chore] Remove unused method ( #18024 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: rshaw@neuralmagic.com  <robertgshaw2@gmail.com > 
						
						
					 
					
						2025-05-12 13:59:47 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						302f3aca7e 
					 
					
						
						
							
							[v1][KVCacheManager] Change prefix caching metric from counting blocks to counting tokens ( #18003 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chen Zhang <zhangch99@outlook.com > 
						
						
					 
					
						2025-05-12 13:46:12 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e9c730c9bd 
					 
					
						
						
							
							Enabling "Weight Loading Multiple GPU Test - Large Models" ( #18020 )  
						
						 
						
						
						
						
					 
					
						2025-05-12 13:05:33 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						289199feb6 
					 
					
						
						
							
							[Core] Use platform-agnostic device control for DP engine core ( #17245 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jade Zheng <zheng.shoujian@outlook.com > 
						
						
					 
					
						2025-05-12 12:09:16 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b9fd0d7a69 
					 
					
						
						
							
							[CI/Build] Fix TPU V1 Test mixed use of & and && across tests ( #17968 )  
						
						 
						
						
						
						
					 
					
						2025-05-12 12:06:59 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						72a3f6b898 
					 
					
						
						
							
							Construct KVTransferConfig properly from Python instead of using JSON blobs without CLI ( #17994 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-05-12 11:25:33 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						98ea35601c 
					 
					
						
						
							
							[Lora][Frontend]Add default local directory LoRA resolver plugin. ( #16855 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: jberkhahn <jaberkha@us.ibm.com > 
						
						
					 
					
						2025-05-12 10:39:10 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d19110204c 
					 
					
						
						
							
							[P/D] NIXL Integration ( #17751 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: ApostaC <yihua98@uchicago.edu >
Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com >
Signed-off-by: rshaw@neuralmagic.com  <robertgshaw2@gmail.com >
Signed-off-by: Robert Shaw <rshaw@neuralmagic.com >
Signed-off-by: mgoin <mgoin64@gmail.com >
Signed-off-by: Nick Hill <nhill@redhat.com >
Signed-off-by: Brent Salisbury <bsalisbu@redhat.com >
Co-authored-by: Tyler Michael Smith <tyler@neuralmagic.com >
Co-authored-by: ApostaC <yihua98@uchicago.edu >
Co-authored-by: Robert Shaw <rshaw@neuralmagic.com >
Co-authored-by: mgoin <mgoin64@gmail.com >
Co-authored-by: Nick Hill <nhill@redhat.com >
Co-authored-by: Tyler Michael Smith <tysmith@redhat.com >
Co-authored-by: Brent Salisbury <bsalisbu@redhat.com > 
						
						
					 
					
						2025-05-12 09:46:16 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						05a4324f8e 
					 
					
						
						
							
							Initialize the delta tool call fields explicitly ( #17340 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Max de Bayser <mbayser@br.ibm.com >
Co-authored-by: igmainc <igmainc@icloud.com > 
						
						
					 
					
						2025-05-12 13:28:58 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7ea6cb28b2 
					 
					
						
						
							
							[Misc] Improve modelscope  import error  ( #17983 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-05-12 10:46:45 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9fbf2bfbd5 
					 
					
						
						
							
							Correcting testcases in builkite job for IBM Power ( #17675 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Aaruni Aggarwal <aaruniagg@gmail.com > 
						
						
					 
					
						2025-05-12 08:11:55 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3a5ea75129 
					 
					
						
						
							
							[Feature] Support DeepSeekV3 Function Call ( #17784 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: 许文卿 <xwq391974@alibaba-inc.com >
Signed-off-by: Xu Wenqing <xuwq1993@qq.com > 
						
						
					 
					
						2025-05-12 00:45:21 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						891b9d33de 
					 
					
						
						
							
							[Fix] Benchmark "EngineClient" has no attribute "model_config" ( #17976 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Brayden Zhong <b8zhong@uwaterloo.ca > 
						
						
					 
					
						2025-05-11 22:55:53 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						430783018c 
					 
					
						
						
							
							[Bugfix][TPU] Use np array when updating cache slot_mapping ( #17971 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Siyuan Liu <lsiyuan@google.com > 
						
						
					 
					
						2025-05-12 12:58:33 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						19a3c78d1f 
					 
					
						
						
							
							[Bugfix] Fix pydantic.errors.PydanticUserError ( #17962 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: wangli <wangli858794774@gmail.com > 
						
						
					 
					
						2025-05-12 12:58:23 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ada50aa295 
					 
					
						
						
							
							[bugfix] fix the wrong parser ( #17958 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-05-12 04:58:02 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						08bf784078 
					 
					
						
						
							
							[Bugfix] validate grammar and throw 400 error instead of crashing the engine when xgrammar validation fails ( #17623 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jason Cheng <jasoncky96@gmail.com >
Co-authored-by: Russell Bryant <rbryant@redhat.com > 
						
						
					 
					
						2025-05-12 09:06:10 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d45fe333fb 
					 
					
						
						
							
							[misc] add instructions on how to install nvshmem/pplx/deepep ( #17964 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: youkaichao <youkaichao@gmail.com > 
						
						
					 
					
						2025-05-11 18:02:39 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						021c16c7ca 
					 
					
						
						
							
							[Model] Broadcast Ovis2 implementation to fit Ovis1.6 ( #17861 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <2037008807@qq.com > 
						
						
					 
					
						2025-05-11 17:56:30 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7de18d541b 
					 
					
						
						
							
							[BUG] [ROCm] [MLA] Fix variable name bug due to change in variable name in PR  #17483  ( #17961 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: tjtanaa <tunjian.tan@embeddedllm.com > 
						
						
					 
					
						2025-05-11 09:14:30 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a810b5b088 
					 
					
						
						
							
							[BugFix] [ROCm]: Bugfix and handle addition case of input for rocm_aiter_rms_norm ( #17857 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: tjtanaa <tunjian.tan@embeddedllm.com > 
						
						
					 
					
						2025-05-11 04:17:11 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						009b3d5382 
					 
					
						
						
							
							[Misc] not show --model in vllm serve --help ( #16691 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-05-11 08:47:58 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e4b8713380 
					 
					
						
						
							
							[New Model]: nomic-embed-text-v2-moe ( #17785 )  
						
						 
						
						
						
						
					 
					
						2025-05-11 00:59:43 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						06c0922a69 
					 
					
						
						
							
							[FP8][ROCm][Attention] Enable FP8 KV cache on ROCm for V1 ( #17870 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Gregory Shtrasberg <Gregory.Shtrasberg@amd.com > 
						
						
					 
					
						2025-05-11 15:58:45 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						cd3edfc908 
					 
					
						
						
							
							[Misc] Add compressed-tensors NVFP4A16 emulation support ( #17914 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Dipika Sikka <dipikasikka1@gmail.com >
Signed-off-by: Dipika <dipikasikka1@gmail.com > 
						
						
					 
					
						2025-05-11 15:58:38 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9cea90eab4 
					 
					
						
						
							
							[Frontend] Add /classify endpoint ( #17032 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Frieda (Jingying) Huang <jingyingfhuang@gmail.com > 
						
						
					 
					
						2025-05-11 07:57:07 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d1110f5b5a 
					 
					
						
						
							
							[doc] update lora doc ( #17936 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-05-11 15:56:21 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8132365b74 
					 
					
						
						
							
							[Bugfix]: v1 engine - consider lora adapters in allowed_token_ids ( #17855 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Ben Browning <bbrownin@redhat.com > 
						
						
					 
					
						2025-05-11 00:53:58 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						eea22a56ab 
					 
					
						
						
							
							fix amd triton mla path ( #17871 )  
						
						 
						
						
						
						
					 
					
						2025-05-11 07:53:31 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9112155283 
					 
					
						
						
							
							[Perf] Use small max_num_batched_tokens for A100 ( #17885 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: KuntaiDu <kuntai@uchicago.edu > 
						
						
					 
					
						2025-05-11 07:53:23 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						90d0a74b60 
					 
					
						
						
							
							[Bugfix] Add revision to transformers.Auto*.from_pretrained processors ( #17948 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Xin Li <xin@centml.ai > 
						
						
					 
					
						2025-05-11 07:52:44 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d74e5f37bc 
					 
					
						
						
							
							[Kernel] fp4 marlin kernel ( #17687 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jinzhen Lin <linjinzhen@hotmail.com > 
						
						
					 
					
						2025-05-10 19:58:49 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ca66a1674c 
					 
					
						
						
							
							[v1] Rename specialized_manager.py to single_type_kv_cache_manager.py ( #17946 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chen Zhang <zhangch99@outlook.com > 
						
						
					 
					
						2025-05-10 16:14:12 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						950751a987 
					 
					
						
						
							
							[v1] Pass BlockTable and KVCacheSpec to AttentionMetadataBuilders ( #17483 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chen Zhang <zhangch99@outlook.com > 
						
						
					 
					
						2025-05-10 16:12:04 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4c31218f80 
					 
					
						
						
							
							[Misc] remove --model from vllm serve usage ( #17944 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-05-10 13:23:31 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						68311891f5 
					 
					
						
						
							
							Don't default construct ModelConfig when default constructing VllmConfig ( #17943 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-05-10 13:23:00 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						fc4441a4ee 
					 
					
						
						
							
							Add missing content type headers to /ping and /health ( #17036 ) ( #17786 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Ximo Guanter <ximo.guanter@gmail.com >
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-05-10 07:13:32 +01:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						246e3e0a36 
					 
					
						
						
							
							fix broken test vllm:test_kernels - test_attention_selector.py::test_flash_attn ( #17873 )  
						
						 
						
						... 
						
						
						
						Co-authored-by: Stephen Chen <tracelog@meta.com > 
						
						
					 
					
						2025-05-10 10:46:54 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7042cc96b0 
					 
					
						
						
							
							[V1][Spec Decoding] Log accumulated metrics after system goes idle ( #17913 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Mark McLoughlin <markmc@redhat.com > 
						
						
					 
					
						2025-05-09 18:23:07 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0c0fdae84f 
					 
					
						
						
							
							[Hardware/NVIDIA/Kernel] Enable nvidia/DeepSeek-R1-FP4 Model ( #16362 )  
						
						 
						
						
						
						
					 
					
						2025-05-09 16:24:41 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3b602cdea7 
					 
					
						
						
							
							AMD conditional all test execution // new test groups ( #17556 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Alexei V. Ivanov <alexei.ivanov@amd.com >
Signed-off-by: Yida Wu <yidawu@alumni.cmu.edu > 
						
						
					 
					
						2025-05-09 15:35:58 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4b2ed7926a 
					 
					
						
						
							
							Improve configs - the rest! ( #17562 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-05-09 15:18:44 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7e3571134f 
					 
					
						
						
							
							[V1][Spec Decoding] Include bonus tokens in mean acceptance length ( #17908 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Mark McLoughlin <markmc@redhat.com > 
						
						
					 
					
						2025-05-09 13:32:36 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ea2236bf95 
					 
					
						
						
							
							Add option to use torch._inductor.standalone_compile ( #17057 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: rzou <zou3519@gmail.com > 
						
						
					 
					
						2025-05-09 12:59:04 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7d4aedae7c 
					 
					
						
						
							
							Handle error when str passed to /v1/audio/transcriptions ( #17909 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-05-09 19:23:59 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						22481fbfa3 
					 
					
						
						
							
							Update CT WNA16MarlinMoE integration ( #16666 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-05-09 13:19:45 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5c4c08f6f1 
					 
					
						
						
							
							[Misc] Auto fallback to float16 for pre-Ampere GPUs when detected bfloat16 config ( #17265 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <2037008807@qq.com > 
						
						
					 
					
						2025-05-09 17:16:12 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c44c384b1c 
					 
					
						
						
							
							[Misc] Add references in ray_serve_deepseek example ( #17907 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Rui Qiao <ruisearch42@gmail.com > 
						
						
					 
					
						2025-05-09 16:59:36 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						85b72cb7b1 
					 
					
						
						
							
							Revert "[BugFix][AMD] Compatible patch for latest AITER(05/07/2025)" ( #17910 )  
						
						 
						
						
						
						
					 
					
						2025-05-09 08:58:18 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6e5595ca39 
					 
					
						
						
							
							[CI/Build] Automatically retry flaky tests ( #17856 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-05-09 09:55:17 -06:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						200da9a517 
					 
					
						
						
							
							[v1] Move block management logic from KVCacheManager to SpecializedManager ( #17474 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chen Zhang <zhangch99@outlook.com > 
						
						
					 
					
						2025-05-09 15:25:34 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9f64e93415 
					 
					
						
						
							
							[BugFix][AMD] Compatible patch for latest AITER(05/07/2025) ( #17864 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Qiang Li <qiang.li2@amd.com > 
						
						
					 
					
						2025-05-09 08:59:36 -06:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ec61ea20a8 
					 
					
						
						
							
							[Misc] add dify integration ( #17895 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-05-09 03:42:39 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c6798baa9c 
					 
					
						
						
							
							Change top_k to be disabled with 0 (still accept -1 for now) ( #17773 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-05-09 10:01:49 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5b2dcbf0b8 
					 
					
						
						
							
							Fix Whisper crash caused by invalid`` max_num_batched_tokens`` config ( #17853 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: inkcherry <mingzhi.liu@intel.com > 
						
						
					 
					
						2025-05-09 09:16:26 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6e4a93e3f7 
					 
					
						
						
							
							[Bugfix][CPU] Fix broken AVX2 CPU TP support ( #17252 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <2037008807@qq.com > 
						
						
					 
					
						2025-05-09 08:55:14 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						217db4baa6 
					 
					
						
						
							
							[Bugfix][ROCm] Fix AITER MLA V1 ( #17880 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: vllmellm <vllm.ellm@embeddedllm.com > 
						
						
					 
					
						2025-05-09 08:38:21 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ff8c400502 
					 
					
						
						
							
							[Doc] remove visible token in doc ( #17884 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yan <yanma1@habana.ai > 
						
						
					 
					
						2025-05-09 01:21:31 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						89a0315f4c 
					 
					
						
						
							
							[Doc] Update several links in reasoning_outputs.md ( #17846 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: windsonsea <haifeng.yao@daocloud.io > 
						
						
					 
					
						2025-05-09 01:20:55 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3d1e387652 
					 
					
						
						
							
							[Docs] Add Slides from NYC Meetup ( #17879 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: simon-mo <simon.mo@hey.com > 
						
						
					 
					
						2025-05-08 21:46:54 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d310e6de98 
					 
					
						
						
							
							[BUGFIX]: return fast when request requires prompt logprobs ( #17251 )  
						
						 
						
						
						
						
					 
					
						2025-05-08 21:25:41 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5e6f939484 
					 
					
						
						
							
							[Attention] MLA move rotary embedding to cuda-graph region ( #17668 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lucas Wilkinson <lwilkinson@neuralmagic.com > 
						
						
					 
					
						2025-05-09 11:14:42 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						760e3ecc8f 
					 
					
						
						
							
							[V1][Structured Output] Update llguidance (>= 0.7.11) to avoid AttributeError (no StructTag)  ( #17839 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: shen-shanshan <467638484@qq.com > 
						
						
					 
					
						2025-05-08 20:14:18 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3c9396a64f 
					 
					
						
						
							
							[FEAT][ROCm]: Support AITER MLA on V1 Engine ( #17523 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: vllmellm <vllm.ellm@embeddedllm.com >
Co-authored-by: qli88 <qiang.li2@amd.com >
Co-authored-by: Hongxia Yang <62075498+hongxiayang@users.noreply.github.com > 
						
						
					 
					
						2025-05-09 10:42:05 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						376786fac1 
					 
					
						
						
							
							Add cutlass support for blackwell fp8 blockwise gemm ( #14383 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Shu Wang <shuw@nvidia.com > 
						
						
					 
					
						2025-05-08 15:09:55 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4f605a6de5 
					 
					
						
						
							
							Fix noisy warning for uncalibrated q_scale/p_scale ( #17414 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-05-08 15:56:59 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8342e3abd1 
					 
					
						
						
							
							[CI] Prune down lm-eval small tests ( #17012 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-05-08 19:00:26 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a83a0f92b5 
					 
					
						
						
							
							[Test] Attempt all TPU V1 tests, even if some of them fail. ( #17334 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Yarong Mu <ymu@google.com > 
						
						
					 
					
						2025-05-08 17:20:54 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						226a4272cf 
					 
					
						
						
							
							[V1] Improve VLLM_ALLOW_INSECURE_SERIALIZATION logging ( #17860 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Russell Bryant <rbryant@redhat.com > 
						
						
					 
					
						2025-05-08 16:57:35 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ec54d73c31 
					 
					
						
						
							
							[CI] Fix test_collective_rpc ( #17858 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Russell Bryant <rbryant@redhat.com > 
						
						
					 
					
						2025-05-08 16:47:12 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a944f8ede7 
					 
					
						
						
							
							[Misc] Delete LoRA-related redundancy code ( #17841 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-05-08 06:02:21 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						015815fe01 
					 
					
						
						
							
							[Bugfix] use_fast failing to be propagated to Qwen2-VL image processor ( #17838 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-05-08 05:39:21 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e4ca6e3a99 
					 
					
						
						
							
							Fix transient dependency error in docs build ( #17848 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-05-08 03:42:03 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						53d0cb7423 
					 
					
						
						
							
							[Misc] add chatbox integration ( #17828 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-05-08 10:05:26 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f50dcb7c21 
					 
					
						
						
							
							[Easy] Eliminate c10::optional usage in vllm/csrc ( #17819 )  
						
						 
						
						
						
						
					 
					
						2025-05-08 03:05:10 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a1e19b635d 
					 
					
						
						
							
							[Doc] Fix a typo in the file name ( #17836 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-05-08 18:04:18 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						bb239a730f 
					 
					
						
						
							
							[Bugfix] Fix quark fp8 format loading on AMD GPUs ( #12612 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Felix Marty <felmarty@amd.com >
Signed-off-by: kewang2 <kewang2@amd.com >
Co-authored-by: kewang2 <kewang2@amd.com > 
						
						
					 
					
						2025-05-08 02:53:53 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a463555dee 
					 
					
						
						
							
							[TPU] Fix the test_sampler ( #17820 )  
						
						 
						
						
						
						
					 
					
						2025-05-08 05:51:33 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ca04b97c93 
					 
					
						
						
							
							[Bugfix] Fix tool call template validation for Mistral models ( #17644 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Rick Yuan <yuan821120@gmail.com >
Signed-off-by: RIck Yuan <yuan821120@gmail.com >
Co-authored-by: Aaron Pham <Aaronpham0103@gmail.com > 
						
						
					 
					
						2025-05-08 09:47:19 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0a9bbaa104 
					 
					
						
						
							
							[Misc] support model prefix & add deepseek vl2 tiny fused moe config ( #17763 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: 唯勤 <xsank.mz@alibaba-inc.com >
Co-authored-by: 唯勤 <xsank.mz@alibaba-inc.com > 
						
						
					 
					
						2025-05-08 07:50:22 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						39956efb3f 
					 
					
						
						
							
							[Bugfix] Fix bad words for Mistral models ( #17753 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Qiong Zhou Huang <qiong@phonic.co > 
						
						
					 
					
						2025-05-07 23:32:10 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						597051e56f 
					 
					
						
						
							
							[Qwen3]add qwen3-235b-bf16 fused moe config on A100 ( #17715 )  
						
						 
						
						
						
						
					 
					
						2025-05-07 23:09:32 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						96722aa81d 
					 
					
						
						
							
							[Frontend] Chat template fallbacks for multimodal models ( #17805 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-05-07 23:05:54 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						843b222723 
					 
					
						
						
							
							[Hardware][Intel-Gaudi] Support Automatic Prefix Caching on HPU ( #17648 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Agata Dobrzyniewicz <adobrzyniewicz@habana.ai > 
						
						
					 
					
						2025-05-07 22:37:03 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e515668edf 
					 
					
						
						
							
							[Hardware][Power] Enable compressed tensor W8A8 INT8 quantization for POWER ( #17153 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Akash Kaothalkar <akash.kaothalkar@ibm.com >
Co-authored-by: Akash Kaothalkar <akash.kaothalkar@ibm.com >
Co-authored-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-05-07 22:35:03 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5a499e70d5 
					 
					
						
						
							
							[Kernel][Hardware][AMD] Bf16 mfma opt for ROCm skinny GEMMs ( #17071 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Hashem Hashemi <hashem.hashemi@amd.com >
Signed-off-by: charlifu <charlifu@amd.com >
Co-authored-by: charlifu <charlifu@amd.com > 
						
						
					 
					
						2025-05-07 22:34:49 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6930a41116 
					 
					
						
						
							
							[V1] Add VLLM_ALLOW_INSECURE_SERIALIZATION env var ( #17490 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Russell Bryant <rbryant@redhat.com >
Signed-off-by: Nick Hill <nhill@redhat.com >
Co-authored-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-05-08 13:34:02 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						998eea4a0e 
					 
					
						
						
							
							Only log non-default CLI args for online serving ( #17803 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-05-07 22:33:29 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c747d84576 
					 
					
						
						
							
							[Installation] OpenTelemetry version update ( #17771 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Mikhail Podvitskii <podvitskiymichael@gmail.com > 
						
						
					 
					
						2025-05-07 22:32:49 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b2da14a05a 
					 
					
						
						
							
							Improve exception reporting in MP engine ( #17800 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Vadim Markovtsev <vadim@poolside.ai > 
						
						
					 
					
						2025-05-08 05:32:39 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7ea2adb802 
					 
					
						
						
							
							[Core] Support full cuda graph in v1 ( #16072 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chanh Nguyen <cnguyen@linkedin.com >
Co-authored-by: Chanh Nguyen <cnguyen@linkedin.com > 
						
						
					 
					
						2025-05-07 22:30:15 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3d13ca0e24 
					 
					
						
						
							
							[BugFix] Fix --disable-log-stats in V1 server mode ( #17600 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-05-08 04:08:15 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						66ab3b13c9 
					 
					
						
						
							
							Don't call the venv vllm ( #17810 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-05-08 04:06:39 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a8238bbdb0 
					 
					
						
						
							
							[Chore][Doc] uses model id determined from OpenAI client ( #17815 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Aaron Pham <contact@aarnphm.xyz > 
						
						
					 
					
						2025-05-08 01:48:57 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d43f914d42 
					 
					
						
						
							
							[Core][Feature] Input metadata dump on crash ( #13407 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Wallas Santos <wallashss@ibm.com > 
						
						
					 
					
						2025-05-07 22:15:09 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ed5272cf21 
					 
					
						
						
							
							[BugFix] Avoid secondary missing MultiprocExecutor.workers error ( #17811 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-05-07 21:55:04 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c20ef40fd0 
					 
					
						
						
							
							[Hardware][TPU][V1] Multi-LoRA implementation for the V1 TPU backend ( #14238 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Akshat Tripathi <akshat@krai.ai >
Signed-off-by: Chengji Yao <chengjiyao@google.com >
Co-authored-by: Chengji Yao <chengjiyao@google.com > 
						
						
					 
					
						2025-05-07 16:28:47 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						db593aa67f 
					 
					
						
						
							
							[Quantization] Quark MXFP4 format loading  ( #16943 )  
						
						 
						
						
						
						
					 
					
						2025-05-07 15:05:05 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f98e307588 
					 
					
						
						
							
							[Bugfix] Fix missing lora name mapping for lora without prefix ( #17793 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <2037008807@qq.com > 
						
						
					 
					
						2025-05-07 16:17:12 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						646a31e51e 
					 
					
						
						
							
							Fix and simplify deprecated=True CLI kwarg ( #17781 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-05-07 16:51:06 +01:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						be8ff88e66 
					 
					
						
						
							
							[Bugfix] Fix Video IO error for short video ( #17791 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <2037008807@qq.com > 
						
						
					 
					
						2025-05-07 15:36:06 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1a6af1453d 
					 
					
						
						
							
							Only depend on importlib-metadata for Python < 3.10 ( #17776 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Christian Heimes <christian@python.org > 
						
						
					 
					
						2025-05-07 07:51:06 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						32aa74c09c 
					 
					
						
						
							
							[ROCm][FP8][Kernel] FP8 quantization fused into Custom Paged Attention ( #17139 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Gregory Shtrasberg <Gregory.Shtrasberg@amd.com > 
						
						
					 
					
						2025-05-07 07:12:35 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7377dd0307 
					 
					
						
						
							
							[doc] update the issue link ( #17782 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-05-07 20:29:05 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						98c89e16ff 
					 
					
						
						
							
							Make key optional for rotary embedding ( #17566 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Yong Hoon Shin <yhshin@meta.com > 
						
						
					 
					
						2025-05-07 00:11:46 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						324a3119b0 
					 
					
						
						
							
							Fix test_memory_usage_no_spec ( #17754 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Yong Hoon Shin <yhshin@meta.com > 
						
						
					 
					
						2025-05-07 00:10:33 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8a15c2603a 
					 
					
						
						
							
							[Frontend] Add missing chat templates for various MLLMs ( #17758 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-05-07 00:10:01 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						043e4c4955 
					 
					
						
						
							
							Add NeuronxDistributedInference support, Speculative Decoding, Dynamic on-device sampling ( #16357 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Satyajith Chilappagari <satchill@amazon.com >
Co-authored-by: Aaron Dou <yzdou@amazon.com >
Co-authored-by: Shashwat Srijan <sssrijan@amazon.com >
Co-authored-by: Chongming Ni <chongmni@amazon.com >
Co-authored-by: Amulya Ballakur <amulyaab@amazon.com >
Co-authored-by: Patrick Lange <patlange@amazon.com >
Co-authored-by: Elaine Zhao <elaineyz@amazon.com >
Co-authored-by: Lin Lin Pan <tailinpa@amazon.com >
Co-authored-by: Navyadhara Gogineni <navyadha@amazon.com >
Co-authored-by: Yishan McNabb <yishanm@amazon.com >
Co-authored-by: Mrinal Shukla <181322398+mrinalks@users.noreply.github.com > 
						
						
					 
					
						2025-05-07 00:07:30 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ba7703e659 
					 
					
						
						
							
							[Misc] Remove  qlora_adapter_name_or_path ( #17699 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-05-06 23:10:37 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f80ae5bdcf 
					 
					
						
						
							
							[Kernel] Use fused rmsnorm for some models like qwen3 series ( #17735 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: evian <eviantai@u.nus.edu >
Co-authored-by: evian <eviantai@u.nus.edu > 
						
						
					 
					
						2025-05-06 23:10:02 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1a45a61387 
					 
					
						
						
							
							[Kernel] GGUF MoeVec kernel ( #16780 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: SzymonOzog <szymon.ozog@aleph-alpha.com >
Signed-off-by: SzymonOzog <szymon.ozog@gmail.com >
Signed-off-by: Isotr0py <2037008807@qq.com >
Co-authored-by: Isotr0py <2037008807@qq.com > 
						
						
					 
					
						2025-05-06 23:07:23 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c3e9d5060e 
					 
					
						
						
							
							[Misc] Use apply_rotary_emb from vllm_flash_attn for Qwen2-VL vision RoPE ( #17726 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <2037008807@qq.com > 
						
						
					 
					
						2025-05-07 04:51:33 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						822de7fb94 
					 
					
						
						
							
							[Misc] Split model loader ( #17712 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-05-07 12:42:26 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8d84d836d1 
					 
					
						
						
							
							[BugFix][Spec Decode] Fix hidden size mismatch between target and eagle head ( #17740 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-05-06 19:51:26 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						950b71186f 
					 
					
						
						
							
							Replace lm-eval bash script with pytest and use enforce_eager for faster CI ( #17717 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-05-06 18:00:10 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e50a1f1a9c 
					 
					
						
						
							
							[TPU] Add kernel test for moe_pallas ( #17496 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Michael Goin <mgoin64@gmail.com > 
						
						
					 
					
						2025-05-06 17:59:57 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a17cef70ea 
					 
					
						
						
							
							Removed unused marlin cuda code ( #17684 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-05-06 17:59:47 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						18dd5e01f2 
					 
					
						
						
							
							[Model] Mamba2 causal conv1d Refactor to Split Prefill and Decode Requests for Corresponding Kernels ( #17146 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chih-Chieh-Yang <7364402+cyang49@users.noreply.github.com > 
						
						
					 
					
						2025-05-06 17:59:30 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6de3e13413 
					 
					
						
						
							
							Add logging for torch nightly version ( #17669 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Yang Wang <elainewy@meta.com > 
						
						
					 
					
						2025-05-07 00:45:51 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ed3a1d2106 
					 
					
						
						
							
							[ROCm] fix num_stages for default moe config to avoid triton OutOfResource error ( #17744 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Hongxia Yang <hongxia.yang@amd.com > 
						
						
					 
					
						2025-05-07 00:39:48 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						022afbeb4e 
					 
					
						
						
							
							Fix doc build performance ( #17748 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-05-07 00:36:41 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2f925e5777 
					 
					
						
						
							
							[Kernel] Unified Triton kernel that doesn't distinguish between prefill + decode ( #16828 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Thomas Parnell <tpa@zurich.ibm.com >
Signed-off-by: Lucas Wilkinson <lwilkinson@neuralmagic.com >
Co-authored-by: Lucas Wilkinson <lwilkinson@neuralmagic.com > 
						
						
					 
					
						2025-05-06 18:21:48 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						de906b95f9 
					 
					
						
						
							
							[Bugfix] Fix for the condition to accept empty encoder inputs for mllama ( #17732 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Gregory Shtrasberg <Gregory.Shtrasberg@amd.com > 
						
						
					 
					
						2025-05-06 19:59:06 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d456aea71f 
					 
					
						
						
							
							[Misc] Add Next Edit Prediction (NEP) datasets support in benchmark_serving.py ( #16839 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: dtransposed <damian@damian-ml-machine.europe-west3-b .c.jetbrains-grazie.internal>
Signed-off-by: dtransposed <>
Co-authored-by: dtransposed <damian@damian-ml-machine.europe-west3-b .c.jetbrains-grazie.internal> 
						
						
					 
					
						2025-05-06 15:38:45 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						621ca2c0ab 
					 
					
						
						
							
							[TPU] Increase block size and reset block shapes ( #16458 )  
						
						 
						
						
						
						
					 
					
						2025-05-06 13:55:04 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6115b11582 
					 
					
						
						
							
							Make right sidebar more readable in "Supported Models" ( #17723 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-05-06 16:48:26 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5b8c390747 
					 
					
						
						
							
							[Bugfix] Fix modality limits in vision language example ( #17721 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-05-06 16:12:28 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7525d5f3d5 
					 
					
						
						
							
							[doc] Add RAG Integration example ( #17692 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-05-06 16:10:23 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						aabcd2cae3 
					 
					
						
						
							
							[v1] Introduce KVCacheBlocks as interface between Scheduler and KVCacheManager ( #17479 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chen Zhang <zhangch99@outlook.com > 
						
						
					 
					
						2025-05-06 08:50:34 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0d115460a7 
					 
					
						
						
							
							[Docs] Use gh-file to add links to tool_calling.md ( #17709 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: windsonsea <haifeng.yao@daocloud.io > 
						
						
					 
					
						2025-05-06 15:27:19 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						175bda67a1 
					 
					
						
						
							
							[Feat] Add deprecated=True to CLI args ( #17426 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Aaron Pham <contact@aarnphm.xyz > 
						
						
					 
					
						2025-05-06 08:11:27 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						cba31c47c4 
					 
					
						
						
							
							[v1] AttentionMetadata for each layer ( #17394 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chen Zhang <zhangch99@outlook.com > 
						
						
					 
					
						2025-05-06 07:58:37 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a6fed02068 
					 
					
						
						
							
							[V1][PP] Support PP for MultiprocExecutor ( #14219 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: jiang1.li <jiang1.li@intel.com >
Signed-off-by: jiang.li <jiang1.li@intel.com > 
						
						
					 
					
						2025-05-06 07:58:05 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d419aa5dc4 
					 
					
						
						
							
							[V1] Enable TPU V1 backend by default ( #17673 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-05-06 06:49:49 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f9bc5a0693 
					 
					
						
						
							
							[Bugfix] Fix triton import with local TritonPlaceholder ( #17446 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Mengqing Cao <cmq0113@163.com > 
						
						
					 
					
						2025-05-06 17:53:09 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						05e1f96419 
					 
					
						
						
							
							Fix dockerfilegraph pre-commit hook ( #17698 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-05-06 08:56:48 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6eae34533a 
					 
					
						
						
							
							[Misc] Fix ScalarType float4 naming  ( #17690 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lucas Wilkinson <lwilkinson@neuralmagic.com > 
						
						
					 
					
						2025-05-06 01:07:15 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						63ced7b43f 
					 
					
						
						
							
							[Doc] Update notes for H2O-VL and Gemma3 ( #17219 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-05-06 07:51:02 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						dc47ba32f8 
					 
					
						
						
							
							[Bugfix] Fixed prompt length for random dataset ( #17408 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Mikhail Podvitskii <podvitskiymichael@gmail.com > 
						
						
					 
					
						2025-05-06 07:00:08 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						edbf2d609e 
					 
					
						
						
							
							[easy] Fix logspam on PiecewiseBackend errors ( #17138 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: rzou <zou3519@gmail.com > 
						
						
					 
					
						2025-05-05 23:46:11 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						999328be0d 
					 
					
						
						
							
							[Model] Add GraniteMoeHybrid 4.0 model ( #17497 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Thomas Ortner <boh@zurich.ibm.com >
Signed-off-by: Stanislaw Wozniak <stw@zurich.ibm.com >
Co-authored-by: Thomas Ortner <boh@zurich.ibm.com >
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com >
Co-authored-by: Tyler Michael Smith <tysmith@redhat.com > 
						
						
					 
					
						2025-05-06 12:00:31 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						98834fefaa 
					 
					
						
						
							
							Update nm to rht in doc links + refine fp8 doc ( #17678 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-05-06 00:41:14 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						90bd2ae172 
					 
					
						
						
							
							[Bugfix] LoRA - Retire unused maxnreg LoRA kernel argument ( #17677 )  
						
						 
						
						
						
						
					 
					
						2025-05-05 17:34:29 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5941e0b7ea 
					 
					
						
						
							
							[TPU][V1] Add support for top-logprobs ( #17072 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: NickLucche <nlucches@redhat.com > 
						
						
					 
					
						2025-05-05 14:20:15 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9765940824 
					 
					
						
						
							
							[TPU] Enable gemma3-27b with TP>1 on multi-chips. ( #17335 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Xiongfei Wei <isaacwxf23@gmail.com > 
						
						
					 
					
						2025-05-05 14:19:58 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5ea5c514da 
					 
					
						
						
							
							[BugFix] Increase timeout for startup failure test ( #17642 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-05-05 20:53:19 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d3efde8176 
					 
					
						
						
							
							[Benchmarks] Remove invalid option under V1 engine ( #17651 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Russell Bryant <rbryant@redhat.com > 
						
						
					 
					
						2025-05-05 16:30:22 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						aea302be6c 
					 
					
						
						
							
							Use git-path commit in hook ( #17616 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Thomas J. Fan <thomasjpfan@gmail.com > 
						
						
					 
					
						2025-05-05 17:55:32 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						cc05b90d86 
					 
					
						
						
							
							[Doc] Fix broken cuda installation doc rendering ( #17654 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <2037008807@qq.com > 
						
						
					 
					
						2025-05-05 17:52:40 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1d0c9d6b2d 
					 
					
						
						
							
							[Kernel] some optimizations for dense marlin and moe marlin ( #16850 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jinzhen Lin <linjinzhen@hotmail.com > 
						
						
					 
					
						2025-05-05 09:39:30 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f62cad6431 
					 
					
						
						
							
							[Build/CI] Upgrade CUTLASS to 3.9.2 ( #17641 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com > 
						
						
					 
					
						2025-05-04 19:23:17 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5394ad7387 
					 
					
						
						
							
							[Bugfix] fix KeyError on top logprobs are special tokens ( #17637 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com > 
						
						
					 
					
						2025-05-04 19:22:35 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						68e1ee0072 
					 
					
						
						
							
							[Bugfix][Easy] Fix whitespace in shm_broadcast.py logging ( #17635 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com > 
						
						
					 
					
						2025-05-04 19:20:19 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2858830c39 
					 
					
						
						
							
							[Bugfix] Prioritize dtype in root config before checking text config ( #17629 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-05-04 12:43:05 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d6484ef3c3 
					 
					
						
						
							
							Add full API docs and improve the UX of navigating them ( #17485 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-05-03 19:42:43 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						46fae69cf0 
					 
					
						
						
							
							[Misc] V0 fallback for --enable-prompt-embeds ( #17615 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-05-03 22:59:24 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f66f1e0fa3 
					 
					
						
						
							
							[Bugfix] Fix broken Qwen2.5-omni tests ( #17613 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <2037008807@qq.com > 
						
						
					 
					
						2025-05-03 17:08:14 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						887d7af882 
					 
					
						
						
							
							[Core] Gate prompt_embeds behind a feature flag ( #17607 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-05-04 00:19:20 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a92842454c 
					 
					
						
						
							
							[Bugfix][ROCm] Using device_type because on ROCm the API is still torch.cuda ( #17601 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Gregory Shtrasberg <Gregory.Shtrasberg@amd.com > 
						
						
					 
					
						2025-05-02 22:25:47 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c8386fa61d 
					 
					
						
						
							
							[Build/CI] Upgrade CUTLASS to 3.9.1 ( #17602 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com > 
						
						
					 
					
						2025-05-02 22:25:14 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						87baebebd8 
					 
					
						
						
							
							[Frontend][TPU] Add TPU default max-num-batched-tokens based on device name  ( #17508 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chenyaaang <chenyangli@google.com > 
						
						
					 
					
						2025-05-02 21:42:44 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e3d0a1d190 
					 
					
						
						
							
							[Quantizaton] [AMD] Add support for running DeepSeek int8 w8a8 MoE on ROCm ( #17558 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Randall Smith <Randall.Smith@amd.com > 
						
						
					 
					
						2025-05-02 21:41:10 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d47b605eca 
					 
					
						
						
							
							Update test requirements to CUDA 12.8 ( #17576 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: 22quinn <33176974+22quinn@users.noreply.github.com > 
						
						
					 
					
						2025-05-02 21:40:15 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						22c6f6397f 
					 
					
						
						
							
							[Neuron][Build] Require setuptools >= 77.0.3 for PEP 639 ( #17603 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Liangfu Chen <liangfc@amazon.com > 
						
						
					 
					
						2025-05-03 02:41:59 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3ec97e2cc5 
					 
					
						
						
							
							[release] Add command to clean up Docker containers/images in TPU release machine ( #17606 )  
						
						 
						
						
						
						
					 
					
						2025-05-02 18:54:34 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9b103a1d76 
					 
					
						
						
							
							fix typo in logging ( #17605 )  
						
						 
						
						
						
						
					 
					
						2025-05-02 18:04:40 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b90b0852e9 
					 
					
						
						
							
							[easy] Print number of needed GPUs in skip message ( #17594 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: rzou <zou3519@gmail.com > 
						
						
					 
					
						2025-05-02 15:27:43 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9352cdb56d 
					 
					
						
						
							
							[Hardware][AMD] Improve OAM device ID + llama4 Maverick MOE tuning ( #16263 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lu Fang <lufang@fb.com >
Co-authored-by: Lu Fang <lufang@fb.com > 
						
						
					 
					
						2025-05-02 19:44:19 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						182f40ea8b 
					 
					
						
						
							
							Add NVIDIA TensorRT Model Optimizer in vLLM documentation ( #17561 )  
						
						 
						
						
						
						
					 
					
						2025-05-02 11:36:46 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3e887d2e0c 
					 
					
						
						
							
							permute/unpermute kernel for moe optimization ( #14568 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Caleb_Du <Caleb_Du@zju.edu.cn > 
						
						
					 
					
						2025-05-02 11:31:55 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0f87d8f7b2 
					 
					
						
						
							
							[BugFix][Attention] Fix sliding window attention in V1 giving incorrect results ( #17574 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lucas Wilkinson <lwilkinson@neuralmagic.com > 
						
						
					 
					
						2025-05-02 11:01:38 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4c33d67321 
					 
					
						
						
							
							[Bugfix] fix tmp_out and exp_sums dimensions ( #17438 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Hui Liu <96135754+hliuca@users.noreply.github.com > 
						
						
					 
					
						2025-05-02 16:44:07 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						cb234955df 
					 
					
						
						
							
							[Misc] Clean up input processing ( #17582 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-05-02 08:11:53 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3a500cd0b6 
					 
					
						
						
							
							[doc] miss result ( #17589 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-05-02 07:04:49 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						868c546da4 
					 
					
						
						
							
							Support W8A8 INT8 MoE for compressed-tensors ( #16745 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-05-02 10:03:32 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						99404f53c7 
					 
					
						
						
							
							[Security] Fix image hash collision ( #17378 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-05-02 08:36:39 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						785d75a03b 
					 
					
						
						
							
							Automatically tell users that dict args must be valid JSON in CLI ( #17577 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-05-02 05:24:55 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6d1479ca4b 
					 
					
						
						
							
							[doc] add the print result ( #17584 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-05-02 05:24:45 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b8b0859b5c 
					 
					
						
						
							
							add more pytorch related tests for torch nightly ( #17422 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Yang Wang <elainewy@meta.com > 
						
						
					 
					
						2025-05-02 03:29:59 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d7543862bd 
					 
					
						
						
							
							[Misc] Rename assets for testing ( #17575 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-05-02 03:29:25 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c777df79f7 
					 
					
						
						
							
							[BugFix] Fix Memory Leak ( #17567 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: rshaw@neuralmagic.com  <robertgshaw2@gmail.com > 
						
						
					 
					
						2025-05-02 01:07:03 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						cc2a77d7f1 
					 
					
						
						
							
							[Core] [Bugfix] Add Input Embeddings ( #15428 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Andrew Sansom <andrew@protopia.ai >
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk >
Co-authored-by: 临景 <linjing.yx@alibaba-inc.com >
Co-authored-by: Bryce1010 <bryceyx@gmail.com >
Co-authored-by: Nan2018 <nan@protopia.ai >
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com >
Co-authored-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-05-02 01:06:39 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9e2de9b9e9 
					 
					
						
						
							
							[Bugifx] Remove TritonPlaceholder from sys.modules ( #17317 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <2037008807@qq.com > 
						
						
					 
					
						2025-05-02 00:45:01 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						109e15a335 
					 
					
						
						
							
							Add pt_load_map_location to allow loading to cuda ( #16869 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jerry Zhang <jerryzh168@gmail.com > 
						
						
					 
					
						2025-05-01 23:23:42 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f192ca90e6 
					 
					
						
						
							
							Fix PixtralHF missing spatial_merge_size ( #17571 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-05-01 22:14:09 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f89d0e11bf 
					 
					
						
						
							
							[Misc] Continue refactoring model tests ( #17573 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-05-01 22:06:08 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b4003d11fc 
					 
					
						
						
							
							Check if bitblas is installed during support check ( #17572 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-05-02 04:32:54 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						292fc59d61 
					 
					
						
						
							
							[CI] Actually run tests/kv_transfer/test_disagg.py in CI ( #17555 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-05-02 04:05:04 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						afcb3f8863 
					 
					
						
						
							
							[Attention] MLA move o_proj q_proj into cuda-graph region ( #17484 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lucas Wilkinson <lwilkinson@neuralmagic.com > 
						
						
					 
					
						2025-05-02 03:16:26 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						afb12e4294 
					 
					
						
						
							
							[Doc] note that not all unit tests pass on CPU platforms ( #17554 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: David Xia <david@davidxia.com > 
						
						
					 
					
						2025-05-02 02:57:21 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						24aebae177 
					 
					
						
						
							
							[Bugfix] Disable gptq_bitblas for <SM80 to fix GPTQ on V100/T4 ( #17541 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-05-01 17:59:35 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						39c0813a7f 
					 
					
						
						
							
							[V1][Spec Decode] Apply torch.compile & cudagraph to EAGLE3 ( #17504 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: qizixi <qizixi@meta.com > 
						
						
					 
					
						2025-05-01 16:19:30 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9b70e2b4c1 
					 
					
						
						
							
							[Misc][Tools][Benchmark] Publish script to auto tune server parameters ( #17207 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chenyaaang <chenyangli@google.com > 
						
						
					 
					
						2025-05-01 19:53:03 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						173daac19d 
					 
					
						
						
							
							[Bug]change the position of cuda_graph_sizes in dataclasses ( #17548 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: CXIAAAAA <cxia0209@gmail.com > 
						
						
					 
					
						2025-05-01 11:52:37 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						04f2cfc894 
					 
					
						
						
							
							Remove duplicate code from dbrx.py ( #17550 )  
						
						 
						
						
						
						
					 
					
						2025-05-01 11:51:58 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						811a6c0972 
					 
					
						
						
							
							[ROCM] Add gfx950 to the custom attention archs ( #16034 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: jpvillam <Juan.Villamizar@amd.com >
Signed-off-by: seungrokjung <seungrok.jung@amd.com >
Signed-off-by: Gregory Shtrasberg <Gregory.Shtrasberg@amd.com >
Co-authored-by: seungrokjung <seungrok.jung@amd.com >
Co-authored-by: Gregory Shtrasberg <Gregory.Shtrasberg@amd.com > 
						
						
					 
					
						2025-05-01 11:18:28 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9b1769dd9a 
					 
					
						
						
							
							[Bugfix] Fix lint error ( #17547 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-05-01 11:12:19 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						61c299f81f 
					 
					
						
						
							
							[Misc]add configurable cuda graph size ( #17201 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: CXIAAAAA <cxia0209@gmail.com >
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com >
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-05-01 11:04:50 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4acfa3354a 
					 
					
						
						
							
							[ROCm] update installation guide to include build aiter from source instructions ( #17542 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Hongxia Yang <hongxia.yang@amd.com >
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com > 
						
						
					 
					
						2025-05-01 11:01:28 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						88c8304104 
					 
					
						
						
							
							[Model] Refactor Ovis2 to support original tokenizer ( #17537 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <2037008807@qq.com > 
						
						
					 
					
						2025-05-01 11:00:53 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6768ff4a22 
					 
					
						
						
							
							Move the last arguments in arg_utils.py to be in their final groups ( #17531 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-05-01 10:31:44 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f2e7af9b86 
					 
					
						
						
							
							[CI/Build] Remove awscli dependency ( #17532 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-05-01 09:20:54 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7423cf0a9b 
					 
					
						
						
							
							[Misc] refactor example - cpu_offload_lmcache ( #17460 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-05-01 15:05:24 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						460a2b1100 
					 
					
						
						
							
							[torch.compile] Add torch inductor pass for fusing silu_and_mul with subsequent scaled_fp8_quant operations ( #10867 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Sage Moore <sage@neuralmagic.com > 
						
						
					 
					
						2025-05-01 07:59:28 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						28566d73b3 
					 
					
						
						
							
							[ROCm] remove unsupported archs from rocm triton flash-attention supported list ( #17536 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Hongxia Yang <hongxia.yang@amd.com > 
						
						
					 
					
						2025-05-01 07:54:25 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						98060b001d 
					 
					
						
						
							
							[Feature][Frontend]: Deprecate --enable-reasoning ( #17452 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com > 
						
						
					 
					
						2025-05-01 06:46:16 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f5a3c655b2 
					 
					
						
						
							
							[FEAT] [ROCm]: Add Qwen/Qwen3-235B-A22B-FP8 TP4 triton fused moe config ( #17535 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: tjtanaa <tunjian.tan@embeddedllm.com > 
						
						
					 
					
						2025-05-01 06:37:17 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7169f87ad0 
					 
					
						
						
							
							[doc] add streamlit integration ( #17522 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-05-01 13:34:02 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b74d888c63 
					 
					
						
						
							
							Fix more broken speculative decode tests ( #17450 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Huy Do <huydhn@gmail.com > 
						
						
					 
					
						2025-05-01 06:05:58 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2007d4d54f 
					 
					
						
						
							
							[FEAT] [ROCm]: Add Qwen/Qwen3-30B-A3B-FP8 fused moe config for MI300X ( #17530 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: tjtanaa <tunjian.tan@embeddedllm.com > 
						
						
					 
					
						2025-05-01 06:03:13 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						48e925fab5 
					 
					
						
						
							
							[Misc] Clean up test docstrings and names ( #17521 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-05-01 05:19:32 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1903c0b8a3 
					 
					
						
						
							
							[Frontend] Show progress bar for adding requests ( #17525 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-05-01 05:15:32 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						86a1f67a3b 
					 
					
						
						
							
							[Bugfix][Benchmarks] Allow benchmark of deepspeed-mii backend to select a model ( #17285 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Teruaki Ishizaki <teruaki.ishizaki@ntt.com > 
						
						
					 
					
						2025-05-01 11:54:51 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a257d9bccc 
					 
					
						
						
							
							Improve configs - ObservabilityConfig ( #17453 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-05-01 03:52:05 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						015069b017 
					 
					
						
						
							
							[Misc] Optimize the Qwen3_ReasoningParser extract_reasoning_content ( #17515 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com > 
						
						
					 
					
						2025-05-01 03:29:01 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						fbefc8a78d 
					 
					
						
						
							
							[Core] Enable IPv6 with vllm.utils.make_zmq_socket() ( #16506 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Russell Bryant <rbryant@redhat.com > 
						
						
					 
					
						2025-05-01 09:38:18 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						26bc4bbcd8 
					 
					
						
						
							
							Avoid overwriting vllm_compile_cache.py ( #17418 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Keyun Tong <tongkeyun@gmail.com > 
						
						
					 
					
						2025-05-01 07:30:57 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3c3d767201 
					 
					
						
						
							
							[BugFix] Fix mla cpu - missing 3 required positional arguments ( #17494 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lucas Wilkinson <lwilkinson@neuralmagic.com > 
						
						
					 
					
						2025-05-01 14:36:52 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						13cf6b6236 
					 
					
						
						
							
							[BugFix] fix speculative decoding memory leak when speculation is disabled ( #15506 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Noah Yoshida <noahcy117@gmail.com > 
						
						
					 
					
						2025-04-30 23:28:17 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						90d0a54c4d 
					 
					
						
						
							
							[ROCm] Effort to reduce the number of environment variables in command line ( #17229 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Hongxia Yang <hongxia.yang@amd.com > 
						
						
					 
					
						2025-04-30 23:27:06 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7a0a146c54 
					 
					
						
						
							
							[Build] Require setuptools >= 77.0.3 for PEP 639 ( #17389 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Russell Bryant <rbryant@redhat.com > 
						
						
					 
					
						2025-04-30 23:25:36 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7ab643e425 
					 
					
						
						
							
							FIxing the AMD test failures caused by PR#16457 ( #17511 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Alexei V. Ivanov <alexei.ivanov@amd.com > 
						
						
					 
					
						2025-04-30 23:23:07 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						afb4429b4f 
					 
					
						
						
							
							[CI/Build] Reorganize models tests ( #17459 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-04-30 23:03:08 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						aa4502e7f3 
					 
					
						
						
							
							[CI][Bugfix] Fix failing V1 Test due to missing 'cache_salt' arg ( #17500 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-04-30 21:03:30 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						17b4d85f63 
					 
					
						
						
							
							[CI][TPU] Skip structured outputs+spec decode tests on TPU ( #17510 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-04-30 20:36:20 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1144a8efe7 
					 
					
						
						
							
							[Bugfix] Temporarily disable gptq_bitblas on ROCm ( #17411 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Yan Cangang <nalanzeyu@gmail.com > 
						
						
					 
					
						2025-04-30 19:51:45 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						08fb5587b4 
					 
					
						
						
							
							[Bugfix][ROCm] Fix import error on ROCm ( #17495 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Gregory Shtrasberg <Gregory.Shtrasberg@amd.com > 
						
						
					 
					
						2025-04-30 19:51:42 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						dbc18e7816 
					 
					
						
						
							
							[CI][TPU] Skip Multimodal test ( #17488 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Siyuan Liu <lsiyuan@google.com > 
						
						
					 
					
						2025-04-30 19:51:39 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						02bd654846 
					 
					
						
						
							
							[Misc] Rename Audios -> Audio in Qwen2audio Processing ( #17507 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Alex-Brooks <Alex.Brooks@ibm.com > 
						
						
					 
					
						2025-04-30 19:51:36 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						200bbf92e8 
					 
					
						
						
							
							Bump Compressed Tensors version to 0.9.4 ( #17478 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Rahul Tuli <rtuli@redhat.com >
Co-authored-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-04-30 15:24:45 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						81ecf425f0 
					 
					
						
						
							
							[v1][Spec Decode] Make sliding window compatible with eagle prefix caching ( #17398 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chen Zhang <zhangch99@outlook.com > 
						
						
					 
					
						2025-04-30 18:25:53 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						42d9a2c4c7 
					 
					
						
						
							
							doc: fix bug report Github template formatting ( #17486 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: David Xia <david@davidxia.com > 
						
						
					 
					
						2025-04-30 10:03:20 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2ac74d098e 
					 
					
						
						
							
							[doc] add install tips ( #17373 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-04-30 17:02:41 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						584f5fb4c6 
					 
					
						
						
							
							[Bugfix][ROCm] Restrict ray version due to a breaking release ( #17480 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Gregory Shtrasberg <Gregory.Shtrasberg@amd.com > 
						
						
					 
					
						2025-04-30 09:59:06 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d586ddc691 
					 
					
						
						
							
							[BugFix] Fix authorization of openai_transcription_client.py ( #17321 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: zh Wang <rekind133@outlook.com > 
						
						
					 
					
						2025-04-30 09:51:05 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0b7e701dd4 
					 
					
						
						
							
							[Docs] Update optimization.md doc ( #17482 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-04-30 09:34:02 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						947f2f5375 
					 
					
						
						
							
							[V1] Allow turning off pickle fallback in vllm.v1.serial_utils ( #17427 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Russell Bryant <rbryant@redhat.com >
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com > 
						
						
					 
					
						2025-04-30 16:10:54 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						739e03b344 
					 
					
						
						
							
							[Bugfix] Fixed mistral tokenizer path when pointing to file ( #17457 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Pete Savage <psavage@redhat.com > 
						
						
					 
					
						2025-04-30 08:08:37 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						da4e7687b5 
					 
					
						
						
							
							[Fix] Support passing args to logger ( #17425 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Aaron Pham <contact@aarnphm.xyz > 
						
						
					 
					
						2025-04-30 08:06:58 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						39317cf42b 
					 
					
						
						
							
							[Docs] Add command for running mypy tests from CI ( #17475 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Russell Bryant <rbryant@redhat.com > 
						
						
					 
					
						2025-04-30 08:06:09 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2990cee95b 
					 
					
						
						
							
							[Feature] The Qwen3 reasoning parser supports  guided decoding ( #17466 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com > 
						
						
					 
					
						2025-04-30 07:48:21 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0be6d05b5e 
					 
					
						
						
							
							[V1][Metrics] add support for kv event publishing ( #16750 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: alec-flowers <aflowers@nvidia.com >
Signed-off-by: Mark McLoughlin <markmc@redhat.com >
Co-authored-by: Mark McLoughlin <markmc@redhat.com > 
						
						
					 
					
						2025-04-30 07:44:45 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						77073c77bc 
					 
					
						
						
							
							[Core] Prevent side-channel attacks via cache salting ( #17045 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Marko Rosenmueller <5467316+dr75@users.noreply.github.com > 
						
						
					 
					
						2025-04-30 20:27:21 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a7d5b016bd 
					 
					
						
						
							
							[TPU][V1][CI] Update regression test baseline for v6 CI ( #17064 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: NickLucche <nlucches@redhat.com > 
						
						
					 
					
						2025-04-30 04:03:22 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d803786731 
					 
					
						
						
							
							[V1][Bugfix]: vllm v1 verison metric num_gpu_blocks is None ( #15755 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: rongfu.leng <rongfu.leng@daocloud.io > 
						
						
					 
					
						2025-04-30 18:20:39 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1534d389af 
					 
					
						
						
							
							[Misc] Remove deprecated files ( #17447 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com > 
						
						
					 
					
						2025-04-30 01:52:19 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ece5a8b0b6 
					 
					
						
						
							
							Make the _apply_rotary_emb compatible with dynamo ( #17435 )  
						
						 
						
						
						
						
					 
					
						2025-04-30 07:52:48 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						54072f315f 
					 
					
						
						
							
							[MODEL ADDITION] Ovis2 Model Addition ( #15826 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Marco <121761685+mlinmg@users.noreply.github.com >
Signed-off-by: Isotr0py <2037008807@qq.com >
Signed-off-by: isotr0py <2037008807@qq.com >
Co-authored-by: Isotr0py <2037008807@qq.com >
Co-authored-by: Isotr0py <mozf@mail2.sysu.edu.cn > 
						
						
					 
					
						2025-04-30 07:33:29 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						be633fba0f 
					 
					
						
						
							
							[Bugfix] Fix AttributeError: 'State' object has no attribute 'engine_client' ( #17434 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com > 
						
						
					 
					
						2025-04-30 00:11:04 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ed6cfb90c8 
					 
					
						
						
							
							[Hardware][Intel GPU] Upgrade to torch 2.7 ( #17444 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Kunshang Ji <kunshang.ji@intel.com >
Co-authored-by: Qiming Zhang <qiming1.zhang@intel.com > 
						
						
					 
					
						2025-04-30 00:03:58 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6ed9f6047e 
					 
					
						
						
							
							[Intel GPU] [CI]Fix XPU ci, setuptools >=80.0 have build issue ( #17298 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Kunshang Ji <kunshang.ji@intel.com > 
						
						
					 
					
						2025-04-29 22:54:10 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a44c4f1d2f 
					 
					
						
						
							
							Support LoRA for Mistral3 ( #17428 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-04-29 21:10:30 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						88fcf00dda 
					 
					
						
						
							
							Fix some speculative decode tests with tl.dot ( #17371 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Huy Do <huydhn@gmail.com > 
						
						
					 
					
						2025-04-29 19:41:02 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d1f569b1b9 
					 
					
						
						
							
							Fix call to logger.info_once ( #17416 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-04-29 19:39:18 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						13698db634 
					 
					
						
						
							
							Improve configs - ModelConfig ( #17130 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-04-30 10:38:22 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2c4f59afc3 
					 
					
						
						
							
							Update PyTorch to 2.7.0 ( #16859 )  
						
						 
						
						
						
						
					 
					
						2025-04-29 19:08:04 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1c2bc7ead0 
					 
					
						
						
							
							Truncation control for embedding models ( #14776 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Gabriel Marinho <gmarinho@ibm.com >
Signed-off-by: Max de Bayser <mbayser@br.ibm.com >
Co-authored-by: Max de Bayser <mbayser@br.ibm.com > 
						
						
					 
					
						2025-04-30 09:24:57 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4055130a85 
					 
					
						
						
							
							[release] Always git fetch all to get latest tag on TPU release ( #17322 )  
						
						 
						
						
						
						
					 
					
						2025-04-29 17:52:11 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						34120f5acd 
					 
					
						
						
							
							[V1][Feature] Enable Speculative Decoding with Structured Outputs ( #14702 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Benjamin Chislett <benjamin.chislett@centml.ai >
Signed-off-by: Benjamin Chislett <chislett.ben@gmail.com > 
						
						
					 
					
						2025-04-30 00:02:10 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7489ec0bab 
					 
					
						
						
							
							Remove Bamba 9B from CI ( #17407 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-04-29 21:10:31 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						70788bdbdc 
					 
					
						
						
							
							[V1][Spec Decode] Apply torch.compile & cudagraph to EAGLE ( #17211 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Bryan Lu <yuzhelu@amazon.com > 
						
						
					 
					
						2025-04-29 21:10:00 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c9c1b59e59 
					 
					
						
						
							
							Fix: Python package installation for opentelmetry ( #17049 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Dilip Gowda Bhagavan <dilip.bhagavan@ibm.com > 
						
						
					 
					
						2025-04-29 20:20:24 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0350809f3a 
					 
					
						
						
							
							Remove Falcon3 2x7B from CI ( #17404 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-04-29 19:52:25 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a6977dbd15 
					 
					
						
						
							
							Simplify (and fix) passing of guided decoding backend options ( #17008 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-04-29 19:02:23 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2fa2a50bf9 
					 
					
						
						
							
							[Bugfix] Fix Minicpm-O-int4 GPTQ model inference ( #17397 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <2037008807@qq.com > 
						
						
					 
					
						2025-04-29 18:21:42 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						08e15defa9 
					 
					
						
						
							
							[CI/Build] Add retry mechanism for add-apt-repository ( #17107 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-04-29 10:40:52 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b37685afbb 
					 
					
						
						
							
							[CI] Uses Python 3.11 for TPU ( #17359 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Aaron Pham <contact@aarnphm.xyz > 
						
						
					 
					
						2025-04-29 17:39:16 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						792595b59d 
					 
					
						
						
							
							[TPU][V1][CI] Replace python3 setup.py develop with standard pip install --e on TPU ( #17374 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: NickLucche <nlucches@redhat.com > 
						
						
					 
					
						2025-04-29 10:36:48 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0c1c788312 
					 
					
						
						
							
							[Doc][Typo] Fixing label in new model requests link in overview.md ( #17400 )  
						
						 
						
						
						
						
					 
					
						2025-04-29 10:29:48 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						56d64fbe30 
					 
					
						
						
							
							[Docs] Propose a deprecation policy for the project ( #17063 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Russell Bryant <rbryant@redhat.com >
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com > 
						
						
					 
					
						2025-04-29 10:29:44 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						608968b7c5 
					 
					
						
						
							
							Enabling multi-group kernel tests. ( #17115 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Alexei V. Ivanov <alexei.ivanov@amd.com > 
						
						
					 
					
						2025-04-29 10:27:27 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						06ffc7e1d3 
					 
					
						
						
							
							[Misc][ROCm] Exclude cutlass_mla_decode for ROCm build ( #17289 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Tianyuan Wu <Tianyuan.Wu@amd.com > 
						
						
					 
					
						2025-04-29 10:26:42 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d3cf61b89b 
					 
					
						
						
							
							fix gemma3 results all zero ( #17364 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mayuyuace <qiming1.zhang@intel.com > 
						
						
					 
					
						2025-04-29 09:40:25 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a39203f99e 
					 
					
						
						
							
							[Bugfix] add qwen3 reasoning-parser fix content is None when disable … ( #17369 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mofanke <mofanke@gmail.com > 
						
						
					 
					
						2025-04-29 16:32:40 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						24e6ad3f16 
					 
					
						
						
							
							[V1] Remove num_input_tokens from attn_metadata ( #17193 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chen Zhang <zhangch99@outlook.com > 
						
						
					 
					
						2025-04-29 09:28:41 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2ef5d106bb 
					 
					
						
						
							
							Improve literal dataclass field conversion to argparse argument ( #17391 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-04-29 16:25:08 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0ed27ef66c 
					 
					
						
						
							
							Fix: Spelling of inference ( #17387 )  
						
						 
						
						
						
						
					 
					
						2025-04-29 09:23:39 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						900edfa8d4 
					 
					
						
						
							
							Transformers backend tweaks ( #17365 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-04-29 09:08:03 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						88ad9ec6b2 
					 
					
						
						
							
							[Frontend] Support chat_template_kwargs in LLM.chat ( #17356 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-04-29 22:03:35 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						40896bdf3f 
					 
					
						
						
							
							pre-commit autoupdate (#17380 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-04-29 06:46:55 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						00ee37efa2 
					 
					
						
						
							
							[Bugfix] Clean up MiniMax-VL and fix processing ( #17354 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-04-29 20:42:16 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						890f104cdf 
					 
					
						
						
							
							[Doc] Fix QWen3MOE info ( #17381 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-04-29 12:38:32 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4a5e13149a 
					 
					
						
						
							
							Update docs requirements ( #17379 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-04-29 11:35:47 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						97cc8729f0 
					 
					
						
						
							
							[Model] Ignore rotary embed load for Cohere model ( #17319 )  
						
						 
						
						
						
						
					 
					
						2025-04-29 00:30:40 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4464109219 
					 
					
						
						
							
							[Build][Bugfix] Restrict setuptools version to <80 ( #17320 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Gregory Shtrasberg <Gregory.Shtrasberg@amd.com > 
						
						
					 
					
						2025-04-29 00:17:23 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						193e78e35d 
					 
					
						
						
							
							[Fix] Documentation spacing in compilation config help text ( #17342 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Zerohertz <ohg3417@gmail.com > 
						
						
					 
					
						2025-04-29 00:16:17 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						bdb2cddafc 
					 
					
						
						
							
							[Misc]Use a platform independent interface to obtain the device attributes ( #17100 )  
						
						 
						
						
						
						
					 
					
						2025-04-29 06:59:13 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ebb3930d28 
					 
					
						
						
							
							[Misc] Move config fields to MultiModalConfig ( #17343 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-04-29 06:37:21 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						cde384cd92 
					 
					
						
						
							
							[Model] support MiniMax-VL-01 model ( #16328 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: qingjun <qingjun@minimaxi.com > 
						
						
					 
					
						2025-04-29 12:05:50 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						96e06e3cb7 
					 
					
						
						
							
							[Misc] Add a Jinja template to support Mistral3 function calling ( #17195 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com > 
						
						
					 
					
						2025-04-28 19:53:44 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						17eb306fcc 
					 
					
						
						
							
							[Bugfix] Add contiguous call inside rope kernel wrapper ( #17091 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: 苏政渊 <suzhengyuan@moonshot.cn >
Co-authored-by: 苏政渊 <suzhengyuan@moonshot.cn > 
						
						
					 
					
						2025-04-28 19:24:07 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						165cb56329 
					 
					
						
						
							
							Ignore '<string>' filepath ( #17330 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: rzou <zou3519@gmail.com > 
						
						
					 
					
						2025-04-28 19:23:29 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d6da8a8ff2 
					 
					
						
						
							
							[Bugfix] Fix numel() downcast in fused_layernorm_dynamic_per_token_quant.cu ( #17316 )  
						
						 
						
						
						
						
					 
					
						2025-04-28 19:23:18 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b4ac4fa04d 
					 
					
						
						
							
							[model] make llama4 compatible with pure dense layers ( #17315 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lucia Fang <fanglu@fb.com > 
						
						
					 
					
						2025-04-29 10:22:22 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e136000595 
					 
					
						
						
							
							[V1][Spec Decode] Make Eagle model arch config driven ( #17323 )  
						
						 
						
						
						
						
					 
					
						2025-04-29 10:22:02 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						86d9fc29cb 
					 
					
						
						
							
							implement Structural Tag with Guidance backend ( #17333 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Michal Moskal <michal@moskal.me > 
						
						
					 
					
						2025-04-29 02:21:32 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						506475de5f 
					 
					
						
						
							
							[Optim] Compute multimodal hash only once per item ( #17314 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-04-29 09:40:35 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						cfe4532093 
					 
					
						
						
							
							[Benchmark] Add single turn MTBench to Serving Bench ( #17202 )  
						
						 
						
						
						
						
					 
					
						2025-04-28 16:46:15 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8fc88d63f1 
					 
					
						
						
							
							[Model] Add tuned triton fused_moe configs for Qwen3Moe ( #17328 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-04-28 15:20:24 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6e74fd4945 
					 
					
						
						
							
							Support loading transformers models with named parameters ( #16868 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Alex <alexwu@character.ai > 
						
						
					 
					
						2025-04-28 23:15:58 +01:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						dcbac4cb4b 
					 
					
						
						
							
							[Model] Qwen3 Dense FP8 Compat Fixes ( #17318 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: simon-mo <xmo@berkeley.edu > 
						
						
					 
					
						2025-04-28 14:12:01 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ed2462030f 
					 
					
						
						
							
							[Bugfix] Fix moe weight losing all extra attrs after process_weights_after_loading. ( #16854 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: charlifu <charlifu@amd.com > 
						
						
					 
					
						2025-04-28 21:05:07 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						cc5befbced 
					 
					
						
						
							
							[BugFix] Fix cascade attention - RuntimeError: scheduler_metadata must have shape (metadata_size) ( #17283 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lucas Wilkinson <lwilkinson@neuralmagic.com > 
						
						
					 
					
						2025-04-28 13:55:50 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2c89cd96a8 
					 
					
						
						
							
							[Chore] cleanup license indicators in light of SPDX ( #17259 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Aaron Pham <contact@aarnphm.xyz >
Co-authored-by: Russell Bryant <rbryant@redhat.com > 
						
						
					 
					
						2025-04-28 19:43:52 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a0304dc504 
					 
					
						
						
							
							[Security] Don't bind tcp zmq socket to all interfaces ( #17197 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Russell Bryant <rbryant@redhat.com > 
						
						
					 
					
						2025-04-28 10:08:20 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c7941cca18 
					 
					
						
						
							
							Explicitly explain quant method override ordering and ensure all overrides are ordered ( #17256 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-04-28 16:55:31 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b6dd32aa07 
					 
					
						
						
							
							Make name of compressed-tensors quant method consistent across vLLM ( #17255 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-04-28 16:28:13 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f94886946e 
					 
					
						
						
							
							Improve conversion from dataclass configs to argparse arguments ( #17303 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-04-28 16:22:12 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						72dfe4c74f 
					 
					
						
						
							
							[Docs] Add a security guide ( #17230 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Russell Bryant <rbryant@redhat.com >
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com > 
						
						
					 
					
						2025-04-28 15:12:17 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8b464d9660 
					 
					
						
						
							
							[Misc] Clean up Qwen2.5-Omni code ( #17301 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-04-28 06:20:45 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						889ebb2638 
					 
					
						
						
							
							[Misc] Minor typo/grammar in platforms/interface.py ( #17307 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: NickLucche <nlucches@redhat.com > 
						
						
					 
					
						2025-04-28 05:45:42 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3ad986c28b 
					 
					
						
						
							
							[doc] update wrong model id ( #17287 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-04-28 04:20:51 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						344e193b7d 
					 
					
						
						
							
							[Bugfix] Add missing get_language_model to new MLLMs ( #17300 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-04-28 04:09:57 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						fb1c933ade 
					 
					
						
						
							
							Add missing class docstring for PromptAdapterConfig ( #17302 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-04-28 04:06:59 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						72c5b97231 
					 
					
						
						
							
							Update tpu_worker.py 's typo ( #17288 )  
						
						 
						
						
						
						
					 
					
						2025-04-28 04:01:15 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						fa93cd9f60 
					 
					
						
						
							
							[Model] Add Granite Speech Support ( #16246 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Alex-Brooks <Alex.brooks@ibm.com >
Signed-off-by: Alex-Brooks <Alex.Brooks@ibm.com > 
						
						
					 
					
						2025-04-28 10:05:00 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						aec9674dbe 
					 
					
						
						
							
							[Core] Remove legacy input mapper/processor from V0 ( #15686 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-04-28 15:38:48 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7fcc4223dc 
					 
					
						
						
							
							[Minor][Models] Pass partial_rotary_factor parameter to rope ( #17266 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: evian <eviantai@u.nus.edu >
Co-authored-by: evian <eviantai@u.nus.edu > 
						
						
					 
					
						2025-04-28 04:28:59 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8262a3e23b 
					 
					
						
						
							
							[Misc] Validate stop_token_ids contents ( #17268 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-04-28 03:54:05 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f211331c48 
					 
					
						
						
							
							[Doc] small fix ( #17277 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-04-28 03:53:35 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9053d0b134 
					 
					
						
						
							
							[Doc] Fix wrong github link in LMCache examples ( #17274 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: KuntaiDu <kuntai@uchicago.edu > 
						
						
					 
					
						2025-04-28 03:09:11 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						cb3f2d8d10 
					 
					
						
						
							
							[Bugfix] Fix Mistral3 spatial merge error ( #17270 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-04-27 19:40:05 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c12df53b60 
					 
					
						
						
							
							[Bugfix] Fix cutlass dispatch for fp8/int8 to properly invoke M<=16 c… ( #16751 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Ther-LF <2639852836@qq.com > 
						
						
					 
					
						2025-04-27 19:38:42 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d1aeea7553 
					 
					
						
						
							
							[Bugfix] Fix missing ARG in Dockerfile for arm64 platforms ( #17261 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: lkm-schulz <44176356+lkm-schulz@users.noreply.github.com > 
						
						
					 
					
						2025-04-27 19:38:14 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d8bccde686 
					 
					
						
						
							
							[BugFix] Fix vllm_flash_attn install issues ( #17267 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lucas Wilkinson <lwilkinson@neuralmagic.com >
Co-authored-by: Jee Jee Li <pandaleefree@gmail.com >
Co-authored-by: Aaron Pham <contact@aarnphm.xyz > 
						
						
					 
					
						2025-04-27 17:27:56 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						20e489eaa1 
					 
					
						
						
							
							[V1][Spec Decode] Make eagle compatible with prefix caching. ( #17137 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: LiuXiaoxuanPKU <lilyliupku@gmail.com > 
						
						
					 
					
						2025-04-27 09:29:43 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4213475ec7 
					 
					
						
						
							
							[Metrics] Fix minor inconsistencies in bucket progression ( #17262 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-04-27 16:19:39 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d92879baf6 
					 
					
						
						
							
							[doc] Add feature status legend ( #17257 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-04-27 08:17:02 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						690fe019f0 
					 
					
						
						
							
							[Feature] support sequence parallelism using compilation pass ( #16155 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: cascade812 <cascade812@outlook.com >
Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com >
Co-authored-by: Tyler Michael Smith <tyler@neuralmagic.com > 
						
						
					 
					
						2025-04-27 06:29:35 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ed7a29d9f8 
					 
					
						
						
							
							[NVIDIA] Support Cutlass MLA for Blackwell GPUs ( #16032 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: kaixih <kaixih@nvidia.com > 
						
						
					 
					
						2025-04-27 06:29:21 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						756848e79e 
					 
					
						
						
							
							[Bugfix] Fix Lora Name Parsing ( #17196 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Alex-Brooks <Alex.Brooks@ibm.com >
Co-authored-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-04-27 20:33:09 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						18445edd0f 
					 
					
						
						
							
							[Misc] Change buckets of histogram_iteration_tokens to [1, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8096] to represent number of tokens ( #17033 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: sfc-gh-zhwang <flex.wang@snowflake.com > 
						
						
					 
					
						2025-04-27 12:30:53 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						30215ca61f 
					 
					
						
						
							
							[MISC] Use string annotation types for class definitions ( #17244 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jade Zheng <zheng.shoujian@outlook.com > 
						
						
					 
					
						2025-04-27 08:39:57 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						838cedade7 
					 
					
						
						
							
							[Bugfix] Get a specific type of layer from forward context ( #17222 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chen Zhang <zhangch99@outlook.com > 
						
						
					 
					
						2025-04-27 00:58:05 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4283a28c2f 
					 
					
						
						
							
							[Bugfix] Fix QWen2 VL multimodal mapping ( #17240 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-04-27 05:53:23 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						93a126fbc7 
					 
					
						
						
							
							[Misc] Make cached tokenizer pickle-compatible ( #17048 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-04-27 13:05:00 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8e4b351a0c 
					 
					
						
						
							
							[Kernel][Triton][FP8] Adding fp8 and variable length sequence support to Triton FAv2 kernel ( #12591 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Randall Smith <Randall.Smith@amd.com > 
						
						
					 
					
						2025-04-27 00:35:08 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9869453c42 
					 
					
						
						
							
							Update test_flash_attn.py ( #17102 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: ShuaibinLi <lishuaibin@live.cn > 
						
						
					 
					
						2025-04-26 22:17:35 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3642c59aa8 
					 
					
						
						
							
							[CI/Build] remove -t for run-lm-eval-gsm-hf-baseline.sh ( #16271 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-04-26 18:25:05 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						43eea2953b 
					 
					
						
						
							
							[Minor] Fix lint error in main branch ( #17233 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-04-26 11:10:14 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						de7eb10ce4 
					 
					
						
						
							
							[Bugfix] Fix Qwen2.5-Omni M-RoPE position ids generation ( #16878 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: imkero <kerorek@outlook.com > 
						
						
					 
					
						2025-04-26 10:41:35 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						fd11a325b8 
					 
					
						
						
							
							[MISC] rename interval to max_recent_requests ( #14285 )  
						
						 
						
						
						
						
					 
					
						2025-04-26 16:59:18 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4d17e20310 
					 
					
						
						
							
							Disable the torch.compile cache checks when VLLM_DISABLE_COMPILE_CACHE=1 ( #16573 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lu Fang <lufang@fb.com > 
						
						
					 
					
						2025-04-26 09:17:58 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						10fd1d7380 
					 
					
						
						
							
							[Bugfix] fix error due to an uninitialized tokenizer when using skip_tokenizer_init with num_scheduler_steps ( #9276 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: changjun.lee <pord7457@gmail.com > 
						
						
					 
					
						2025-04-26 11:51:17 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						52b4f4a8d7 
					 
					
						
						
							
							[Docs] Update structured output doc for V1 ( #17135 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Russell Bryant <rbryant@redhat.com > 
						
						
					 
					
						2025-04-26 15:12:18 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e782e0a170 
					 
					
						
						
							
							[Chore] added stubs for vllm_flash_attn during development mode ( #17228 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Aaron Pham <contact@aarnphm.xyz > 
						
						
					 
					
						2025-04-26 07:45:26 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						dc2ceca5c5 
					 
					
						
						
							
							[BUGFIX] use random for NONE_HASH only when PYTHONHASHSEED not set ( #17088 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Andy Xie <andy.xning@gmail.com > 
						
						
					 
					
						2025-04-26 14:34:24 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f8acd01ff7 
					 
					
						
						
							
							[V1] Add structural_tag support using xgrammar ( #17085 )  
						
						 
						
						
						
						
					 
					
						2025-04-26 14:06:37 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c48334d405 
					 
					
						
						
							
							[Hardware][Intel-Gaudi] Update hpu-extension and update bucketing system for HPU device ( #17186 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Agata Dobrzyniewicz <adobrzyniewicz@habana.ai > 
						
						
					 
					
						2025-04-26 05:55:14 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						909fdaf152 
					 
					
						
						
							
							[Bugfix] Fix standard models tests ( #17217 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-04-26 02:26:41 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8c1c926d00 
					 
					
						
						
							
							[Bugfix] Fix missing int type for -n in multi-image example ( #17223 )  
						
						 
						
						
						
						
					 
					
						2025-04-26 08:49:52 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						df6f3ce883 
					 
					
						
						
							
							[Core] Remove prompt string from engine core data structures ( #17214 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-04-25 23:41:05 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						513f074766 
					 
					
						
						
							
							[CI/test] Fix Eagle Correctness Test ( #17209 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-04-25 23:40:36 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b07bf83c7d 
					 
					
						
						
							
							[BugFix] Avoid race conditions in zero-copy tensor transmission ( #17203 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-04-26 06:00:07 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						53e8cf53a4 
					 
					
						
						
							
							[V1][Metrics] Allow V1 AsyncLLM to use custom logger ( #14661 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Zijing Liu <liuzijing2014@gmail.com >
Signed-off-by: Mark McLoughlin <markmc@redhat.com >
Signed-off-by: Nick Hill <nhill@redhat.com >
Co-authored-by: Mark McLoughlin <markmc@redhat.com >
Co-authored-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-04-25 22:05:40 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						54271bb766 
					 
					
						
						
							
							[ROCm][Misc] Follow-ups for Skinny Gemms on ROCm. ( #17011 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: charlifu <charlifu@amd.com > 
						
						
					 
					
						2025-04-25 22:05:10 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9e96f56efb 
					 
					
						
						
							
							Allocate kv_cache with stride order ( #16605 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: shuw <shuw@nvidia.com > 
						
						
					 
					
						2025-04-25 22:03:31 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b278911229 
					 
					
						
						
							
							[Minor][Models] Fix Return Types of Llama & Eagle ( #17220 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-04-25 21:54:47 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7bd0c7745c 
					 
					
						
						
							
							[Doc] Minor fix for the vLLM TPU setup page ( #17206 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Yarong Mu <ymu@google.com > 
						
						
					 
					
						2025-04-26 04:39:56 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1cf0719ebd 
					 
					
						
						
							
							[Minor][Spec Decode] Add use_eagle to SpeculativeConfig ( #17213 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-04-25 21:08:15 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						537d5ee025 
					 
					
						
						
							
							[doc] add Anything LLM integration ( #17216 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-04-25 21:03:23 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c8e5be35f7 
					 
					
						
						
							
							[MISC][AMD] Add unused annotation to rocm kernel file ( #17097 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lu Fang <lufang@fb.com > 
						
						
					 
					
						2025-04-25 20:33:35 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a6e72e1e4f 
					 
					
						
						
							
							[Bugfix] [pytorch] Patch AOTAutogradCache._get_shape_env ( #17142 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: James Wu <jjwu@meta.com > 
						
						
					 
					
						2025-04-26 11:28:20 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5e83a7277f 
					 
					
						
						
							
							[v1] [P/D] Adding LMCache KV connector for v1 ( #16625 )  
						
						 
						
						
						
						
					 
					
						2025-04-26 03:03:38 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						68af5f6c5c 
					 
					
						
						
							
							[AMD][FP8][BugFix] Remove V1 check in arg_utils.py for FP8 since it is not necessary ( #17215 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Randall Smith <Randall.Smith@amd.com > 
						
						
					 
					
						2025-04-25 19:55:05 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8de2901fea 
					 
					
						
						
							
							[Bugfix] gemma[2,3] interleaved attention when sliding window is disabled ( #17180 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chen Zhang <zhangch99@outlook.com > 
						
						
					 
					
						2025-04-25 19:53:51 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c53e0730cb 
					 
					
						
						
							
							[Misc] Refine ray_serve_deepseek example ( #17204 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Rui Qiao <ruisearch42@gmail.com > 
						
						
					 
					
						2025-04-25 16:06:59 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a0e619e62a 
					 
					
						
						
							
							[V1][Spec Decode] EAGLE-3 Support ( #16937 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Bryan Lu <yuzhelu@amazon.com >
Signed-off-by: Benjamin Chislett <benjamin.chislett@centml.ai >
Co-authored-by: Bryan Lu <yuzhelu@amazon.com > 
						
						
					 
					
						2025-04-25 15:43:07 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						70116459c3 
					 
					
						
						
							
							[BugFix][Frontend] Fix LLM.chat() tokenization ( #16081 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-04-25 22:20:05 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						65e262b93b 
					 
					
						
						
							
							Fix Python packaging edge cases ( #17159 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Christian Heimes <christian@python.org > 
						
						
					 
					
						2025-04-26 06:15:07 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						43faa0461a 
					 
					
						
						
							
							[Bugfix] Fix hybrid model tests ( #17182 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-04-25 15:14:37 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						48cb2109b6 
					 
					
						
						
							
							[V1] Move usage stats to worker and start logging TPU hardware ( #16211 )  
						
						 
						
						
						
						
					 
					
						2025-04-25 14:06:01 -06:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a5450f11c9 
					 
					
						
						
							
							[Security] Use safe serialization and fix zmq setup for mooncake pipe ( #17192 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Shangming Cai <caishangming@linux.alibaba.com >
Co-authored-by: Shangming Cai <caishangming@linux.alibaba.com > 
						
						
					 
					
						2025-04-25 16:53:23 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9d98ab5ec6 
					 
					
						
						
							
							[Misc] Inline Molmo requirements ( #17190 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-04-25 16:41:44 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						df5c879527 
					 
					
						
						
							
							[doc] update wrong hf model links ( #17184 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-04-25 16:40:54 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						423e9f1cbe 
					 
					
						
						
							
							Use Transformers helper get_text_config() instead of checking for text_config ( #17105 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-04-25 08:47:35 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0bd7f8fca5 
					 
					
						
						
							
							Bump Transformers to 4.51.3 ( #17116 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-04-25 08:34:34 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d5615af9ae 
					 
					
						
						
							
							[Bugfix] Fix Mistral ChatCompletionRequest Body Exception ( #16769 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jasmond Loh <Jasmond.Loh@hotmail.com >
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com > 
						
						
					 
					
						2025-04-25 07:26:30 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						19dcc02a72 
					 
					
						
						
							
							[Bugfix] Fix mistral model tests ( #17181 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-04-25 06:03:34 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7feae92c1f 
					 
					
						
						
							
							[Doc] Move todo out of beam search docstring ( #17183 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Alex-Brooks <Alex.Brooks@ibm.com > 
						
						
					 
					
						2025-04-25 04:44:58 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f851b84266 
					 
					
						
						
							
							[Doc] Add two links to disagg_prefill.md ( #17168 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: windsonsea <haifeng.yao@daocloud.io > 
						
						
					 
					
						2025-04-25 10:23:57 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						fc966e9cc6 
					 
					
						
						
							
							Only turn on FastIncrementalDetokenizer when tokenizers >= 0.21.1 ( #17158 )  
						
						 
						
						
						
						
					 
					
						2025-04-25 17:10:32 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ef19e67d2c 
					 
					
						
						
							
							[Doc] Add headings to improve gptqmodel.md ( #17164 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: windsonsea <haifeng.yao@daocloud.io > 
						
						
					 
					
						2025-04-25 01:13:13 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a41351f363 
					 
					
						
						
							
							[Quantization][FP8] Add support for FP8 models with input_scale for output projection and QK quantization ( #15734 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Randall Smith <Randall.Smith@amd.com >
Signed-off-by: Luka Govedič <lgovedic@redhat.com >
Co-authored-by: Luka Govedič <lgovedic@redhat.com > 
						
						
					 
					
						2025-04-25 00:45:02 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6aae216b4e 
					 
					
						
						
							
							[Bugfix] remove fallback in guided_json (int range, patterns) ( #16725 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: csy1204 <josang1204@gmail.com >
Co-authored-by: 조상연[플레이스 AI] <sang-yeon.cho@navercorp.com > 
						
						
					 
					
						2025-04-25 06:54:43 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b22980a1dc 
					 
					
						
						
							
							[Perf]Optimize rotary_emb implementation to use Triton operator for improved inference performance ( #16457 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: cynthieye <yexin93@qq.com >
Co-authored-by: MagnetoWang <magnetowang@outlook.com > 
						
						
					 
					
						2025-04-25 14:52:28 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						881f735827 
					 
					
						
						
							
							[Misc] Benchmark Serving Script Support Appending Results ( #17028 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lucas Wilkinson <lwilkinson@neuralmagic.com > 
						
						
					 
					
						2025-04-24 22:53:55 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2f54045508 
					 
					
						
						
							
							[Bugfix][Misc] Use TritonPlaceholderModule to defensively import triton ( #15099 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Mengqing Cao <cmq0113@163.com > 
						
						
					 
					
						2025-04-24 22:51:02 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5aa6efb9a5 
					 
					
						
						
							
							[Misc] Clean up redundant code in uniproc_executor.py ( #16762 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lifu Huang <lifu.hlf@gmail.com > 
						
						
					 
					
						2025-04-24 22:49:30 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6ca0234478 
					 
					
						
						
							
							Move missed SchedulerConfig args into scheduler config group in EngineArgs ( #17131 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-04-24 22:48:53 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						649818995f 
					 
					
						
						
							
							[Docs] Fix True->true in supported_models.md ( #17141 )  
						
						 
						
						
						
						
					 
					
						2025-04-25 04:20:04 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7a0a9da72b 
					 
					
						
						
							
							[Doc] V1 : Update LoRA status ( #17133 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: varun sundar rabindranath <vsundarr@redhat.com >
Co-authored-by: varun sundar rabindranath <vsundarr@redhat.com > 
						
						
					 
					
						2025-04-24 20:17:22 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						69bff9bc89 
					 
					
						
						
							
							fix float16 support for kimi-vl ( #17156 )  
						
						 
						
						... 
						
						
						
						Co-authored-by: zhouzaida <zhouzaida@msh.team > 
						
						
					 
					
						2025-04-24 20:16:32 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						41ca7eb491 
					 
					
						
						
							
							[Attention] FA3 decode perf improvement - single mma warp group support for head dim 128 ( #16864 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lucas Wilkinson <lwilkinson@neuralmagic.com > 
						
						
					 
					
						2025-04-24 20:12:21 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						eef364723c 
					 
					
						
						
							
							[FEAT] [ROCm]: AITER Fused MOE V1 Support ( #16752 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: vllmellm <vllm.ellm@embeddedllm.com >
Co-authored-by: tjtanaa <tunjian.tan@embeddedllm.com > 
						
						
					 
					
						2025-04-25 11:06:50 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0d6e187e88 
					 
					
						
						
							
							Use custom address for listening socket ( #15988 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jens Glaser <glaserj@ornl.gov > 
						
						
					 
					
						2025-04-25 01:57:16 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9420a1fc30 
					 
					
						
						
							
							Better error message for missing mistral params.json ( #17132 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-04-24 23:43:08 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						583e900996 
					 
					
						
						
							
							[Misc] Add example to run DeepSeek with Ray Serve LLM ( #17134 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Rui Qiao <ruisearch42@gmail.com > 
						
						
					 
					
						2025-04-24 22:25:21 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						05e1fbfc52 
					 
					
						
						
							
							Add chat template for Llama 4 models ( #16428 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Max de Bayser <mbayser@br.ibm.com > 
						
						
					 
					
						2025-04-24 20:19:36 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						fe92176321 
					 
					
						
						
							
							Add collective_rpc to llm engine ( #16999 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Yinghai Lu <yinghai@thinkingmachines.ai > 
						
						
					 
					
						2025-04-24 20:16:52 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6d0df0ebeb 
					 
					
						
						
							
							[Docs] Generate correct github links for decorated functions ( #17125 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Russell Bryant <rbryant@redhat.com > 
						
						
					 
					
						2025-04-24 10:39:43 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0fa939e2d1 
					 
					
						
						
							
							Improve configs - LoRAConfig + PromptAdapterConfig ( #16980 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-04-24 10:29:34 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0422ce109f 
					 
					
						
						
							
							Add :markdownhelp: to EngineArgs docs so markdown docstrings render properly ( #17124 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-04-24 10:28:45 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						47bdee409c 
					 
					
						
						
							
							Molmo Requirements ( #17026 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Eyshika Agarwal <eyshikaengineer@gmail.com >
Signed-off-by: eyshika <eyshikaengineer@gmail.com > 
						
						
					 
					
						2025-04-24 10:08:37 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						49f189439d 
					 
					
						
						
							
							existing torch installation pip command fix for docs ( #17059 )  
						
						 
						
						
						
						
					 
					
						2025-04-24 10:07:21 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5adf6f6b7f 
					 
					
						
						
							
							Updating builkite job for IBM Power  ( #17111 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Aaruni Aggarwal <aaruniagg@gmail.com > 
						
						
					 
					
						2025-04-24 10:06:17 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4115f19958 
					 
					
						
						
							
							[CI] Add automation for the tool-calling github label ( #17118 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Russell Bryant <rbryant@redhat.com > 
						
						
					 
					
						2025-04-24 09:22:00 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						340d7b1b21 
					 
					
						
						
							
							[V1][Spec Decoding] Add num_drafts and num_accepted_tokens_per_position metrics ( #16665 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Mark McLoughlin <markmc@redhat.com > 
						
						
					 
					
						2025-04-24 08:57:40 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1bcbcbf574 
					 
					
						
						
							
							[Misc] refactor example series - structured outputs ( #17040 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-04-24 07:49:48 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						82e43b2d7e 
					 
					
						
						
							
							Add missing rocm_skinny_gemms kernel test to CI ( #17060 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-04-24 07:49:37 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						67309a1cb5 
					 
					
						
						
							
							[Frontend] Using matryoshka_dimensions control the allowed output dimensions. ( #16970 )  
						
						 
						
						
						
						
					 
					
						2025-04-24 07:06:28 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b724afe343 
					 
					
						
						
							
							[V1][Structured Output] Clear xgrammar compiler object when engine core shut down to avoid nanobind leaked warning ( #16954 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: shen-shanshan <467638484@qq.com > 
						
						
					 
					
						2025-04-24 06:15:03 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						21f4f1c9a4 
					 
					
						
						
							
							Improve static type checking in LoRAModelRunnerMixin ( #17104 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-04-24 06:14:47 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b0c1f6202d 
					 
					
						
						
							
							[Misc] Remove OLMo2 config copy ( #17066 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <2037008807@qq.com > 
						
						
					 
					
						2025-04-24 06:14:32 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c0dfd97519 
					 
					
						
						
							
							[V1][PP] Optimization: continue scheduling prefill chunks ( #17080 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Rui Qiao <ruisearch42@gmail.com > 
						
						
					 
					
						2025-04-24 05:27:08 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a9138e85b1 
					 
					
						
						
							
							Fix OOT registration test ( #17099 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-04-24 04:44:12 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0a05ed57e6 
					 
					
						
						
							
							Simplify TokenizerGroup ( #16790 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-04-24 04:43:56 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						14288d1332 
					 
					
						
						
							
							Disable enforce_eager for V1 TPU sampler and structured output tests ( #17016 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-04-24 02:50:09 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b411418ff0 
					 
					
						
						
							
							[Chore] Remove Sampler from Model Code ( #17084 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-04-24 02:49:33 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2bc0f72ae5 
					 
					
						
						
							
							Add docs for runai_streamer_sharded ( #17093 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Omer Dayan (SW-GPU) <omer@run.ai >
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com > 
						
						
					 
					
						2025-04-24 01:03:21 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9c1244de57 
					 
					
						
						
							
							[doc] update to hyperlink ( #17096 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-04-24 00:58:08 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						db2f8d915c 
					 
					
						
						
							
							[V1] Update structured output ( #16812 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-04-23 23:57:17 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6167c0e5d2 
					 
					
						
						
							
							[Bugfix][Core] add seq_id_to_seq_group clearing to avoid memory leak when s… ( #16472 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: 开哲 <kaizhe.zy@alibaba-inc.com >
Co-authored-by: 开哲 <kaizhe.zy@alibaba-inc.com > 
						
						
					 
					
						2025-04-24 11:25:37 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ed2e464653 
					 
					
						
						
							
							Addendum Fix to support FIPS enabled machines with MD5 hashing ( #17043 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: sydarb <areebsyed237@gmail.com > 
						
						
					 
					
						2025-04-23 19:55:00 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2c8ed8ee48 
					 
					
						
						
							
							More informative error when using Transformers backend ( #16988 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-04-23 19:54:03 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ed50f46641 
					 
					
						
						
							
							[Bugfix] Enable V1 usage stats ( #16986 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com >
Signed-off-by: Nick Hill <nhill@redhat.com >
Co-authored-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-04-23 19:54:00 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						46e678bcff 
					 
					
						
						
							
							[Minor] Use larger batch sizes for A100/B100/B200/MI300x ( #17073 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-04-23 19:18:59 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6b2427f995 
					 
					
						
						
							
							[Quantization]add prefix for commandA quantized model ( #17017 )  
						
						 
						
						
						
						
					 
					
						2025-04-23 17:32:40 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b07d741661 
					 
					
						
						
							
							[CI/Build] workaround for CI build failure ( #17070 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: csy1204 <josang1204@gmail.com >
Co-authored-by: Michael Goin <mgoin64@gmail.com > 
						
						
					 
					
						2025-04-23 16:14:18 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						41fb013d29 
					 
					
						
						
							
							[V1][Spec Decode] Always use argmax for sampling draft tokens  ( #16899 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-04-23 14:57:43 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						32d4b669d0 
					 
					
						
						
							
							[BugFix][V1] Fix int32 token index overflow when preparing input ids ( #16806 )  
						
						 
						
						
						
						
					 
					
						2025-04-23 12:12:35 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3cde34a4a4 
					 
					
						
						
							
							[Frontend] Support guidance:no-additional-properties for compatibility with xgrammar ( #15949 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Travis Johnson <tsjohnso@us.ibm.com > 
						
						
					 
					
						2025-04-23 18:34:41 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						bdb3660312 
					 
					
						
						
							
							Use @property and private field for data_parallel_rank_local ( #17053 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-04-23 08:50:08 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f3a21e9c68 
					 
					
						
						
							
							CacheConfig.block_size should always be int when used (#17052 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-04-23 08:50:05 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8e630d680e 
					 
					
						
						
							
							Improve Transformers backend model loading QoL ( #17039 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-04-23 07:33:51 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						af869f6dff 
					 
					
						
						
							
							[CI] Update structured-output label automation ( #17055 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Russell Bryant <rbryant@redhat.com > 
						
						
					 
					
						2025-04-23 07:33:14 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						53c0fa1e25 
					 
					
						
						
							
							Ensure that pid passed to kill_process_tree is int for mypy ( #17051 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-04-23 07:32:26 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f7912cba3d 
					 
					
						
						
							
							[Doc] Add top anchor and a note to quantization/bitblas.md ( #17042 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: windsonsea <haifeng.yao@daocloud.io > 
						
						
					 
					
						2025-04-23 07:32:16 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6317a5174a 
					 
					
						
						
							
							Categorize tests/kernels/ based on kernel type ( #16799 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-04-23 09:21:07 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						aa72d9a4ea 
					 
					
						
						
							
							Mistral-format support for compressed-tensors ( #16803 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-04-23 08:46:23 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ce17db8085 
					 
					
						
						
							
							[CI] Run v1/test_serial_utils.py in CI ( #16996 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Russell Bryant <rbryant@redhat.com > 
						
						
					 
					
						2025-04-23 01:13:34 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8c87a9ad46 
					 
					
						
						
							
							[Bugfix] Fix AssertionError: skip_special_tokens=False is not supported for Mistral tokenizers ( #16964 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com > 
						
						
					 
					
						2025-04-23 07:24:09 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ec69124eb4 
					 
					
						
						
							
							[Misc] Improve readability of get_open_port function. ( #17024 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: gitover22 <qidizou88@gmail.com > 
						
						
					 
					
						2025-04-23 06:16:53 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d0da99fb70 
					 
					
						
						
							
							[BugFix] llama4 fa3 fix - RuntimeError: scheduler_metadata must have shape (metadata_size) ( #16998 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lucas Wilkinson <lwilkinson@neuralmagic.com > 
						
						
					 
					
						2025-04-22 21:49:24 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b2f195c429 
					 
					
						
						
							
							[V1] Avoid socket errors during shutdown when requests are in in-flight ( #16807 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-04-23 12:36:29 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						047797ef90 
					 
					
						
						
							
							[Bugfix] Triton FA function takes no keyword arguments ( #16902 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: vllmellm <vllm.ellm@embeddedllm.com > 
						
						
					 
					
						2025-04-22 21:35:24 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						eb8ef4224d 
					 
					
						
						
							
							[doc] add download path tips ( #17013 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-04-23 04:06:30 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						56a735261c 
					 
					
						
						
							
							[INTEL-HPU][v0] Port delayed sampling to upstream ( #16949 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Michal Adamczyk <michal.adamczyk@intel.com >
Signed-off-by: Chendi Xue <chendi.xue@intel.com >
Co-authored-by: Michal Adamczyk <madamczyk@habana.ai > 
						
						
					 
					
						2025-04-22 20:14:11 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e1cf90e099 
					 
					
						
						
							
							[misc] tune some env vars for GB200 ( #16992 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: youkaichao <youkaichao@gmail.com > 
						
						
					 
					
						2025-04-23 10:59:48 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6bc1e30ef9 
					 
					
						
						
							
							Revert "[Misc] Add S3 environment variables for better support of MinIO." ( #17021 )  
						
						 
						
						
						
						
					 
					
						2025-04-22 19:22:29 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7e081ba7ca 
					 
					
						
						
							
							[BugFix] Revert ROCm Custom Paged Attention Env Flag Check ( #17022 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: vllmellm <vllm.ellm@embeddedllm.com > 
						
						
					 
					
						2025-04-22 19:17:48 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1e013fa388 
					 
					
						
						
							
							[V1][DP] More robust DP/EP dummy request coordination ( #16277 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-04-22 19:12:15 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						bc7c4d206b 
					 
					
						
						
							
							[Kernel][ROCM] Upstream prefix prefill speed up for vLLM V1 ( #13305 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Sage Moore <sage@neuralmagic.com >
Signed-off-by: root <root@banff-cyxtera-s73-5.ctr.dcgpu >
Signed-off-by: Aleksandr Malyshev <maleksan@amd.com >
Signed-off-by: root <root@banff-cyxtera-s65-4.amd.com >
Signed-off-by: maleksan85 <maleksan@amd.com >
Signed-off-by: <>
Co-authored-by: Sage Moore <sage@neuralmagic.com >
Co-authored-by: root <root@banff-cyxtera-s73-5.ctr.dcgpu >
Co-authored-by: Aleksandr Malyshev <maleksan@amd.com >
Co-authored-by: qli88 <qiang.li2@amd.com >
Co-authored-by: root <root@banff-cyxtera-s65-4.amd.com > 
						
						
					 
					
						2025-04-22 19:11:56 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f67e9e9f22 
					 
					
						
						
							
							add Dockerfile build vllm against torch nightly ( #16936 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Yang Wang <elainewy@meta.com > 
						
						
					 
					
						2025-04-22 19:08:27 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						36fe78769f 
					 
					
						
						
							
							[Bugfix] validate urls object for multimodal content parts ( #16990 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Guillaume Calmettes <gcalmettes@scaleway.com > 
						
						
					 
					
						2025-04-23 09:43:06 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						83d933718c 
					 
					
						
						
							
							[Core][V1][TPU] Enable structured decoding on TPU V1 ( #16499 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chenyaaang <chenyangli@google.com > 
						
						
					 
					
						2025-04-22 18:05:23 -06:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5175b884f7 
					 
					
						
						
							
							[BugFix] Remove default multiproc executor collective_rpc timeout ( #17000 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-04-22 23:27:14 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5536b30a4c 
					 
					
						
						
							
							Fencing Kernels Tests for enabling on AMD ( #16929 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Alexei V. Ivanov <alexei.ivanov@amd.com > 
						
						
					 
					
						2025-04-22 09:32:40 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7f58fb9718 
					 
					
						
						
							
							Add assertion for no objects while hashing hf_config ( #16930 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: rzou <zou3519@gmail.com > 
						
						
					 
					
						2025-04-22 09:32:22 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						30bc3e0f66 
					 
					
						
						
							
							[FEAT][ROCm]: Support AITER MLA ( #15893 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: vllmellm <vllm.ellm@embeddedllm.com >
Co-authored-by: qli88 <qiang.li2@amd.com > 
						
						
					 
					
						2025-04-22 09:31:13 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f34410715f 
					 
					
						
						
							
							[frontend] enhance tool_calls type check ( #16882 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-04-22 15:40:24 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						68d4c33202 
					 
					
						
						
							
							[Misc] Add S3 environment variables for better support of MinIO. ( #16977 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com > 
						
						
					 
					
						2025-04-22 14:27:36 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f961d7f6ef 
					 
					
						
						
							
							[BugFix] Pass in correct VLLM config in FlashInfer backend ( #13207 ) ( #16973 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: 苏政渊 <suzhengyuan@moonshot.cn >
Co-authored-by: 苏政渊 <suzhengyuan@moonshot.cn > 
						
						
					 
					
						2025-04-22 06:44:10 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d059110498 
					 
					
						
						
							
							Improve configs - SpeculativeConfig ( #16971 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-04-22 12:55:36 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						571e8dd65e 
					 
					
						
						
							
							[Bugfix] Fix distributed bug again in Qwen2.5-VL & Qwen2.5-Omni ( #16974 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: fyabc <suyang.fy@alibaba-inc.com > 
						
						
					 
					
						2025-04-22 12:23:17 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4b91c927f6 
					 
					
						
						
							
							[Misc] refactor example series ( #16972 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-04-22 11:44:21 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0e237f0035 
					 
					
						
						
							
							[FEAT][ROCm] Integrate Paged Attention Kernel from AITER ( #15001 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: vllmellm <vllm.ellm@embeddedllm.com >
Signed-off-by: tjtanaa <tunjian.tan@embeddedllm.com >
Co-authored-by: tjtanaa <tunjian.tan@embeddedllm.com > 
						
						
					 
					
						2025-04-22 02:46:28 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8f7bace7c3 
					 
					
						
						
							
							[Doc] Improve documentation for multimodal CLI args ( #16960 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-04-22 08:35:35 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e4d6144232 
					 
					
						
						
							
							[BugFix] Fix incremental detokenization perf issue ( #16963 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-04-22 08:16:19 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8d32dc603d 
					 
					
						
						
							
							[Kernel] Support Microsoft Runtime Kernel Lib for our Low Precision Computation - BitBLAS ( #6036 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: xinyuxiao <xinyuxiao2024@gmail.com >
Co-authored-by: xinyuxiao <xinyuxiao2024@gmail.com > 
						
						
					 
					
						2025-04-22 09:01:36 +01:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c4ab9f3e71 
					 
					
						
						
							
							[V1] Remove pre-allocation for KV cache ( #16941 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-04-22 00:52:18 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2689d5c027 
					 
					
						
						
							
							[Model] Use autoweightloader for mamba ( #16950 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: sfeng33 <4florafeng@gmail.com > 
						
						
					 
					
						2025-04-22 07:48:15 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						acba33a0f1 
					 
					
						
						
							
							[Bugfix] Fix the issue where llm.generate cannot be called repeatedly after setting GuidedDecodingParams ( #16767 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com >
Signed-off-by: Russell Bryant <rbryant@redhat.com >
Co-authored-by: Russell Bryant <rbryant@redhat.com > 
						
						
					 
					
						2025-04-22 06:02:20 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a114bf20a3 
					 
					
						
						
							
							[Perf] Optimize _update_states for GPU model runner ( #16910 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: snowcharm <snowcharmqq@gmail.com > 
						
						
					 
					
						2025-04-22 14:01:54 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3097ce3a32 
					 
					
						
						
							
							[Doc] Update ai_accelerator/hpu-gaudi.inc.md ( #16956 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: windsonsea <haifeng.yao@daocloud.io > 
						
						
					 
					
						2025-04-22 05:33:27 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d6da9322c8 
					 
					
						
						
							
							[Bugfix] Fix f-string for Python 3.9-3.11 ( #16962 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-04-21 21:45:55 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						71ce44047f 
					 
					
						
						
							
							Support S3 Sharded loading with RunAI Model Streamer ( #16317 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Omer Dayan (SW-GPU) <omer@run.ai >
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com > 
						
						
					 
					
						2025-04-21 21:21:49 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						188b7f9b8c 
					 
					
						
						
							
							[Performance][ROCm] Add skinny gemms for unquantized linear on ROCm ( #15830 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: charlifu <charlifu@amd.com >
Co-authored-by: Tyler Michael Smith <tysmith@redhat.com > 
						
						
					 
					
						2025-04-21 20:46:22 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b9b4746950 
					 
					
						
						
							
							[V1] Remove additional_config check ( #16710 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com > 
						
						
					 
					
						2025-04-21 20:45:27 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7b8a2ab76f 
					 
					
						
						
							
							[Kernel] Add expert_map support to Cutlass FP8 MOE ( #16861 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: varun sundar rabindranath <vsundarr@redhat.com >
Co-authored-by: varun sundar rabindranath <vsundarr@redhat.com > 
						
						
					 
					
						2025-04-21 20:44:32 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c9acbf1141 
					 
					
						
						
							
							[Misc] Remove the chunked prefill warning for LoRA  ( #16925 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-04-21 20:44:24 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5b794cae8d 
					 
					
						
						
							
							[ROCm] Add aiter tkw1 kernel for Llama4 fp8 ( #16727 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: kliuae <kuanfu.liu@embeddedllm.com >
Signed-off-by: tjtanaa <tunjian.tan@embeddedllm.com >
Co-authored-by: tjtanaa <tunjian.tan@embeddedllm.com >
Co-authored-by: vllmellm <vllm.ellm@embeddedllm.com > 
						
						
					 
					
						2025-04-21 20:42:34 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0e4254492f 
					 
					
						
						
							
							[Bugfix]: fix issue with n>1 sampling on v1 requests overriding each other ( #16863 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jeffrey Li <jeffrey.dot.li@gmail.com > 
						
						
					 
					
						2025-04-22 11:40:19 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1311913f55 
					 
					
						
						
							
							[BugFix][Spec Decode] No in-place update to draft probs ( #16952 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-04-21 19:54:19 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						29f395c97c 
					 
					
						
						
							
							[Doc] Remove unnecessary V1 flag ( #16924 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-04-21 21:04:38 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						fa3bba2a53 
					 
					
						
						
							
							[TPU][V1] Enable Top-P ( #16843 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: NickLucche <nlucches@redhat.com >
Co-authored-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-04-22 00:46:07 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						986537f1c3 
					 
					
						
						
							
							[V1] V1 FlashInfer Attention ( #16684 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com >
Co-authored-by: Aurick Qiao <qiao@aurick.net > 
						
						
					 
					
						2025-04-22 00:38:41 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						210207525e 
					 
					
						
						
							
							[TPU][V1] Capture multimodal encoder during model compilation ( #15051 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Michael Goin <mgoin64@gmail.com >
Signed-off-by: NickLucche <nlucches@redhat.com >
Co-authored-by: Michael Goin <mgoin64@gmail.com >
Co-authored-by: Siyuan Liu <lsiyuan@google.com > 
						
						
					 
					
						2025-04-21 18:36:59 -06:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						71eda0bb76 
					 
					
						
						
							
							Update Qwen1.5-MoE-W4A16-compressed-tensors.yaml ( #16946 )  
						
						 
						
						
						
						
					 
					
						2025-04-21 18:35:32 -06:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						471fe65630 
					 
					
						
						
							
							[TPU][V1] Implicitly adjust page size when there's SMEM OOM ( #16871 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chengji Yao <chengjiyao@google.com > 
						
						
					 
					
						2025-04-21 15:43:13 -06:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3a0fba5cf4 
					 
					
						
						
							
							[V1][Spec Decode] Handle draft tokens beyond max_model_len ( #16087 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-04-21 12:38:50 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						299ebb62b2 
					 
					
						
						
							
							[Core] Speed up decode by remove synchronizing operation in sampler ( #16436 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chanh Nguyen <cnguyen@linkedin.com >
Co-authored-by: Chanh Nguyen <cnguyen@linkedin.com > 
						
						
					 
					
						2025-04-21 18:18:22 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f728ab8e35 
					 
					
						
						
							
							[Doc] mention how to install in CPU editable mode ( #16923 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: David Xia <david@davidxia.com > 
						
						
					 
					
						2025-04-21 17:45:51 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						63e26fff78 
					 
					
						
						
							
							[doc] install required python3-dev apt package ( #16888 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: David Xia <david@davidxia.com > 
						
						
					 
					
						2025-04-21 16:15:18 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						fe3462c774 
					 
					
						
						
							
							[XPU][Bugfix] minor fix for XPU ( #15591 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yan ma <yan.ma@intel.com > 
						
						
					 
					
						2025-04-22 00:02:57 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3b34fd5273 
					 
					
						
						
							
							Raise error for data-parallel with benchmark_throughput ( #16737 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Kartik Ramesh <kartikx2000@gmail.com >
Co-authored-by: Simon Mo <simon.mo@hey.com > 
						
						
					 
					
						2025-04-21 23:51:43 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						55d6d3fdb8 
					 
					
						
						
							
							[Bugfix] Fix GLM rotary_dim issue and support v1 ( #16912 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: isotr0py <2037008807@qq.com > 
						
						
					 
					
						2025-04-21 14:26:34 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7272bfae77 
					 
					
						
						
							
							[Misc] Refactor platform to get device specific stream and event ( #14411 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: shen-shanshan <467638484@qq.com > 
						
						
					 
					
						2025-04-21 21:25:49 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d9ac9e3dc5 
					 
					
						
						
							
							[Misc] fix collect_env version parse ( #15267 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com > 
						
						
					 
					
						2025-04-21 20:29:40 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d41faaf9df 
					 
					
						
						
							
							Restore buffers when wake up from level 2 sleep ( #16564 ) ( #16889 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Han <zh950713@gmail.com > 
						
						
					 
					
						2025-04-21 20:18:28 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b34f33438a 
					 
					
						
						
							
							[Doc] Split dummy_processor_inputs() in Multimodal Docs ( #16915 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Alex-Brooks <Alex.Brooks@ibm.com > 
						
						
					 
					
						2025-04-21 11:10:01 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						26c0406555 
					 
					
						
						
							
							[Bugfix] Fix distributed bug in Qwen2.5-VL & Qwen2.5-Omni ( #16907 )  
						
						 
						
						
						
						
					 
					
						2025-04-21 10:25:21 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4c41278b77 
					 
					
						
						
							
							[CI/CD][V1] Add spec decode tests to CI ( #16900 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-04-20 22:37:16 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						bb3605db85 
					 
					
						
						
							
							[Bugfix] Fix v1/spec_decode/test_ngram.py ( #16895 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: qizixi <qizixi@meta.com > 
						
						
					 
					
						2025-04-20 20:54:29 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						fe742aef5a 
					 
					
						
						
							
							[easy] Pass compile_fx only the config patches ( #16845 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: rzou <zou3519@gmail.com > 
						
						
					 
					
						2025-04-20 12:25:19 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4b07d36891 
					 
					
						
						
							
							Improve configs - CacheConfig ( #16835 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-04-20 12:25:04 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						87aaadef73 
					 
					
						
						
							
							Serialize tensors using int8 views ( #16866 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Staszek Pasko <staszek@gmail.com >
Co-authored-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-04-19 10:28:34 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						682e0b6d2f 
					 
					
						
						
							
							Log how much time loading a compiled artifact takes ( #16848 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: rzou <zou3519@gmail.com > 
						
						
					 
					
						2025-04-19 16:50:46 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d6195a748b 
					 
					
						
						
							
							[doc] update hyperlink ( #16877 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-04-19 16:40:38 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						205d84aaa9 
					 
					
						
						
							
							[VLM] Clean up models ( #16873 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-04-19 12:13:06 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5124f5bf51 
					 
					
						
						
							
							[Model] Qwen2.5-Omni Cleanup  ( #16872 )  
						
						 
						
						
						
						
					 
					
						2025-04-19 09:37:02 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						83f3c3bd91 
					 
					
						
						
							
							[Model] Refactor Phi-4-multimodal to use merged processor and support V1 ( #15477 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <2037008807@qq.com >
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk >
Co-authored-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-04-19 02:26:11 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d9737ca1c6 
					 
					
						
						
							
							[V1][Misc] stop update prefix cache stats when logs_stats is disabled ( #16460 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: vie-serendipity <2733147505@qq.com > 
						
						
					 
					
						2025-04-19 02:25:19 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9d4ca19d50 
					 
					
						
						
							
							[Misc] Benchmarks for audio models ( #16505 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: NickLucche <nlucches@redhat.com > 
						
						
					 
					
						2025-04-19 02:24:14 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2ef0dc53b8 
					 
					
						
						
							
							[Frontend] Add sampling params to v1/audio/transcriptions endpoint ( #16591 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jannis Schönleber <joennlae@gmail.com >
Signed-off-by: NickLucche <nlucches@redhat.com >
Co-authored-by: Jannis Schönleber <joennlae@gmail.com > 
						
						
					 
					
						2025-04-19 07:03:54 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1d4680fad2 
					 
					
						
						
							
							[rocm][MI300] llama4 maverick fp8 moe config tp8 ( #16847 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Divakar Verma <divakar.verma@amd.com > 
						
						
					 
					
						2025-04-19 06:21:43 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2c1bd848a6 
					 
					
						
						
							
							[Model][VLM] Add Qwen2.5-Omni model support (thinker only) ( #15130 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: fyabc <suyang.fy@alibaba-inc.com >
Signed-off-by: Roger Wang <ywang@roblox.com >
Co-authored-by: Roger Wang <136131678+ywang96@users.noreply.github.com >
Co-authored-by: Roger Wang <ywang@roblox.com >
Co-authored-by: Xiong Wang <wangxiongts@163.com > 
						
						
					 
					
						2025-04-18 23:14:36 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5c9121203c 
					 
					
						
						
							
							[release] Publish neuron docker image ( #16733 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: omrishiv <327609+omrishiv@users.noreply.github.com > 
						
						
					 
					
						2025-04-18 17:11:25 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						490b1698a5 
					 
					
						
						
							
							[Doc] Updated Llama section in tool calling docs to have llama 3.2 config info ( #16857 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: jmho <jaylenho734@gmail.com > 
						
						
					 
					
						2025-04-18 23:28:53 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5a5e29de88 
					 
					
						
						
							
							[Misc] refactor examples series - Chat Completion Client With Tools ( #16829 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-04-18 23:24:42 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3d3ab3689f 
					 
					
						
						
							
							[New Model]: Snowflake Arctic Embed (Family)  ( #16649 )  
						
						 
						
						
						
						
					 
					
						2025-04-18 08:11:57 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						686623c5e7 
					 
					
						
						
							
							Fix nullable_kvs fallback ( #16837 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-04-18 05:58:39 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						aadb656562 
					 
					
						
						
							
							[Misc] Clean up Kimi-VL ( #16833 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-04-18 05:15:09 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						87e067de41 
					 
					
						
						
							
							[Model] use AutoWeightsLoader for BigCode, GPT-J ( #16823 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jonghyun Choe <andy.choe729@gmail.com > 
						
						
					 
					
						2025-04-18 10:42:41 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						26507f8973 
					 
					
						
						
							
							[Docs] Fix a link and grammar issue in production-stack.md ( #16809 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: windsonsea <haifeng.yao@daocloud.io > 
						
						
					 
					
						2025-04-18 06:42:58 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9c1d5b456d 
					 
					
						
						
							
							[Doc] add podman setup instructions for official image ( #16796 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nathan Weinberg <nweinber@redhat.com > 
						
						
					 
					
						2025-04-18 06:10:49 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e31045f95c 
					 
					
						
						
							
							[Bugfix] fix pp for llama4 ( #16746 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lu Fang <fanglu@fb.com > 
						
						
					 
					
						2025-04-18 13:51:30 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						aaec845f8e 
					 
					
						
						
							
							[ROCm] [Attention] Cleanup ROCm output passing ( #16431 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Luka Govedič <lgovedic@redhat.com > 
						
						
					 
					
						2025-04-18 05:46:45 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7bdfd29a35 
					 
					
						
						
							
							[Misc] add collect_env to cli and docker image ( #16759 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: rongfu.leng <rongfu.leng@daocloud.io > 
						
						
					 
					
						2025-04-17 22:13:35 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e78587a64c 
					 
					
						
						
							
							Improve-mm-and-pooler-and-decoding-configs ( #16789 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-04-17 22:13:32 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7eb4255628 
					 
					
						
						
							
							[BugFix] Accuracy fix for llama4 int4 - improperly casted scales ( #16801 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lucas Wilkinson <lwilkinson@neuralmagic.com > 
						
						
					 
					
						2025-04-17 22:13:29 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6a0f547561 
					 
					
						
						
							
							Add hardware print to TPU V1 test ( #16792 )  
						
						 
						
						
						
						
					 
					
						2025-04-17 22:13:26 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						30ed81b7ca 
					 
					
						
						
							
							[V1][Structured Output] Minor modification to _validate_structured_output() ( #16748 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: shen-shanshan <467638484@qq.com > 
						
						
					 
					
						2025-04-18 13:12:54 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7a4a5de729 
					 
					
						
						
							
							[Misc] Update outdated note: LMCache now supports chunked prefill ( #16697 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com > 
						
						
					 
					
						2025-04-18 05:12:42 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c16fb5dae8 
					 
					
						
						
							
							[Doc] Improve help examples for --compilation-config ( #16729 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-04-17 21:22:34 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e37073efd7 
					 
					
						
						
							
							Add property-based testing for vLLM endpoints using an API defined by an OpenAPI 3.1 schema ( #16721 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Tarun Kumar <takumar@redhat.com >
Signed-off-by: Nick Hill <nhill@redhat.com >
Co-authored-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-04-17 21:08:27 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						183dad7a85 
					 
					
						
						
							
							[Attention] Update to lastest FA3 code ( #13111 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lucas Wilkinson <lwilkinson@neuralmagic.com > 
						
						
					 
					
						2025-04-17 15:14:07 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3408e47159 
					 
					
						
						
							
							[P/D][V1] KV Connector API V1 ( #15960 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: ApostaC <yihua98@uchicago.edu >
Signed-off-by: rshaw@neuralmagic.com  <robertgshaw2@gmail.com >
Signed-off-by: remi <remi@mistral.ai >
Co-authored-by: rshaw@neuralmagic.com  <robertgshaw2@gmail.com >
Co-authored-by: Robert Shaw <114415538+robertgshaw2-redhat@users.noreply.github.com >
Co-authored-by: Rémi Delacourt <54138269+Flechman@users.noreply.github.com >
Co-authored-by: Tyler Michael Smith <tysmith@redhat.com > 
						
						
					 
					
						2025-04-17 13:22:40 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0377b8310b 
					 
					
						
						
							
							[MLA] Simplification to batch P/D reordering ( #16673 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-04-17 16:12:09 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e4755f7fac 
					 
					
						
						
							
							[V1][Metrics] Fix http metrics middleware ( #15894 )  
						
						 
						
						
						
						
					 
					
						2025-04-17 19:52:18 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						92edf35826 
					 
					
						
						
							
							[ROCM] enable aiter fused moe kernel for llama4 bf16 checkpoints ( #16674 )  
						
						 
						
						
						
						
					 
					
						2025-04-17 11:44:34 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						eb5819b2d9 
					 
					
						
						
							
							[V1][TPU] Enable Top K ( #15489 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: NickLucche <nlucches@redhat.com >
Signed-off-by: Hyesoo Yang <hyeygit@gmail.com >
Co-authored-by: Hyesoo Yang <hyeygit@gmail.com > 
						
						
					 
					
						2025-04-17 18:18:11 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5989f4684d 
					 
					
						
						
							
							[TPU][V1] Fix padding recompilation when max-num-batched-tokens is not even ( #16726 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: NickLucche <nlucches@redhat.com > 
						
						
					 
					
						2025-04-17 18:09:57 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5125d72f02 
					 
					
						
						
							
							[Model] use AutoWeightsLoader for olmoe,opt,orion,persimmon,phi3_small ( #16548 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: rongfu.leng <rongfu.leng@daocloud.io > 
						
						
					 
					
						2025-04-17 17:48:31 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a018e555fd 
					 
					
						
						
							
							[Kernel] Add fp8_w8a8 fused MoE kernel tuning configs for DeepSeek V3/R1 on NVIDIA H20 ( #16753 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: ximing.wxm <ximing.wxm@antgroup.com >
Co-authored-by: ximing.wxm <ximing.wxm@antgroup.com > 
						
						
					 
					
						2025-04-18 00:01:30 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6211b92273 
					 
					
						
						
							
							[Bugfix]Fix index out of range error in api server log ( #16787 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: WangErXiao <863579016@qq.com > 
						
						
					 
					
						2025-04-17 09:01:07 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						05fcd1b430 
					 
					
						
						
							
							[V1][Perf] Faster incremental detokenization ( #15137 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-04-17 07:45:24 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7c02d6a137 
					 
					
						
						
							
							[Doc] Changed explanation of generation_tokens_total and prompt_tokens_total counter type metrics to avoid confusion ( #16784 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: insukim1994 <insu.kim@moreh.io > 
						
						
					 
					
						2025-04-17 14:10:08 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						11c3b98491 
					 
					
						
						
							
							[Doc] Document Matryoshka Representation Learning support ( #16770 )  
						
						 
						
						
						
						
					 
					
						2025-04-17 13:37:37 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						dbe7f07001 
					 
					
						
						
							
							[Doc] Make sure to update vLLM when installing latest code ( #16781 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-04-17 06:53:31 -06:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c69bf4ee06 
					 
					
						
						
							
							fix: hyperlink ( #16778 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-04-17 11:34:20 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d27ea94034 
					 
					
						
						
							
							Improve configs - TokenizerPoolConfig + DeviceConfig ( #16603 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-04-17 11:19:42 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						99ed526101 
					 
					
						
						
							
							[Misc] refactor examples series - lmcache ( #16758 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-04-17 11:02:35 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						207da28186 
					 
					
						
						
							
							[Doc] Fix a 404 link in installation/cpu.md ( #16773 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: windsonsea <haifeng.yao@daocloud.io > 
						
						
					 
					
						2025-04-17 10:46:21 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5b1aca2ae3 
					 
					
						
						
							
							[Bugfix] Fix GLM4 model ( #16618 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: intervitens <intervitens@tutanota.com > 
						
						
					 
					
						2025-04-17 03:35:07 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d8e557b5e5 
					 
					
						
						
							
							[doc] add open-webui example ( #16747 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-04-17 18:27:32 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						61a44a0b22 
					 
					
						
						
							
							[Doc] Add more tips to avoid OOM ( #16765 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-04-17 09:54:34 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a6481525b8 
					 
					
						
						
							
							[misc] ignore marlin_moe_wna16 local gen codes ( #16760 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DefTruth <qiustudent_r@163.com > 
						
						
					 
					
						2025-04-17 17:15:14 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8cac35ba43 
					 
					
						
						
							
							[Ray] Improve documentation on batch inference ( #16609 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Richard Liaw <rliaw@berkeley.edu > 
						
						
					 
					
						2025-04-16 22:19:26 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9dbf7a2dc1 
					 
					
						
						
							
							[V1] Remove log noise when idle ( #16735 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Russell Bryant <rbryant@redhat.com > 
						
						
					 
					
						2025-04-16 21:34:08 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						607029e515 
					 
					
						
						
							
							[Bugfix] Revert max_prompt_len validation for decoder-only models. ( #16741 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: David Heineman <david@davidheineman.com > 
						
						
					 
					
						2025-04-16 21:33:15 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						cb072ce93b 
					 
					
						
						
							
							[Bugfix] Update Florence-2 tokenizer to make grounding tasks work ( #16734 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <2037008807@qq.com > 
						
						
					 
					
						2025-04-17 04:17:39 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						95aca283b4 
					 
					
						
						
							
							[rocm][V0] fix selection logic for custom PA in V0 ( #16426 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Divakar Verma <divakar.verma@amd.com > 
						
						
					 
					
						2025-04-16 19:52:11 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2b05b8ce69 
					 
					
						
						
							
							[V1][Frontend] Improve Shutdown And Logs ( #11737 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: rshaw@neuralmagic.com  <rshaw@neuralmagic.com >
Signed-off-by: Andrew Feldman <afeldman@neuralmagic.com >
Signed-off-by: Nick Hill <nhill@redhat.com >
Co-authored-by: rshaw@neuralmagic.com  <rshaw@neuralmagic.com >
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com >
Co-authored-by: Russell Bryant <rbryant@redhat.com >
Co-authored-by: Andrew Feldman <afeldman@neuralmagic.com >
Co-authored-by: afeldman-nm <156691304+afeldman-nm@users.noreply.github.com >
Co-authored-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-04-16 19:48:34 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3c776dcefb 
					 
					
						
						
							
							Adding vllm buildkite job for IBM Power ( #16679 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Aaruni Aggarwal <aaruniagg@gmail.com > 
						
						
					 
					
						2025-04-17 10:47:47 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2cbd4d2999 
					 
					
						
						
							
							[V1][Spec Dec Bug Fix] Respect Spec Dec Method Specification ( #16636 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Bryan Lu <yuzhelu@amazon.com > 
						
						
					 
					
						2025-04-16 19:47:26 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3092375e27 
					 
					
						
						
							
							[V1][Performance] Implement custom serializaton for MultiModalKwargs [Rebased] ( #16432 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Staszek Pasko <staszek@gmail.com >
Signed-off-by: Nick Hill <nhill@redhat.com >
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com >
Co-authored-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-04-16 19:28:32 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3cd91dc955 
					 
					
						
						
							
							Help user create custom model for Transformers backend remote code models ( #16719 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-04-17 01:05:59 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8a7368e069 
					 
					
						
						
							
							[Misc] Remove redundant comment ( #16703 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jade Zheng <zheng.shoujian@outlook.com > 
						
						
					 
					
						2025-04-17 00:44:52 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						93e561ec4d 
					 
					
						
						
							
							Improve error for structured output backend selection ( #16717 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-04-17 00:35:35 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e1b004839a 
					 
					
						
						
							
							[Hardware] Add processor inputs to platform validation ( #16680 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Joe Runde <Joseph.Runde@ibm.com > 
						
						
					 
					
						2025-04-16 09:28:42 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ee378f3d49 
					 
					
						
						
							
							[Model] support modernbert  ( #16648 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: 唯勤 <xsank.mz@alibaba-inc.com >
Co-authored-by: 唯勤 <xsank.mz@alibaba-inc.com > 
						
						
					 
					
						2025-04-16 05:30:15 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e82ee40de3 
					 
					
						
						
							
							[Bugfix][Kernel] fix potential cuda graph broken for merge_attn_states kernel ( #16693 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DefTruth <qiustudent_r@163.com > 
						
						
					 
					
						2025-04-16 03:31:39 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						facbe2a114 
					 
					
						
						
							
							[Doc] Improve OOM troubleshooting ( #16704 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-04-16 18:29:48 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7168920491 
					 
					
						
						
							
							[Misc] refactor examples series ( #16708 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-04-16 10:16:36 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						21378a2323 
					 
					
						
						
							
							[CI] Cleanup additional_dependencies: [toml] for pre-commit yapf hook ( #16405 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Kay Yan <kay.yan@daocloud.io > 
						
						
					 
					
						2025-04-16 10:05:31 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						976711d9db 
					 
					
						
						
							
							[V1][Structured Output] Move xgrammar related utils to backend_xgrammar.py ( #16578 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: shen-shanshan <467638484@qq.com > 
						
						
					 
					
						2025-04-16 17:01:36 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						44fa4d556c 
					 
					
						
						
							
							[ROCM] Bind triton version to 3.2 in requirements-built.txt  ( #16664 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Sage Moore <sage@neuralmagic.com > 
						
						
					 
					
						2025-04-16 14:05:28 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3ac98edcb1 
					 
					
						
						
							
							[Feature] add model aware kv ops helper ( #16020 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: billishyahao <bill.he@amd.com > 
						
						
					 
					
						2025-04-15 23:00:43 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						966c742ed2 
					 
					
						
						
							
							Disable remote caching when calling compile_fx ( #16611 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: rzou <zou3519@gmail.com > 
						
						
					 
					
						2025-04-15 22:18:28 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0d7d05f4b6 
					 
					
						
						
							
							[Misc] Modify LRUCache touch ( #16689 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-04-16 04:51:38 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						96bb8aa68b 
					 
					
						
						
							
							[Bugfix] fix gpu docker image mis benchmarks dir ( #16628 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: rongfu.leng <rongfu.leng@daocloud.io > 
						
						
					 
					
						2025-04-15 21:21:14 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3badb0213b 
					 
					
						
						
							
							[Model] Add PLaMo2 ( #14323 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Shinichi Hemmi <50256998+Alnusjaponica@users.noreply.github.com >
Signed-off-by: shemmi <shemmi@preferred.jp >
Co-authored-by: Kento Nozawa <nzw0301@preferred.jp >
Co-authored-by: Hiroaki Mikami <mhiroaki@preferred.jp >
Co-authored-by: Calvin Metzger <metzger@preferred.jp > 
						
						
					 
					
						2025-04-15 19:31:30 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						fdcb850f14 
					 
					
						
						
							
							[Misc] Enable vLLM to Dynamically Load LoRA from a Remote Server ( #10546 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Angky William <angkywilliam@Angkys-MacBook-Pro.local >
Co-authored-by: Angky William <angkywilliam@Angkys-MacBook-Pro.local > 
						
						
					 
					
						2025-04-15 22:31:38 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						54a66e5fee 
					 
					
						
						
							
							[Misc] Update compressed-tensors WNA16 to support zero-points ( #14211 )  
						
						 
						
						
						
						
					 
					
						2025-04-15 07:33:51 -06:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						280d62b8a2 
					 
					
						
						
							
							[Kernel] Remove redundant Exp calculations ( #16123 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DefTruth <qiustudent_r@163.com > 
						
						
					 
					
						2025-04-15 12:58:37 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1666e66443 
					 
					
						
						
							
							Add "/server_info" endpoint in api_server to retrieve the vllm_config.  ( #16572 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Xihui Cang <xihuicang@gmail.com > 
						
						
					 
					
						2025-04-15 11:50:38 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1575c1701a 
					 
					
						
						
							
							[CI/Build] Fix LoRA OOM ( #16624 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-04-15 16:38:19 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6ae996a873 
					 
					
						
						
							
							[Misc] refactor argument parsing in examples ( #16635 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-04-15 08:05:30 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b590adfdc1 
					 
					
						
						
							
							Fix vLLM x torch.compile config caching ( #16491 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: rzou <zou3519@gmail.com > 
						
						
					 
					
						2025-04-14 23:11:11 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b4fe16c75b 
					 
					
						
						
							
							Add vllm bench [latency, throughput] CLI commands ( #16508 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-04-14 23:10:35 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						bc5dd4f669 
					 
					
						
						
							
							[Bugfix] Fix broken GritLM model and tests (missing pooling_metadata) ( #16631 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Pooya Davoodi <pooya.davoodi@parasail.io > 
						
						
					 
					
						2025-04-14 23:09:58 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						dbb036cf61 
					 
					
						
						
							
							[Bugfix] Fix tests/kernels/test_mamba_ssm_ssd.py ( #16623 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com > 
						
						
					 
					
						2025-04-15 05:35:38 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						70e7ed841d 
					 
					
						
						
							
							[BugFix]: Update minimum pyzmq version ( #16549 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Taneem Ibrahim <taneem.ibrahim@gmail.com >
Co-authored-by: mgoin <michael@neuralmagic.com > 
						
						
					 
					
						2025-04-14 20:06:03 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d06ba4ed3f 
					 
					
						
						
							
							[Kernel] moe wna16 marlin kernel ( #14447 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jinzhen Lin <linjinzhen@hotmail.com >
Co-authored-by: Michael Goin <michael@neuralmagic.com >
Co-authored-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-04-14 20:05:22 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6b40996ae8 
					 
					
						
						
							
							[Core][Bugfix] Fix Offline MM Beam Search ( #16390 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Alex-Brooks <Alex.Brooks@ibm.com >
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com > 
						
						
					 
					
						2025-04-15 10:33:02 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d2020acac7 
					 
					
						
						
							
							config check sleep mode support oot platforms ( #16562 )  
						
						 
						
						
						
						
					 
					
						2025-04-14 16:31:50 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1eb3c2ed48 
					 
					
						
						
							
							[DOC][TPU] Add core idea about avoiding recompilation after warmup ( #16614 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chengji Yao <chengjiyao@google.com > 
						
						
					 
					
						2025-04-14 21:56:06 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c64ee87267 
					 
					
						
						
							
							[Hardware][TPU] Add torchvision to tpu dependency file ( #16616 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Siyuan Liu <lsiyuan@google.com > 
						
						
					 
					
						2025-04-14 17:50:46 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b1308b84a3 
					 
					
						
						
							
							[Model][VLM] Add Kimi-VL model support ( #16387 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: courage17340 <courage17340@163.com > 
						
						
					 
					
						2025-04-14 21:41:48 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7b5ecf79bd 
					 
					
						
						
							
							s390x: Fix PyArrow build and add CPU test script for Buildkite CI ( #16036 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nishan Acharya <Nishan.Acharya@ibm.com > 
						
						
					 
					
						2025-04-14 10:55:32 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9883a18859 
					 
					
						
						
							
							Fix triton install condition on CPU ( #16600 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-04-14 17:06:01 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b3f2fddd17 
					 
					
						
						
							
							[TPU][V1] Fix exponential padding when max-num-batched-tokens is not a power of 2 ( #16596 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: NickLucche <nlucches@redhat.com > 
						
						
					 
					
						2025-04-14 17:01:05 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						aa29841ede 
					 
					
						
						
							
							[Bugfix] Multi-modal caches not acting like LRU caches ( #16593 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-04-14 09:24:16 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6bf27affb6 
					 
					
						
						
							
							[fix]: Dockerfile.ppc64le fixes for opencv-python and hf-xet ( #16048 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Md. Shafi Hussain <Md.Shafi.Hussain@ibm.com > 
						
						
					 
					
						2025-04-14 17:08:39 +01:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1dd23386ec 
					 
					
						
						
							
							[Misc] Update usage with mooncake lib for kv transfer ( #16523 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Shangming Cai <caishangming@linux.alibaba.com > 
						
						
					 
					
						2025-04-14 11:31:37 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7cbfc10943 
					 
					
						
						
							
							[Misc] refactor examples ( #16563 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-04-14 09:59:15 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ce4ddd2d1a 
					 
					
						
						
							
							[Misc] remove warning if triton>=3.2.0 ( #16553 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DefTruth <qiustudent_r@163.com > 
						
						
					 
					
						2025-04-14 02:39:47 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e51929ebca 
					 
					
						
						
							
							Improve configs - SchedulerConfig ( #16533 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-04-14 17:24:16 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						dc1b4a6f13 
					 
					
						
						
							
							[Core][V0] Enable regex support with xgrammar ( #13228 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Russell Bryant <rbryant@redhat.com > 
						
						
					 
					
						2025-04-14 10:13:38 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						63d2705edb 
					 
					
						
						
							
							[Benchmark][Bugfix] Fix SonnetDataset default values in benchmark_throughput.py ( #16556 )  
						
						 
						
						
						
						
					 
					
						2025-04-13 17:20:26 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d085a44082 
					 
					
						
						
							
							Enable PTPC FP8 for CompressedTensorsW8A8Fp8MoEMethod (triton fused_moe) ( #16537 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-04-13 14:55:18 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f49e5aff11 
					 
					
						
						
							
							[V1][Spec Decode] KV cache slots for eagle heads ( #16370 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: LiuXiaoxuanPKU <lilyliupku@gmail.com > 
						
						
					 
					
						2025-04-12 19:42:51 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6c11ecf8d3 
					 
					
						
						
							
							[Bugfix] Validate logit biases to prevent out of vocab ids crashing engine ( #16529 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Ryan McConville <ryan@ryanmcconville.com > 
						
						
					 
					
						2025-04-12 20:19:19 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						93e5f3c5fb 
					 
					
						
						
							
							[Perf] Optimize Preparing Inputs for GPU Model Runner ( #16484 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: snowcharm <snowcharmqq@gmail.com >
Co-authored-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-04-12 22:54:37 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						70363bccfa 
					 
					
						
						
							
							Fix syntaxWarning: invalid escape sequence '\s' ( #16532 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jie Fu <jiefu@tencent.com > 
						
						
					 
					
						2025-04-12 14:39:42 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3cdc57669f 
					 
					
						
						
							
							[Misc] Delete redundant code ( #16530 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com >
Co-authored-by: Isotr0py <mozf@mail2.sysu.edu.cn > 
						
						
					 
					
						2025-04-12 11:21:37 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						68bb122eb4 
					 
					
						
						
							
							[MISC] Make GroupCoordinator compatible with out-of-tree devices ( #16464 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: hzji210@gmail.com  <hzji210@gmail.com > 
						
						
					 
					
						2025-04-12 09:20:25 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d9fc8cd9da 
					 
					
						
						
							
							[V1] Enable multi-input by default ( #15799 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-04-12 08:52:39 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f069f3ea74 
					 
					
						
						
							
							[Misc] Openai transcription client example use same Whisper model ( #16487 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: NickLucche <nlucches@redhat.com > 
						
						
					 
					
						2025-04-12 07:27:03 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c5bc0e7fcc 
					 
					
						
						
							
							[Misc] Update chat utils tests ( #16520 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-04-12 06:48:43 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4a3a518722 
					 
					
						
						
							
							fix: spelling ( #16466 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Tianer Zhou <ezhoureal@gmail.com > 
						
						
					 
					
						2025-04-11 23:24:22 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						fbf722c6e6 
					 
					
						
						
							
							[Frontend] support matryoshka representation / support embedding API dimensions ( #16331 )  
						
						 
						
						
						
						
					 
					
						2025-04-11 23:23:10 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e92d7085bf 
					 
					
						
						
							
							[Feature][V1] Add xgrammar to support minLength, maxLength with test ( #16516 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Leon Seidel <leon.seidel@fau.de > 
						
						
					 
					
						2025-04-11 23:22:07 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						bd6028d6b0 
					 
					
						
						
							
							Optimized topk for topk=1 (Llama-4) ( #16512 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-04-12 14:21:08 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						802329dee9 
					 
					
						
						
							
							[Doc] Update Llama4 Model Names in Supported Models ( #16509 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Ye (Charlotte) Qi <yeq@meta.com > 
						
						
					 
					
						2025-04-12 02:53:10 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						41cc883c29 
					 
					
						
						
							
							[BugFix] Handle non-contiguous tensors properly when serializing ( #16492 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-04-11 17:54:06 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						57504a4bcf 
					 
					
						
						
							
							[CI][Bugfix] Add mistral_tool_use to Ci ( #16517 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-04-11 17:52:38 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ed4792c990 
					 
					
						
						
							
							[Doc] Fix link to vLLM blog ( #16519 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Yuan Tang <terrytangyuan@gmail.com > 
						
						
					 
					
						2025-04-11 17:39:23 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						87b836ba77 
					 
					
						
						
							
							Bugfix for PixtralHF models without spatial_merge_size ( #16513 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-04-11 23:32:22 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						56c76c2e0e 
					 
					
						
						
							
							[Bugfix] clean up duplicated code ( #16485 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Gogs <gogs@fake.local >
Co-authored-by: Gogs <gogs@fake.local > 
						
						
					 
					
						2025-04-11 23:19:40 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c09632a66c 
					 
					
						
						
							
							Update openai_compatible_server.md ( #16507 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Christian Sears <csears@redhat.com > 
						
						
					 
					
						2025-04-11 22:54:58 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a3bf8d4a2b 
					 
					
						
						
							
							[Kernel] Add tuned FusedMoE kernel config for Llama4 Scout, TP=8 on H100  ( #16488 )  
						
						 
						
						
						
						
					 
					
						2025-04-12 06:26:55 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						16eda8c43a 
					 
					
						
						
							
							[Frontend] Added chat templates for LLaMa4 pythonic tool calling ( #16463 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Ye (Charlotte) Qi <yeq@meta.com >
Co-authored-by: Kai Wu <kaiwu@meta.com > 
						
						
					 
					
						2025-04-12 06:26:17 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						cd77382ac1 
					 
					
						
						
							
							Improve configs - LoadConfig ( #16422 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-04-11 20:27:27 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						71b9cde010 
					 
					
						
						
							
							[Bugfix] handle alignment of encoder_seq_lens in mllama.py ( #14784 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Travis Johnson <tsjohnso@us.ibm.com > 
						
						
					 
					
						2025-04-11 19:59:50 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5285589f37 
					 
					
						
						
							
							[Doc] Document InternVL3 support ( #16495 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <2037008807@qq.com > 
						
						
					 
					
						2025-04-11 19:41:09 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f41647ee6b 
					 
					
						
						
							
							[Kernel] Support W8A8 channel-wise weights and per-token activations in triton fused_moe_kernel ( #16366 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-04-11 17:54:08 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4d022cbc75 
					 
					
						
						
							
							[TPU][V1] Make --disable_chunked_mm_input mandatory for serving MM models ( #16483 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: NickLucche <nlucches@redhat.com > 
						
						
					 
					
						2025-04-11 17:06:14 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						70de35a881 
					 
					
						
						
							
							Fix erroneous "model doesn't support compile" warning ( #16486 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: rzou <zou3519@gmail.com > 
						
						
					 
					
						2025-04-11 16:24:36 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						34b2cf3b33 
					 
					
						
						
							
							[Hardware][Intel-Gaudi] Multi-step scheduling implementation for HPU ( #12779 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Tomasz Zielinski <tomasz.zielinski@intel.com > 
						
						
					 
					
						2025-04-11 07:38:36 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9e90c9f73f 
					 
					
						
						
							
							[Bugfix] Fix bugs of running Quark quantized models ( #16236 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: chaow <chaow@amd.com > 
						
						
					 
					
						2025-04-11 10:18:32 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e9528f6dc6 
					 
					
						
						
							
							[Kernel] support merge_attn_states CUDA kernel, 3x speedup ( #16173 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DefTruth <qiustudent_r@163.com > 
						
						
					 
					
						2025-04-11 06:50:50 -06:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						51baa9c333 
					 
					
						
						
							
							Don't install triton on ppc64le platform ( #16470 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-04-11 10:11:00 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						35e076b3a8 
					 
					
						
						
							
							[Misc] update api_client example ( #16459 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-04-11 10:05:40 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a26f59ccbc 
					 
					
						
						
							
							[Misc] Raise error for V1 not supporting Long LoRA. ( #16415 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-04-11 01:51:20 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						aa3b3d76e0 
					 
					
						
						
							
							Enforce valid max_num_batched_tokens when disable_chunked_mm_input=True ( #16447 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-04-11 08:09:52 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f7030df3be 
					 
					
						
						
							
							[Core][LoRA][1/N] Add LoRA for EncoderDecoderModelRunner ( #15990 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-04-11 15:32:37 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						905e91e9ac 
					 
					
						
						
							
							Revert "[Model] use AutoWeightsLoader for deepseek_v2, internlm2" ( #16453 )  
						
						 
						
						
						
						
					 
					
						2025-04-11 06:44:22 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f8f9c0ba62 
					 
					
						
						
							
							[Bugfix] Don't set an upper bound on repetition penalty ( #16403 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Alex-Brooks <Alex.Brooks@ibm.com >
Co-authored-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-04-11 14:19:40 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						dda811021a 
					 
					
						
						
							
							[CPU][Bugfix] Fix CPU docker issues ( #16454 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: jiang.li <jiang1.li@intel.com > 
						
						
					 
					
						2025-04-11 14:19:07 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						93195146ea 
					 
					
						
						
							
							[Bugfix][VLM] Fix failing Phi-4-MM multi-images tests and add vision-speech test ( #16424 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <2037008807@qq.com > 
						
						
					 
					
						2025-04-11 04:57:16 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ed37599544 
					 
					
						
						
							
							Update supported_hardware.md for TPU INT8 ( #16437 )  
						
						 
						
						
						
						
					 
					
						2025-04-11 12:28:07 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						99ef59cf7f 
					 
					
						
						
							
							[Llama4] Enable attention temperature tuning by default for long context (>32k) ( #16439 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Ye (Charlotte) Qi <yeq@meta.com >
Co-authored-by: Ye (Charlotte) Qi <yeq@meta.com > 
						
						
					 
					
						2025-04-10 21:26:07 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d544d141ec 
					 
					
						
						
							
							update benchmark_serving_structured_output to include auto backend ( #16438 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chenyaaang <chenyangli@google.com > 
						
						
					 
					
						2025-04-11 12:25:52 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3e397a9484 
					 
					
						
						
							
							check input length of sonnet samples ( #16423 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: alexey-belyakov <alexey.belyakov@intel.com > 
						
						
					 
					
						2025-04-11 10:15:06 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						268c325078 
					 
					
						
						
							
							Fix range_ratio Bug in RandomDataset ( #16126 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: jadewang21 <jadewangcn@outlook.com > 
						
						
					 
					
						2025-04-10 15:31:17 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3cc9af88ff 
					 
					
						
						
							
							[TPU][V1] Disable per-request seed/Generator ( #16172 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: NickLucche <nlucches@redhat.com > 
						
						
					 
					
						2025-04-10 17:05:44 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7cd0bd7212 
					 
					
						
						
							
							[Bugfix] Fix output token length check logic ( #16419 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: look <eeslook@163.com > 
						
						
					 
					
						2025-04-10 20:16:48 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						56d4aefa33 
					 
					
						
						
							
							[VLM] Avoid unnecessary dummy multimodal data during processing ( #16416 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-04-10 19:32:14 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						dd143ef541 
					 
					
						
						
							
							[V1] Zero-copy tensor/ndarray serialization/transmission ( #13790 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-04-10 19:23:14 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						daefed052c 
					 
					
						
						
							
							[Model] Reduce redundant computations in mamba2 blocks for Bamba-9B ( #15423 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chih-Chieh-Yang <7364402+cyang49@users.noreply.github.com >
Co-authored-by: Yu Chin Fabian Lim <flim@sg.ibm.com > 
						
						
					 
					
						2025-04-10 19:07:07 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5fbab20e02 
					 
					
						
						
							
							[Bugfix] Fix bug when dataset is json ( #15899 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chenyaaang <chenyangli@google.com > 
						
						
					 
					
						2025-04-10 18:35:41 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e8224f3dca 
					 
					
						
						
							
							[V1][Spec Decode] Eagle Model loading ( #16035 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: LiuXiaoxuanPKU <lilyliupku@gmail.com > 
						
						
					 
					
						2025-04-10 11:21:48 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9665313c39 
					 
					
						
						
							
							[V1] Set structured output backend to auto by default ( #15724 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Russell Bryant <rbryant@redhat.com > 
						
						
					 
					
						2025-04-10 17:53:26 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0c54fc7273 
					 
					
						
						
							
							Improve configs - ParallelConfig ( #16332 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-04-10 17:34:37 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c1b57855ec 
					 
					
						
						
							
							[TPU][V1] Use language_model interface for getting text backbone in MM ( #16410 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: NickLucche <nlucches@redhat.com > 
						
						
					 
					
						2025-04-10 17:32:04 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						83b824c8b4 
					 
					
						
						
							
							[VLM] Remove BaseProcessingInfo.get_mm_max_tokens_per_item ( #16408 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-04-10 09:06:58 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7678fcd5b6 
					 
					
						
						
							
							Fix the torch version parsing logic ( #15857 )  
						
						 
						
						
						
						
					 
					
						2025-04-10 07:37:47 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8661c0241d 
					 
					
						
						
							
							[CI] Add auto update workflow for Dockerfile graph ( #11879 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: wineandchord <guoqizhou19@gmail.com > 
						
						
					 
					
						2025-04-10 13:43:05 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ce8d6b75fc 
					 
					
						
						
							
							[doc] update the wrong link ( #16401 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-04-10 21:02:37 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						61de3ef74b 
					 
					
						
						
							
							[Model] Remove image mm limit for LLaMa4  ( #16365 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Ye (Charlotte) Qi <yeq@meta.com > 
						
						
					 
					
						2025-04-10 09:36:27 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ec1f9c8c91 
					 
					
						
						
							
							Update Numba to 0.61.2 ( #16376 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: cyy <cyyever@outlook.com > 
						
						
					 
					
						2025-04-10 07:59:37 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						65e09094c4 
					 
					
						
						
							
							[doc] add download model tips ( #16389 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-04-10 07:45:26 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c70cf0fe06 
					 
					
						
						
							
							[Kernel] Use moe_wna16 kernel for compressed tensors wna16 moe models ( #16038 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-04-10 15:08:47 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a5d11a54dc 
					 
					
						
						
							
							[Bugfix] Fix validation error for text-only Mllama 3.2 ( #16377 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-04-10 14:19:42 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3d4c87758e 
					 
					
						
						
							
							[Misc] Update transformers version limits of multi-modal tests ( #16381 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-04-09 23:03:33 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a9bd832fc5 
					 
					
						
						
							
							[Model] use AutoWeightsLoader for deepseek_v2, internlm2 ( #16383 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Aaron Ang <aaron.angyd@gmail.com > 
						
						
					 
					
						2025-04-09 23:01:00 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						417bcefbae 
					 
					
						
						
							
							fix sonnet dataset sample when prefix len is very small ( #16379 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chenyaaang <chenyangli@google.com > 
						
						
					 
					
						2025-04-10 05:35:07 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						baada0e737 
					 
					
						
						
							
							[Bugfix][TPU] Fix TPU validate_request ( #16369 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Michael Goin <mgoin64@gmail.com > 
						
						
					 
					
						2025-04-10 12:55:12 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						82eb61dd4c 
					 
					
						
						
							
							[misc] use tqdm.auto where appropriate ( #16290 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Benjamin Kitor <bkitor@gigaio.com > 
						
						
					 
					
						2025-04-09 21:54:54 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0d4d06fe2f 
					 
					
						
						
							
							[CI][Bugfix] Pin triton version for CPU ( #16384 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Roger Wang <ywang@roblox.com > 
						
						
					 
					
						2025-04-10 04:35:00 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4aed0ca6a2 
					 
					
						
						
							
							[bugfix] Avoid the time consumption caused by creating dummy videos. ( #16371 )  
						
						 
						
						
						
						
					 
					
						2025-04-10 04:30:05 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1621b25288 
					 
					
						
						
							
							[TPU] Fix dummy loading OOM ( #16372 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chengji Yao <chengjiyao@google.com > 
						
						
					 
					
						2025-04-10 04:06:16 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a564797151 
					 
					
						
						
							
							[Model] use AutoWeightsLoader for granite, granitemoe, granitemoeshared, grok1, mixtral ( #16325 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Aaron Ang <aaron.angyd@gmail.com > 
						
						
					 
					
						2025-04-09 20:07:40 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1da6a09274 
					 
					
						
						
							
							[Bugfix]: do not shutdown server if skip_special_use=False for MistralTokenizer ( #14094 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Guillaume Calmettes <gcalmettes@scaleway.com > 
						
						
					 
					
						2025-04-09 19:43:09 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1e44ffc3ff 
					 
					
						
						
							
							Add GLM-4-0414 support ( #16338 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: lvfei.lv <lvfei.lv@alibaba-inc.com >
Signed-off-by: zRzRzRzRzRzRzR <2448370773@qq.com >
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk >
Signed-off-by: yihong0618 <zouzou0208@gmail.com >
Signed-off-by: Lu Fang <fanglu@fb.com >
Signed-off-by: Ajay Vohra <ajayvohr@amazon.com >
Signed-off-by: NickLucche <nlucches@redhat.com >
Signed-off-by: Guillaume Calmettes <gcalmettes@scaleway.com >
Co-authored-by: Accelerator1996 <lvfei.lv@alibaba-inc.com >
Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk >
Co-authored-by: Michael Goin <michael@neuralmagic.com >
Co-authored-by: yihong <zouzou0208@gmail.com >
Co-authored-by: Lucia Fang <116399278+luccafong@users.noreply.github.com >
Co-authored-by: ajayvohra2005 <ajayvohr@amazon.com >
Co-authored-by: Nicolò Lucchesi <nlucches@redhat.com >
Co-authored-by: Guillaume Calmettes <gcalmettes@scaleway.com > 
						
						
					 
					
						2025-04-10 09:19:42 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a454748544 
					 
					
						
						
							
							[TPU][V1] Refine tpu_model_runner to mitigate future recompilation issues ( #16275 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chengji Yao <chengjiyao@google.com > 
						
						
					 
					
						2025-04-09 18:51:51 -06:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1bff42c4b7 
					 
					
						
						
							
							[Misc] refactor Structured Outputs example ( #16322 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-04-09 23:32:42 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						cb391d85dc 
					 
					
						
						
							
							[Hardware] add platform-specific request validation api ( #16291 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Joe Runde <Joseph.Runde@ibm.com > 
						
						
					 
					
						2025-04-09 12:50:01 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						fee5b8d37f 
					 
					
						
						
							
							[Build/CI] Add tracing deps to vllm container image ( #15224 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Russell Bryant <rbryant@redhat.com > 
						
						
					 
					
						2025-04-09 19:14:06 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b2ce859bd2 
					 
					
						
						
							
							Fix benchmark_throughput.py --backend=hf ( #16352 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-04-09 19:09:28 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						566f10a929 
					 
					
						
						
							
							[CI]Fix hpu docker and numpy version for CI ( #16355 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chendi Xue <chendi.xue@intel.com > 
						
						
					 
					
						2025-04-09 17:52:26 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c3b5189137 
					 
					
						
						
							
							[Bugfix] catch AssertionError in MistralTokenizer as ValueError ( #16344 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Guillaume Calmettes <gcalmettes@scaleway.com > 
						
						
					 
					
						2025-04-09 17:33:24 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a25866ac8d 
					 
					
						
						
							
							[Bugfix] Fix profiling.py ( #16202 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: zh Wang <rekind133@outlook.com > 
						
						
					 
					
						2025-04-09 17:03:34 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						098900d7c2 
					 
					
						
						
							
							Revert "Update label-tpu mergify and remove removal bot" ( #16350 )  
						
						 
						
						
						
						
					 
					
						2025-04-09 07:59:36 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						98d01d3ce2 
					 
					
						
						
							
							[Bugfix][Frontend] respect provided default guided decoding backend ( #15476 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Guillaume Calmettes <gcalmettes@scaleway.com > 
						
						
					 
					
						2025-04-09 05:11:10 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d55244df31 
					 
					
						
						
							
							[Model] Add SupportsMultiModal.get_language_model interface ( #16007 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: NickLucche <nlucches@redhat.com > 
						
						
					 
					
						2025-04-09 04:12:54 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						04149cce27 
					 
					
						
						
							
							[BugFix] fix some typos found by typos. ( #16314 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yihong0618 <zouzou0208@gmail.com > 
						
						
					 
					
						2025-04-09 03:43:59 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						24834f4894 
					 
					
						
						
							
							update neuron config ( #16289 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Ajay Vohra <ajayvohr@amazon.com > 
						
						
					 
					
						2025-04-09 03:43:22 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ec7da6fcf3 
					 
					
						
						
							
							[BugFix] llama4 qknorm should be not shared across head ( #16311 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lu Fang <fanglu@fb.com > 
						
						
					 
					
						2025-04-09 00:59:14 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						819d548e8a 
					 
					
						
						
							
							[BugFix] logger is not callable ( #16312 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yihong0618 <zouzou0208@gmail.com > 
						
						
					 
					
						2025-04-09 00:59:02 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						477d2a8aa2 
					 
					
						
						
							
							Update label-tpu mergify and remove removal bot ( #16298 )  
						
						 
						
						
						
						
					 
					
						2025-04-09 07:56:25 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e484e02857 
					 
					
						
						
							
							[Bugfix] Avoid transferring cached multi-modal items from P0 to P1 ( #16273 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-04-09 00:51:27 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						24f6b9a713 
					 
					
						
						
							
							[Misc] Fix test_sharded_state_loader.py( #16004 ) ( #16005 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: lvfei.lv <lvfei.lv@alibaba-inc.com > 
						
						
					 
					
						2025-04-09 14:47:30 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9cdde47289 
					 
					
						
						
							
							[BugFix] Fix fusion test and add them to CI ( #16287 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: luka <luka@neuralmagic.com > 
						
						
					 
					
						2025-04-08 23:46:45 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b1eb4ca152 
					 
					
						
						
							
							[TPU] Update PyTorch/XLA ( #16288 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chengji Yao <chengjiyao@google.com > 
						
						
					 
					
						2025-04-09 14:46:32 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						87b4ac56c2 
					 
					
						
						
							
							[CI][Bugfix] Fix bad tolerance for test_batch_base64_embedding ( #16221 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-04-09 04:14:46 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						cb84e45ac7 
					 
					
						
						
							
							[Core] Upgrade to xgrammar 0.1.18, add cache size limit ( #16283 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Russell Bryant <rbryant@redhat.com > 
						
						
					 
					
						2025-04-08 19:13:22 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4716377fbc 
					 
					
						
						
							
							[Feature] Estimate max-model-len use available KV cache memory ( #16168 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: rongfu.leng <rongfu.leng@daocloud.io > 
						
						
					 
					
						2025-04-08 19:12:51 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4e9cf8c1dd 
					 
					
						
						
							
							[Bugfix] fix gettid method is not define ( #16084 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: rongfu.leng <rongfu.leng@daocloud.io > 
						
						
					 
					
						2025-04-08 19:12:44 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2976dc27e9 
					 
					
						
						
							
							[Bug] [ROCm] Fix Llama 4 Enablement Bug on ROCm: V0 ROCmFlashAttentionImpl and Triton Fused MoE bugs ( #16198 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: tjtanaa <tunjian.tan@embeddedllm.com >
Signed-off-by: kliuae <kuanfu.liu@embeddedllm.com >
Co-authored-by: Hongxia Yang <hongxia.yang@amd.com >
Co-authored-by: kliuae <kuanfu.liu@embeddedllm.com > 
						
						
					 
					
						2025-04-08 19:12:34 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						102bf967f0 
					 
					
						
						
							
							[Model] Add smolvlm support ( #16017 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com > 
						
						
					 
					
						2025-04-08 19:12:17 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1f4b09b525 
					 
					
						
						
							
							Add support to modelopt quantization of Mixtral model ( #15961 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Yue <yueshen@nvidia.com > 
						
						
					 
					
						2025-04-09 01:53:31 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						86c3369eb8 
					 
					
						
						
							
							[CI/Build] Fix CI LoRA failure ( #16270 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jee Jee Li <pandaleefree@gmail.com > 
						
						
					 
					
						2025-04-09 09:13:56 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2755c34a8f 
					 
					
						
						
							
							[V1] Update structured output offline inference example ( #15721 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Russell Bryant <rbryant@redhat.com > 
						
						
					 
					
						2025-04-08 22:34:09 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						db10422184 
					 
					
						
						
							
							[Bugfix] fix deepseek fp16 scale bug ( #14809 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jinzhen Lin <linjinzhen@hotmail.com >
Co-authored-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-04-08 16:56:09 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e1a2c699dd 
					 
					
						
						
							
							[BugFix] Fix Llama4 - Index Error When Single Request Near Max Context ( #16209 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lucas Wilkinson <lwilkinson@neuralmagic.com > 
						
						
					 
					
						2025-04-08 18:56:51 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0115ccd5c0 
					 
					
						
						
							
							Add warning that content below line in template will be removed ( #16276 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-04-08 18:18:40 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						40b4284fe3 
					 
					
						
						
							
							[Bugfix] Handle process_weights_after_loading for QKVCrossParallelLinear ( #15328 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <2037008807@qq.com > 
						
						
					 
					
						2025-04-08 10:02:23 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4ebc0b9640 
					 
					
						
						
							
							[Bugfix] Proper input validation for multi-modal encoder-decoder models ( #16156 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-04-08 09:45:21 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						dc96fd54c6 
					 
					
						
						
							
							[Misc] Avoid stripping meaningful whitespace from nvidia-smi topo -m output in collect_env.py ( #16272 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: imkero <kerorek@outlook.com > 
						
						
					 
					
						2025-04-08 16:08:09 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1f5d13ab9f 
					 
					
						
						
							
							[New Model]: jinaai/jina-embeddings-v3 ( #16120 )  
						
						 
						
						
						
						
					 
					
						2025-04-08 08:39:12 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						90cb44eb02 
					 
					
						
						
							
							Update to transformers==4.51.1 ( #16257 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-04-08 06:53:39 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e11880deea 
					 
					
						
						
							
							[Bugfix] Remove triton do_bench fast_flush arg ( #16256 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Kebe <mail@kebe7jun.com > 
						
						
					 
					
						2025-04-08 13:51:06 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9351f91be9 
					 
					
						
						
							
							[BugFix][ROCm] Fix GGUF MoE Dispatch Block_Dim for ROCm ( #16247 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Tianyuan Wu <Tianyuan.Wu@amd.com > 
						
						
					 
					
						2025-04-08 05:10:26 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						5a1e1c8353 
					 
					
						
						
							
							[Model] use AutoWeightsLoader for phimoe,qwen2_moe,qwen3_moe ( #16203 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: rongfu.leng <rongfu.leng@daocloud.io > 
						
						
					 
					
						2025-04-08 04:05:47 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						69ecaa7c79 
					 
					
						
						
							
							[Misc] Add warning for multimodal data in LLM.beam_search ( #16241 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Alex-Brooks <Alex.Brooks@ibm.com > 
						
						
					 
					
						2025-04-08 04:05:27 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7f00899ff7 
					 
					
						
						
							
							[Misc] format and refactor some examples ( #16252 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-04-08 10:42:32 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						995e3d1f41 
					 
					
						
						
							
							[Docs] Add Slides from Singapore Meetup ( #16213 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: simon-mo <simon.mo@hey.com > 
						
						
					 
					
						2025-04-08 07:20:22 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b4ac449a83 
					 
					
						
						
							
							[Misc] Merge the logs of pp layers partitions ( #16225 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Kebe <mail@kebe7jun.com > 
						
						
					 
					
						2025-04-08 00:18:15 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8e5314a468 
					 
					
						
						
							
							[V1] Add disable_chunked_mm_input arg to disable partial mm input prefill ( #15837 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-04-07 23:24:07 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						87918e40c4 
					 
					
						
						
							
							[torch.compile][TPU] Make @support_torch_compile work for XLA backend ( #15782 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Siyuan Liu <lsiyuan@google.com >
Signed-off-by: mgoin <mgoin64@gmail.com >
Co-authored-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-04-08 14:23:53 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f6b32efb7f 
					 
					
						
						
							
							[Bugfix] Fix and reorganize broken GGUF tests and bump gguf version ( #16194 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <2037008807@qq.com > 
						
						
					 
					
						2025-04-08 13:38:13 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b99733d092 
					 
					
						
						
							
							[Bugfix] Do not skip "empty" parts of chats that are parsable ( #16219 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <mgoin64@gmail.com > 
						
						
					 
					
						2025-04-08 05:14:15 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						05a015d6a5 
					 
					
						
						
							
							Add warning for Attention backends that do not support irope yet ( #16212 )  
						
						 
						
						
						
						
					 
					
						2025-04-08 03:59:26 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ad971af8c7 
					 
					
						
						
							
							[Bugfix] fix use-ep bug to enable ep by dp/tp size > 1 ( #16161 )  
						
						 
						
						
						
						
					 
					
						2025-04-07 20:48:47 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f2ebb6f541 
					 
					
						
						
							
							[V1] Scatter and gather placeholders in the model runner ( #16076 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk >
Signed-off-by: mgoin <mgoin64@gmail.com >
Signed-off-by: Roger Wang <ywang@roblox.com >
Co-authored-by: DarkLight1337 <tlleungac@connect.ust.hk >
Co-authored-by: mgoin <mgoin64@gmail.com >
Co-authored-by: Jennifer Zhao <ai.jenniferzhao@gmail.com > 
						
						
					 
					
						2025-04-08 10:43:41 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						1d01211264 
					 
					
						
						
							
							Update BASE_IMAGE to 2.22 release of Neuron ( #16218 )  
						
						 
						
						
						
						
					 
					
						2025-04-07 19:11:18 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						f94ab12f79 
					 
					
						
						
							
							[Misc] Update compressed-tensors to version 0.9.3 ( #16196 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Miles Williams <42222518+mlsw@users.noreply.github.com > 
						
						
					 
					
						2025-04-07 19:09:06 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						a865bc1ca6 
					 
					
						
						
							
							[core] do not send error across process ( #16174 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: youkaichao <youkaichao@gmail.com > 
						
						
					 
					
						2025-04-07 19:09:03 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						21802c4b6d 
					 
					
						
						
							
							[ROCm][Bugfix][FP8] Make fp8 quant respect fused modules mapping ( #16031 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: mgoin <michael@neuralmagic.com > 
						
						
					 
					
						2025-04-07 21:28:14 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						652907b354 
					 
					
						
						
							
							Torchao ( #14231 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: drisspg <drisspguessous@gmail.com > 
						
						
					 
					
						2025-04-07 19:39:28 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						24f1c01e0f 
					 
					
						
						
							
							[Bugfix][V0] XGrammar structured output supports Enum ( #15878 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Leon Seidel <leon.seidel@fau.de > 
						
						
					 
					
						2025-04-07 22:38:25 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						fad6e2538e 
					 
					
						
						
							
							[Misc] add description attribute in CLI ( #15921 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-04-07 22:30:35 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7f6d47c1a2 
					 
					
						
						
							
							[V1][BugFix] Exit properly if engine core fails during startup ( #16137 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Nick Hill <nhill@redhat.com > 
						
						
					 
					
						2025-04-07 15:30:15 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3147586ebd 
					 
					
						
						
							
							[Bugfix] Fix guidance backend for Qwen models ( #16210 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Benjamin Chislett <benjamin.chislett@centml.ai > 
						
						
					 
					
						2025-04-07 22:15:43 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ed636d99ca 
					 
					
						
						
							
							[Misc] Move Llama 4 projector call into encoder execution ( #16201 )  
						
						 
						
						
						
						
					 
					
						2025-04-07 14:02:05 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						090c856d76 
					 
					
						
						
							
							[Misc] Human-readable max-model-len cli arg ( #16181 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: NickLucche <nlucches@redhat.com >
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk >
Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-04-07 14:40:58 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ad434d4cfe 
					 
					
						
						
							
							Print the warning only once ( #16193 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Gregory Shtrasberg <Gregory.Shtrasberg@amd.com > 
						
						
					 
					
						2025-04-07 18:30:06 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						66d433b94f 
					 
					
						
						
							
							[V1] Revert the default max_num_seqs to V0 values for most hardware ( #16158 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-04-07 13:54:36 -04:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						027b204ff1 
					 
					
						
						
							
							[Bugfix] Re-enable support for ChatGLMForConditionalGeneration ( #16187 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-04-07 23:15:58 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						55dcce91df 
					 
					
						
						
							
							Upstream Llama4 Support to Main ( #16113 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Aston Zhang <22279212+astonzhang@users.noreply.github.com >
Signed-off-by: Chris Thi <chris.c.thi@gmail.com >
Signed-off-by: drisspg <drisspguessous@gmail.com >
Signed-off-by: Jon Swenson <jmswen@gmail.com >
Signed-off-by: Keyun Tong <tongkeyun@gmail.com >
Signed-off-by: Lu Fang <fanglu@meta.com >
Signed-off-by: Xiaodong Wang <xdwang@meta.com >
Signed-off-by: Yang Chen <yangche@fb.com >
Signed-off-by: Ye (Charlotte) Qi <yeq@meta.com >
Signed-off-by: Yong Hoon Shin <yhshin@meta.com >
Signed-off-by: Zijing Liu <liuzijing2014@gmail.com >
Signed-off-by: Lu Fang <lufang@fb.com >
Signed-off-by: Lu Fang <fanglu@fb.com >
Signed-off-by: Lucia Fang <fanglu@fb.com >
Signed-off-by: Roger Wang <ywang@roblox.com >
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk >
Co-authored-by: Lu Fang <fanglu@fb.com >
Co-authored-by: Roger Wang <ywang@roblox.com >
Co-authored-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-04-07 08:06:27 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						8017c8db7f 
					 
					
						
						
							
							[Doc]Update image to latest version ( #16186 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: WangErXiao <863579016@qq.com > 
						
						
					 
					
						2025-04-07 14:17:39 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						dc3529dbf6 
					 
					
						
						
							
							[Misc] improve example mlpspeculator and llm_engine_example ( #16175 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-04-07 11:53:52 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7699258ef0 
					 
					
						
						
							
							[Model] Add Qwen3 and Qwen3MoE ( #15289 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: YamPengLi <yampayne.lyp@alibaba-inc.com >
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com > 
						
						
					 
					
						2025-04-07 04:06:41 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						e9ba99f296 
					 
					
						
						
							
							[V1][Structured Output] Add supports_structured_output() method to Platform ( #16148 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: shen-shanshan <467638484@qq.com > 
						
						
					 
					
						2025-04-07 11:06:24 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						7c80368710 
					 
					
						
						
							
							[VLM] Florence-2 supports online serving ( #16164 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <2037008807@qq.com > 
						
						
					 
					
						2025-04-07 04:04:02 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						95d63f38c0 
					 
					
						
						
							
							doc: fix some typos in doc ( #16154 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yihong0618 <zouzou0208@gmail.com > 
						
						
					 
					
						2025-04-07 05:32:06 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						bb8dab821e 
					 
					
						
						
							
							[CI] Set max transformers version for Ultravox model test  ( #16149 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Roger Wang <ywang@roblox.com > 
						
						
					 
					
						2025-04-07 04:37:58 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						fc0f87768a 
					 
					
						
						
							
							[Bugfix] Make dummy encoder prompt padding alternative and add missing warnings ( #16129 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <2037008807@qq.com > 
						
						
					 
					
						2025-04-07 04:07:15 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0a57386721 
					 
					
						
						
							
							[Misc] Update Mistral-3.1 example ( #16147 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk > 
						
						
					 
					
						2025-04-07 03:57:37 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3749e28774 
					 
					
						
						
							
							[V1][Minor] Minor simplification for get_computed_blocks  ( #16139 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu > 
						
						
					 
					
						2025-04-06 20:38:12 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						86fc2321ff 
					 
					
						
						
							
							[Metrics] Add bucket for request_latency, time_to_first_token and time_per_output_token ( #15202 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Kay Yan <kay.yan@daocloud.io > 
						
						
					 
					
						2025-04-06 20:34:51 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2549c0dfef 
					 
					
						
						
							
							Fix requires-python ( #16132 )  
						
						 
						
						
						
						
					 
					
						2025-04-06 19:22:25 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b10e519895 
					 
					
						
						
							
							[V1][Minor] Optimize get_cached_block ( #16135 )  
						
						 
						
						
						
						
					 
					
						2025-04-06 20:48:14 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9bde5ba127 
					 
					
						
						
							
							[TPU] Update PyTorch/XLA ( #16130 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Chengji Yao <chengjiyao@google.com > 
						
						
					 
					
						2025-04-06 18:25:55 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						72c8f1ad04 
					 
					
						
						
							
							[Misc] update requires-python in pyproject.toml ( #16116 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-04-06 14:56:34 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						da224daaa9 
					 
					
						
						
							
							[Bugfix] add hf_token to EngineArgs ( #16093 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: paolovic <paul-philipp.luley@uzh.ch >
Co-authored-by: paolovic <paul-philipp.luley@uzh.ch > 
						
						
					 
					
						2025-04-06 14:47:33 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						3a100b9278 
					 
					
						
						
							
							[Bugfix] LoRA : Fix the order in which the kernels process LoRAs  ( #16040 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Varun Sundar Rabindranath <varun@neuralmagic.com >
Co-authored-by: Varun Sundar Rabindranath <varun@neuralmagic.com > 
						
						
					 
					
						2025-04-06 14:04:50 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						242a637aea 
					 
					
						
						
							
							[Model] use AutoWeightsLoader for stablelm,starcoder2,zamba2 ( #16103 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: rongfu.leng <rongfu.leng@daocloud.io > 
						
						
					 
					
						2025-04-06 05:52:01 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						c2a9671510 
					 
					
						
						
							
							[Misc] Improve model redirect to accept json dictionary ( #16119 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Isotr0py <2037008807@qq.com > 
						
						
					 
					
						2025-04-06 05:51:45 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d5ae4f7f42 
					 
					
						
						
							
							[Doc][Bugfix] Add missing EOF in k8s deploy doc ( #16025 )  
						
						 
						
						
						
						
					 
					
						2025-04-06 12:10:57 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						b6c502a150 
					 
					
						
						
							
							[Misc] refactor example eagle ( #16100 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-04-06 09:42:48 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						9ca710e525 
					 
					
						
						
							
							[CI][V1] Fix passing tokenizer as kwarg to validate_guidance_grammar ( #16117 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Roger Wang <ywang@roblox.com > 
						
						
					 
					
						2025-04-06 16:18:00 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						eb07c8cb5b 
					 
					
						
						
							
							[Frontend] Fix typo in tool chat templates for llama3.2 and toolace ( #14501 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Ben Jackson <ben@ben.com > 
						
						
					 
					
						2025-04-06 07:44:36 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						ba10801961 
					 
					
						
						
							
							[Benchmark] Add sampling parameters to benchmark_serving. ( #16022 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Hyesoo Yang <hyeygit@gmail.com > 
						
						
					 
					
						2025-04-06 12:30:35 +08:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						620fc2d09e 
					 
					
						
						
							
							[Model] fix model testing for TeleChat2ForCausalLM and V0 llama4 ( #16112 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Lu Fang <fanglu@fb.com > 
						
						
					 
					
						2025-04-05 21:23:40 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						29283eaa7e 
					 
					
						
						
							
							[Model] use AutoWeightsLoader for phi, gemma, deepseek ( #16088 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jonghyun Choe <andy.choe729@gmail.com > 
						
						
					 
					
						2025-04-05 20:34:38 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						2fa66ef713 
					 
					
						
						
							
							[Bugfix] fix use_atomic_add support of marlin kernel when using v1 engine ( #15946 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Jinzhen Lin <linjinzhen@hotmail.com > 
						
						
					 
					
						2025-04-05 20:04:22 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						13affc432d 
					 
					
						
						
							
							[Misc] Remove redundant code ( #16098 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com > 
						
						
					 
					
						2025-04-05 20:03:50 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d8f094a92a 
					 
					
						
						
							
							[Misc] format output for encoder_decoder.py ( #16095 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-04-05 19:57:18 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						97ae6d777f 
					 
					
						
						
							
							Fix some capitalisations in generated examples doc titles ( #16094 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com > 
						
						
					 
					
						2025-04-05 13:44:03 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6baeee70d1 
					 
					
						
						
							
							Revert "doc: add info for macos clang errors ( #16049 )" ( #16091 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yihong0618 <zouzou0208@gmail.com > 
						
						
					 
					
						2025-04-05 11:51:51 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						d2517a4939 
					 
					
						
						
							
							[doc] fix 404 ( #16082 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: reidliu41 <reid201711@gmail.com >
Co-authored-by: reidliu41 <reid201711@gmail.com > 
						
						
					 
					
						2025-04-05 11:39:18 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						6342adc438 
					 
					
						
						
							
							fix: support clang17 for macos and fix the real libomp ( #16086 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: yihong0618 <zouzou0208@gmail.com > 
						
						
					 
					
						2025-04-05 11:00:12 +00:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						0adba91547 
					 
					
						
						
							
							[CI] Fix benchmark script level ( #16089 )  
						
						 
						
						
						
						
					 
					
						2025-04-05 03:36:01 -07:00  
					
					
						 
						
						
							
							
							
							
							
							 
						
					 
				 
			
				
					
						
					 
					
						
						
							
						
						4285e423a6 
					 
					
						
						
							
							[Misc] Auto detect bitsandbytes pre-quantized models ( #16027 )  
						
						 
						
						... 
						
						
						
						Signed-off-by: Tristan Leclercq <tristanleclercq@gmail.com > 
						
						
					 
					
						2025-04-04 23:30:45 -07:00