CMB-AI-LAB
/

lagkv_cache

kyleliang committed on Jun 4

Commit

dafad47

verified ·

1 Parent(s): d9addd5

Upload folder using huggingface_hub

Files changed (1) hide show

custom_generate/generate.py CHANGED Viewed

@@ -17,6 +17,15 @@ from transformers import DynamicCache, GenerationConfig
 from typing import Any, Dict, List, Optional, Tuple
 class LagKVCache(DynamicCache):
     """
     A KV compression algorithm as described in the [LagKV paper](https://arxiv.org/abs/2504.04704).

 from typing import Any, Dict, List, Optional, Tuple
+UNSUPPORTED_GENERATION_ARGS = [
+    "cache_implementation",  # cache-related arguments, here we always use LagKVCache
+    "cache_config",
+    "return_legacy_cache",
+    "num_beams",  # beam search (and cousin techniques) are not supported
+    "compile_config",  # LagKVCache doesn't support torch.compile
+    "assistant_model",  # it also doesn't support speculative decoding
+]
 class LagKVCache(DynamicCache):
     """
     A KV compression algorithm as described in the [LagKV paper](https://arxiv.org/abs/2504.04704).