Bipartite KVCache
pip install transformers
pip install sentencepiece
pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/
pip install vllm
cd Bi-KV
KVCACHE_NUM=5 WORKER_NUM=5 python init.py # set the number of KV caches (KVCACHE_NUM) and workers (WORKER_NUM)
cd Bi-KV # skip this if you are already inside the Bi-KV directory from the previous step
bash run.sh