{"payload":{"feedbackUrl":"https://github.com/orgs/community/discussions/53140","repo":{"id":683573931,"defaultBranch":"main","name":"Awesome-LLM-Inference","ownerLogin":"DefTruth","currentUserCanPush":false,"isFork":false,"isEmpty":false,"createdAt":"2023-08-27T02:32:15.000Z","ownerAvatar":"https://avatars.githubusercontent.com/u/31974251?v=4","public":true,"private":false,"isOrgOwned":false},"refInfo":{"name":"","listCacheKey":"v0:1726646816.0","currentOid":""},"activityList":{"items":[{"before":"e52134c6be6b25205a8e595790bddba5aa7d633e","after":null,"ref":"refs/heads/DefTruth-patch-1","pushedAt":"2024-09-18T08:06:56.000Z","pushType":"branch_deletion","commitsCount":0,"pusher":{"login":"DefTruth","name":"DefTruth","path":"/DefTruth","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/31974251?s=80&v=4"}},{"before":"bae75a8170977a1a9c7946572083c95054922198","after":null,"ref":"refs/heads/DefTruth-patch-2","pushedAt":"2024-09-18T08:06:46.000Z","pushType":"branch_deletion","commitsCount":0,"pusher":{"login":"DefTruth","name":"DefTruth","path":"/DefTruth","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/31974251?s=80&v=4"}},{"before":"829da5ab59be37c591d12194ac5c2fc12bd65f0a","after":"5dc2039ddd7bd03e56713a53ccb126a1b7fecbcb","ref":"refs/heads/main","pushedAt":"2024-09-18T08:06:43.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"DefTruth","name":"DefTruth","path":"/DefTruth","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/31974251?s=80&v=4"},"commit":{"message":"🔥[InstInfer] InstInfer: In-Storage Attention Offloading for Cost-Effective Long-Context LLM Inference (#65)","shortMessageHtmlLink":"🔥[InstInfer] InstInfer: In-Storage Attention Offloading for Cost-Effe…"}},{"before":null,"after":"bae75a8170977a1a9c7946572083c95054922198","ref":"refs/heads/DefTruth-patch-2","pushedAt":"2024-09-18T08:06:27.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"DefTruth","name":"DefTruth","path":"/DefTruth","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/31974251?s=80&v=4"},"commit":{"message":"🔥[InstInfer] InstInfer: In-Storage Attention Offloading for Cost-Effective Long-Context LLM Inference","shortMessageHtmlLink":"🔥[InstInfer] InstInfer: In-Storage Attention Offloading for Cost-Effe…"}},{"before":"72477708428a8f546247979d5289d514b78275a2","after":"829da5ab59be37c591d12194ac5c2fc12bd65f0a","ref":"refs/heads/main","pushedAt":"2024-09-18T05:09:50.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"DefTruth","name":"DefTruth","path":"/DefTruth","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/31974251?s=80&v=4"},"commit":{"message":"Bump up to v2.4 (#64)","shortMessageHtmlLink":"Bump up to v2.4 (#64)"}},{"before":null,"after":"e52134c6be6b25205a8e595790bddba5aa7d633e","ref":"refs/heads/DefTruth-patch-1","pushedAt":"2024-09-18T05:09:38.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"DefTruth","name":"DefTruth","path":"/DefTruth","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/31974251?s=80&v=4"},"commit":{"message":"Bump up to v2.4","shortMessageHtmlLink":"Bump up to v2.4"}},{"before":"2779d7255b076e527cba2e91ebe69ac19f6b5237","after":null,"ref":"refs/heads/DefTruth-patch-1","pushedAt":"2024-09-17T08:10:48.000Z","pushType":"branch_deletion","commitsCount":0,"pusher":{"login":"DefTruth","name":"DefTruth","path":"/DefTruth","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/31974251?s=80&v=4"}},{"before":"efb983bb3bfa58e318233c840d36c5cb98e2ca3d","after":"72477708428a8f546247979d5289d514b78275a2","ref":"refs/heads/main","pushedAt":"2024-09-17T08:10:45.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"DefTruth","name":"DefTruth","path":"/DefTruth","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/31974251?s=80&v=4"},"commit":{"message":"🔥[Inf-MLLM] Inf-MLLM: Efficient Streaming Inference of Multimodal Large Language Models on a Single GPU (#63)","shortMessageHtmlLink":"🔥[Inf-MLLM] Inf-MLLM: Efficient Streaming Inference of Multimodal Lar…"}},{"before":null,"after":"2779d7255b076e527cba2e91ebe69ac19f6b5237","ref":"refs/heads/DefTruth-patch-1","pushedAt":"2024-09-17T08:10:28.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"DefTruth","name":"DefTruth","path":"/DefTruth","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/31974251?s=80&v=4"},"commit":{"message":"🔥[Inf-MLLM] Inf-MLLM: Efficient Streaming Inference of Multimodal Large Language Models on a Single GPU","shortMessageHtmlLink":"🔥[Inf-MLLM] Inf-MLLM: Efficient Streaming Inference of Multimodal Lar…"}},{"before":"f37cac9185e51bc6c1ca07111583a5c3521977eb","after":null,"ref":"refs/heads/DefTruth-patch-1","pushedAt":"2024-09-17T05:30:39.000Z","pushType":"branch_deletion","commitsCount":0,"pusher":{"login":"DefTruth","name":"DefTruth","path":"/DefTruth","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/31974251?s=80&v=4"}},{"before":"f0860e8477e24ddb8bb39bcb52e6e19d9116f150","after":"efb983bb3bfa58e318233c840d36c5cb98e2ca3d","ref":"refs/heads/main","pushedAt":"2024-09-17T05:30:09.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"DefTruth","name":"DefTruth","path":"/DefTruth","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/31974251?s=80&v=4"},"commit":{"message":"🔥[RetrievalAttention] Accelerating Long-Context LLM Inference via Vector Retrieval (#62)","shortMessageHtmlLink":"🔥[RetrievalAttention] Accelerating Long-Context LLM Inference via Vec…"}},{"before":null,"after":"f37cac9185e51bc6c1ca07111583a5c3521977eb","ref":"refs/heads/DefTruth-patch-1","pushedAt":"2024-09-17T05:29:19.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"DefTruth","name":"DefTruth","path":"/DefTruth","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/31974251?s=80&v=4"},"commit":{"message":"🔥[RetrievalAttention] Accelerating Long-Context LLM Inference via Vector Retrieval","shortMessageHtmlLink":"🔥[RetrievalAttention] Accelerating Long-Context LLM Inference via Vec…"}},{"before":"a0ab6088d8f06b1ab305a4149a3f4c2e6e2f334f","after":null,"ref":"refs/heads/DefTruth-patch-1","pushedAt":"2024-09-09T01:25:00.000Z","pushType":"branch_deletion","commitsCount":0,"pusher":{"login":"DefTruth","name":"DefTruth","path":"/DefTruth","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/31974251?s=80&v=4"}},{"before":"9361bbfe09fa92a91e685182c7a75c0eab36d020","after":"f0860e8477e24ddb8bb39bcb52e6e19d9116f150","ref":"refs/heads/main","pushedAt":"2024-09-09T01:24:56.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"DefTruth","name":"DefTruth","path":"/DefTruth","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/31974251?s=80&v=4"},"commit":{"message":"Bump up to v2.3 (#61)","shortMessageHtmlLink":"Bump up to v2.3 (#61)"}},{"before":null,"after":"a0ab6088d8f06b1ab305a4149a3f4c2e6e2f334f","ref":"refs/heads/DefTruth-patch-1","pushedAt":"2024-09-09T01:24:44.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"DefTruth","name":"DefTruth","path":"/DefTruth","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/31974251?s=80&v=4"},"commit":{"message":"Bump up to v2.3","shortMessageHtmlLink":"Bump up to v2.3"}},{"before":"b1792778d9b0fd090b19eb88156cb26f9706f2c3","after":"9361bbfe09fa92a91e685182c7a75c0eab36d020","ref":"refs/heads/main","pushedAt":"2024-09-09T01:23:58.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"DefTruth","name":"DefTruth","path":"/DefTruth","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/31974251?s=80&v=4"},"commit":{"message":"🔥[TEE]Confidential Computing on nVIDIA H100 GPU: A Performance Benchmark Study","shortMessageHtmlLink":"🔥[TEE]Confidential Computing on nVIDIA H100 GPU: A Performance Benchm…"}},{"before":"671dcc07722bdbab4cec42923401e2510923d3ba","after":null,"ref":"refs/heads/DefTruth-patch-1","pushedAt":"2024-09-05T01:40:26.000Z","pushType":"branch_deletion","commitsCount":0,"pusher":{"login":"DefTruth","name":"DefTruth","path":"/DefTruth","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/31974251?s=80&v=4"}},{"before":"9df7bcf142524889447edd600efbd9b4d7f19cc6","after":"b1792778d9b0fd090b19eb88156cb26f9706f2c3","ref":"refs/heads/main","pushedAt":"2024-09-05T01:40:23.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"DefTruth","name":"DefTruth","path":"/DefTruth","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/31974251?s=80&v=4"},"commit":{"message":"🔥[SpMM] High Performance Unstructured SpMM Computation Using Tensor Cores (#60)","shortMessageHtmlLink":"🔥[SpMM] High Performance Unstructured SpMM Computation Using Tensor C…"}},{"before":null,"after":"671dcc07722bdbab4cec42923401e2510923d3ba","ref":"refs/heads/DefTruth-patch-1","pushedAt":"2024-09-05T01:40:07.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"DefTruth","name":"DefTruth","path":"/DefTruth","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/31974251?s=80&v=4"},"commit":{"message":"🔥[SpMM] High Performance Unstructured SpMM Computation Using Tensor Cores","shortMessageHtmlLink":"🔥[SpMM] High Performance Unstructured SpMM Computation Using Tensor C…"}},{"before":"f34588c6bde52e9626b83c3df30b983943bfaf46","after":null,"ref":"refs/heads/DefTruth-patch-1","pushedAt":"2024-09-05T01:35:35.000Z","pushType":"branch_deletion","commitsCount":0,"pusher":{"login":"DefTruth","name":"DefTruth","path":"/DefTruth","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/31974251?s=80&v=4"}},{"before":"6d7e9f8aeae4a7cde8f697748902b61977f2f5e2","after":"9df7bcf142524889447edd600efbd9b4d7f19cc6","ref":"refs/heads/main","pushedAt":"2024-09-05T01:35:32.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"DefTruth","name":"DefTruth","path":"/DefTruth","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/31974251?s=80&v=4"},"commit":{"message":"🔥[CHESS] CHESS : Optimizing LLM Inference via Channel-Wise Thresholding and Selective Sparsification (#59)","shortMessageHtmlLink":"🔥[CHESS] CHESS : Optimizing LLM Inference via Channel-Wise Thresholdi…"}},{"before":null,"after":"f34588c6bde52e9626b83c3df30b983943bfaf46","ref":"refs/heads/DefTruth-patch-1","pushedAt":"2024-09-05T01:35:13.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"DefTruth","name":"DefTruth","path":"/DefTruth","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/31974251?s=80&v=4"},"commit":{"message":"🔥[CHESS] CHESS : Optimizing LLM Inference via Channel-Wise Thresholding and Selective Sparsification","shortMessageHtmlLink":"🔥[CHESS] CHESS : Optimizing LLM Inference via Channel-Wise Thresholdi…"}},{"before":"11a744680be5a2c4ba74a2697fa02ac9abd694bf","after":null,"ref":"refs/heads/DefTruth-patch-1","pushedAt":"2024-09-04T06:21:41.000Z","pushType":"branch_deletion","commitsCount":0,"pusher":{"login":"DefTruth","name":"DefTruth","path":"/DefTruth","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/31974251?s=80&v=4"}},{"before":"445dab92015d9d20d8ef4822038b0b9bbaaa2fe2","after":"6d7e9f8aeae4a7cde8f697748902b61977f2f5e2","ref":"refs/heads/main","pushedAt":"2024-09-04T06:21:38.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"DefTruth","name":"DefTruth","path":"/DefTruth","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/31974251?s=80&v=4"},"commit":{"message":"Bump up to v2.2 (#58)","shortMessageHtmlLink":"Bump up to v2.2 (#58)"}},{"before":null,"after":"11a744680be5a2c4ba74a2697fa02ac9abd694bf","ref":"refs/heads/DefTruth-patch-1","pushedAt":"2024-09-04T06:21:27.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"DefTruth","name":"DefTruth","path":"/DefTruth","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/31974251?s=80&v=4"},"commit":{"message":"Bump up to v2.2","shortMessageHtmlLink":"Bump up to v2.2"}},{"before":"42ff7958412e03191b1efe70e736774748442277","after":null,"ref":"refs/heads/DefTruth-patch-1","pushedAt":"2024-09-04T06:20:20.000Z","pushType":"branch_deletion","commitsCount":0,"pusher":{"login":"DefTruth","name":"DefTruth","path":"/DefTruth","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/31974251?s=80&v=4"}},{"before":"e1ec28220acf8552e9f6a048075ecb2106dab4ce","after":"445dab92015d9d20d8ef4822038b0b9bbaaa2fe2","ref":"refs/heads/main","pushedAt":"2024-09-04T06:20:17.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"DefTruth","name":"DefTruth","path":"/DefTruth","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/31974251?s=80&v=4"},"commit":{"message":"🔥🔥[Context Distillation] Efficient LLM Context Distillation (#57)","shortMessageHtmlLink":"🔥🔥[Context Distillation] Efficient LLM Context Distillation (#57)"}},{"before":null,"after":"42ff7958412e03191b1efe70e736774748442277","ref":"refs/heads/DefTruth-patch-1","pushedAt":"2024-09-04T06:20:06.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"DefTruth","name":"DefTruth","path":"/DefTruth","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/31974251?s=80&v=4"},"commit":{"message":"🔥🔥[Context Distillation] Efficient LLM Context Distillation","shortMessageHtmlLink":"🔥🔥[Context Distillation] Efficient LLM Context Distillation"}},{"before":"5bc400cc00888a70c8bebc7e3785e6bc2a98846a","after":null,"ref":"refs/heads/DefTruth-patch-1","pushedAt":"2024-09-04T06:17:01.000Z","pushType":"branch_deletion","commitsCount":0,"pusher":{"login":"DefTruth","name":"DefTruth","path":"/DefTruth","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/31974251?s=80&v=4"}},{"before":"4ba8aae9f892f45d6bca2ebe56a4bd5a62364e33","after":"e1ec28220acf8552e9f6a048075ecb2106dab4ce","ref":"refs/heads/main","pushedAt":"2024-09-04T06:16:58.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"DefTruth","name":"DefTruth","path":"/DefTruth","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/31974251?s=80&v=4"},"commit":{"message":"🔥🔥[Prompt Compression] Prompt Compression with Context-Aware Sentence Encoding for Fast and Improved LLM Inference (#56)","shortMessageHtmlLink":"🔥🔥[Prompt Compression] Prompt Compression with Context-Aware Sentence…"}}],"hasNextPage":true,"hasPreviousPage":false,"activityType":"all","actor":null,"timePeriod":"all","sort":"DESC","perPage":30,"cursor":"Y3Vyc29yOnYyOpK7MjAyNC0wOS0xOFQwODowNjo1Ni4wMDAwMDBazwAAAAS5YiIe","startCursor":"Y3Vyc29yOnYyOpK7MjAyNC0wOS0xOFQwODowNjo1Ni4wMDAwMDBazwAAAAS5YiIe","endCursor":"Y3Vyc29yOnYyOpK7MjAyNC0wOS0wNFQwNjoxNjo1OC4wMDAwMDBazwAAAASsU9nI"}},"title":"Activity · DefTruth/Awesome-LLM-Inference"}