fix: rate limit overlimit count

zijiren233 · zijiren233 · commit c79df947e243 · 2025-03-09T16:36:44.000+08:00
diff --git a/service/aiproxy/common/rpmlimit/rate-limit.go b/service/aiproxy/common/rpmlimit/rate-limit.go
@@ -2,7 +2,10 @@ package rpmlimit
 
 import (
 	"context"
+	"errors"
 	"fmt"
+	"strconv"
+	"strings"
 	"time"
 
 	"github.com/labring/sealos/service/aiproxy/common"
@@ -13,33 +16,54 @@ var inMemoryRateLimiter InMemoryRateLimiter
 
 const (
 	groupModelRPMKey = "group_model_rpm:%s:%s"
+	overLimitRPMKey  = "over_limit_rpm:%s:%s"
 )
 
 var pushRequestScript = `
 local key = KEYS[1]
+local over_limit_key = KEYS[2]
 local window = tonumber(ARGV[1])
 local current_time = tonumber(ARGV[2])
+local max_requests = tonumber(ARGV[3])
 local cutoff = current_time - window
 
 redis.call('ZREMRANGEBYSCORE', key, '-inf', cutoff)
-redis.call('ZADD', key, current_time, current_time)
-redis.call('PEXPIRE', key, window / 1000)
-return redis.call('ZCOUNT', key, cutoff, current_time)
+redis.call('ZREMRANGEBYSCORE', over_limit_key, '-inf', cutoff)
+local count = redis.call('ZCOUNT', key, cutoff, current_time)
+local over_limit_count = redis.call('ZCOUNT', over_limit_key, cutoff, current_time)
+
+if count < max_requests then
+    redis.call('ZADD', key, current_time, current_time)
+    redis.call('PEXPIRE', key, window / 1000)
+	count = count + 1
+else
+    redis.call('ZADD', over_limit_key, current_time, current_time)
+    redis.call('PEXPIRE', over_limit_key, window / 1000)
+	over_limit_count = over_limit_count + 1
+end
+
+return string.format("%d:%d", count, over_limit_count)
 `
 
 var getRequestCountScript = `
-local pattern = ARGV[1]
-local window = tonumber(ARGV[2])
-local current_time = tonumber(ARGV[3])
+local pattern = KEYS[1]
+local over_limit_pattern = KEYS[2]
+local window = tonumber(ARGV[1])
+local current_time = tonumber(ARGV[2])
 local cutoff = current_time - window
 
-local keys = redis.call('KEYS', pattern)
 local total = 0
 
+local keys = redis.call('KEYS', pattern)
 for _, key in ipairs(keys) do
 	redis.call('ZREMRANGEBYSCORE', key, '-inf', cutoff)
-    local count = redis.call('ZCOUNT', key, cutoff, current_time)
-    total = total + count
+    total = total + redis.call('ZCOUNT', key, cutoff, current_time)
+end
+
+local over_limit_keys = redis.call('KEYS', over_limit_pattern)
+for _, key in ipairs(over_limit_keys) do
+	redis.call('ZREMRANGEBYSCORE', key, '-inf', cutoff)
+    total = total + redis.call('ZCOUNT', key, cutoff, current_time)
 end
 
 return total
@@ -51,22 +75,26 @@ func GetRPM(ctx context.Context, group, model string) (int64, error) {
 	}
 
 	var pattern string
+	var overLimitPattern string
 	if group == "" && model == "" {
 		pattern = "group_model_rpm:*:*"
+		overLimitPattern = "over_limit_rpm:*:*"
 	} else if group == "" {
 		pattern = "group_model_rpm:*:" + model
+		overLimitPattern = "over_limit_rpm:*:" + model
 	} else if model == "" {
 		pattern = fmt.Sprintf("group_model_rpm:%s:*", group)
+		overLimitPattern = fmt.Sprintf("over_limit_rpm:%s:*", group)
 	} else {
 		pattern = fmt.Sprintf("group_model_rpm:%s:%s", group, model)
+		overLimitPattern = fmt.Sprintf("over_limit_rpm:%s:%s", group, model)
 	}
 
 	rdb := common.RDB
 	result, err := rdb.Eval(
 		ctx,
 		getRequestCountScript,
-		[]string{},
-		pattern,
+		[]string{pattern, overLimitPattern},
 		time.Minute.Microseconds(),
 		time.Now().UnixMicro(),
 	).Int64()
@@ -77,27 +105,41 @@ func GetRPM(ctx context.Context, group, model string) (int64, error) {
 }
 
 func redisRateLimitRequest(ctx context.Context, group, model string, maxRequestNum int64, duration time.Duration) (bool, error) {
-	result, err := PushRequest(ctx, group, model, duration)
+	result, _, err := PushRequest(ctx, group, model, maxRequestNum, duration)
 	if err != nil {
 		return false, err
 	}
 	return result <= maxRequestNum, nil
 }
 
-func PushRequest(ctx context.Context, group, model string, duration time.Duration) (int64, error) {
+func PushRequest(ctx context.Context, group, model string, maxRequestNum int64, duration time.Duration) (int64, int64, error) {
 	result, err := common.RDB.Eval(
 		ctx,
 		pushRequestScript,
 		[]string{
 			fmt.Sprintf(groupModelRPMKey, group, model),
+			fmt.Sprintf(overLimitRPMKey, group, model),
 		},
 		duration.Microseconds(),
 		time.Now().UnixMicro(),
-	).Int64()
+		maxRequestNum,
+	).Text()
 	if err != nil {
-		return 0, err
+		return 0, 0, err
 	}
-	return result, nil
+	count, overLimitCount, ok := strings.Cut(result, ":")
+	if !ok {
+		return 0, 0, errors.New("invalid result")
+	}
+	countInt, err := strconv.ParseInt(count, 10, 64)
+	if err != nil {
+		return 0, 0, err
+	}
+	overLimitCountInt, err := strconv.ParseInt(overLimitCount, 10, 64)
+	if err != nil {
+		return 0, 0, err
+	}
+	return countInt, overLimitCountInt, nil
 }
 
 func RateLimit(ctx context.Context, group, model string, maxRequestNum int64, duration time.Duration) (bool, error) {
diff --git a/service/aiproxy/middleware/distributor.go b/service/aiproxy/middleware/distributor.go
@@ -101,7 +101,7 @@ func checkGroupModelRPMAndTPM(c *gin.Context, group *model.GroupCache, mc *model
 			return ErrRequestRateLimitExceeded
 		}
 	} else if common.RedisEnabled {
-		_, err := rpmlimit.PushRequest(c.Request.Context(), group.ID, mc.Model, time.Minute)
+		_, _, err := rpmlimit.PushRequest(c.Request.Context(), group.ID, mc.Model, 1, time.Minute)
 		if err != nil {
 			log.Errorf("push request error: %s", err.Error())
 		}

Original file line number	Diff line number	Diff line change
`@@ -101,7 +101,7 @@ func checkGroupModelRPMAndTPM(c *gin.Context, group *model.GroupCache, mc *model`
`101`	`101`	`return ErrRequestRateLimitExceeded`
`102`	`102`	`}`
`103`	`103`	`} else if common.RedisEnabled {`
`104`		`- _, err := rpmlimit.PushRequest(c.Request.Context(), group.ID, mc.Model, time.Minute)`
	`104`	`+ _, _, err := rpmlimit.PushRequest(c.Request.Context(), group.ID, mc.Model, 1, time.Minute)`
`105`	`105`	`if err != nil {`
`106`	`106`	`log.Errorf("push request error: %s", err.Error())`
`107`	`107`	`}`