Skip to content

Commit

Permalink
linuxc: try support faster new clone3(CLONE_INTO_CGROUP) syscall
Browse files Browse the repository at this point in the history
  • Loading branch information
criyle committed Feb 20, 2025
1 parent 60a1859 commit ffdf3e3
Show file tree
Hide file tree
Showing 9 changed files with 93 additions and 23 deletions.
2 changes: 2 additions & 0 deletions README.cn.md
Original file line number Diff line number Diff line change
Expand Up @@ -538,6 +538,8 @@ interface Output {

在使用 `systemd` 作为 `init` 的发行版中运行时,`go-judge` 会使用 `dbus` 通知 `systemd` 来创建一个临时 `scope` 作为 `cgroup` 的根。

在内核版本不低于 5.7 且使用 `cgroup` v2 时,`go-judge` 会尝试使用更快的 `clone3(CLONE_INTO_CGROUP)` 方法;若试运行失败,则回退到原有方法。

#### 内存使用

控制进程通常会使用 `20M` 内存,每个容器进程最大会使用 `20M` 内存,每个请求最大会使用 `2 * 16M` + 总 copy out max 限制 * 2 内存。请注意,缓存文件会存储在宿主机的共享内存中 (`/dev/shm`),请保证其大小足够存储运行时最大可能文件。
Expand Down
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -762,6 +762,8 @@ When running in containers, the `go-judge` will migrate all processed into `/api

When running in Linux distributions powered by `systemd`, the `go-judge` will contact `systemd` via `dbus` to create a transient scope as cgroup root.

When running on kernel >= 5.7 with cgroup v2, `go-judge` will try the faster `clone3(CLONE_INTO_CGROUP)` method, and fall back to the regular method if a trial run fails.

#### Memory Usage

The controller will consume `20M` of memory and each container will consume `20M` + the size of its tmpfs (`2 * 128M`). Each request consumes up to the user program limit + the extra limit (`16k`) + the total copy out max. Note that cached files are stored in the shared memory (`/dev/shm`) of the host, so please ensure that it is large enough.
Expand Down
79 changes: 60 additions & 19 deletions env/env_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,12 @@ import (
"github.com/coreos/go-systemd/v22/dbus"
"github.com/criyle/go-judge/env/linuxcontainer"
"github.com/criyle/go-judge/env/pool"
"github.com/criyle/go-judge/envexec"
"github.com/criyle/go-sandbox/container"
"github.com/criyle/go-sandbox/pkg/cgroup"
"github.com/criyle/go-sandbox/pkg/forkexec"
"github.com/criyle/go-sandbox/pkg/mount"
"github.com/criyle/go-sandbox/runner"
ddbus "github.com/godbus/dbus/v5"
"github.com/google/shlex"
"golang.org/x/sys/unix"
Expand Down Expand Up @@ -141,26 +143,65 @@ func NewBuilder(c Config) (pool.EnvBuilder, map[string]any, error) {
if ct != nil {
cgroupControllers = ct.Names()
}
return linuxcontainer.NewEnvBuilder(linuxcontainer.Config{
Builder: b,
CgroupPool: cgroupPool,
WorkDir: workDir,
Cpuset: c.Cpuset,
CPURate: c.EnableCPURate,
Seccomp: seccomp,
}), map[string]any{
"cgroupType": cgroupType,
"mount": m,
"symbolicLink": symbolicLinks,
"maskedPaths": maskPaths,
"hostName": hostName,
"domainName": domainName,
"workDir": workDir,
"uid": cUID,
"gid": cGID,
conf := map[string]any{
"cgroupType": cgroupType,
"mount": m,
"symbolicLink": symbolicLinks,
"maskedPaths": maskPaths,
"hostName": hostName,
"domainName": domainName,
"workDir": workDir,
"uid": cUID,
"gid": cGID,

"cgroupControllers": cgroupControllers,
}, nil
"cgroupControllers": cgroupControllers,
}
// clone3(CLONE_INTO_CGROUP) requires cgroup v2 and Linux >= 5.7. The kernel
// version is compared as a (major, minor) pair so that every 6.x and later
// kernel qualifies regardless of its minor number (e.g. 6.1).
if cgb != nil && cgroupType == cgroup.TypeV2 && (major > 5 || (major == 5 && minor >= 7)) {
	c.Info("Running kernel ", major, ".", minor, " >= 5.7 with cgroup V2, trying faster clone3(CLONE_INTO_CGROUP)")
	// Probe the clone3 path once: build an environment with CgroupFd enabled
	// and run /usr/bin/env in it. A nil result means the probe failed and the
	// regular builder below is used instead.
	if fast := func() pool.EnvBuilder {
		probe := linuxcontainer.NewEnvBuilder(linuxcontainer.Config{
			Builder:    b,
			CgroupPool: cgroupPool,
			WorkDir:    workDir,
			Cpuset:     c.Cpuset,
			CPURate:    c.EnableCPURate,
			Seccomp:    seccomp,
			CgroupFd:   true,
		})
		e, err := probe.Build()
		if err != nil {
			c.Info("Environment build failed: ", err)
			return nil
		}
		// The probe environment is only needed for this single trial run.
		defer e.Destroy()
		p, err := e.Execve(context.TODO(), envexec.ExecveParam{
			Args: []string{"/usr/bin/env"},
		})
		if err != nil {
			c.Info("Environment run failed: ", err)
			return nil
		}
		// Wait for the trial process to finish before reading its result.
		<-p.Done()
		r := p.Result()
		// Only a runner error is treated as a probe failure; any other
		// status is accepted.
		if r.Status == runner.StatusRunnerError {
			c.Info("Environment result failed: ", r)
			return nil
		}
		return probe
	}(); fast != nil {
		conf["clone3"] = true
		return fast, conf, nil
	}
}

return linuxcontainer.NewEnvBuilder(linuxcontainer.Config{
Builder: b,
CgroupPool: cgroupPool,
WorkDir: workDir,
Cpuset: c.Cpuset,
CPURate: c.EnableCPURate,
Seccomp: seccomp,
}), conf, nil
}

func newCgroup(c Config) (cgroup.Cgroup, *cgroup.Controllers, error) {
Expand Down
5 changes: 5 additions & 0 deletions env/linuxcontainer/cgroup_wrapper_linux.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package linuxcontainer

import (
"os"
"time"

"github.com/criyle/go-judge/envexec"
Expand Down Expand Up @@ -63,3 +64,7 @@ func (c *wCgroup) Reset() error {
// Destroy delegates to the wrapped cgroup's Destroy and returns its error
// unchanged.
func (c *wCgroup) Destroy() error {
return c.cg.Destroy()
}

// Open delegates to the wrapped cgroup's Open and returns the resulting
// *os.File and error unchanged. The caller owns the returned file and is
// responsible for closing it.
// NOTE(review): presumably the file refers to the cgroup directory so its
// descriptor can be used with clone3(CLONE_INTO_CGROUP) — confirm against
// go-sandbox's cgroup.Open.
func (c *wCgroup) Open() (*os.File, error) {
return c.cg.Open()
}
3 changes: 3 additions & 0 deletions env/linuxcontainer/cgrouppool_linux.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package linuxcontainer

import (
"os"
"sync"
"time"

Expand All @@ -22,6 +23,8 @@ type Cgroup interface {
AddProc(int) error
Reset() error
Destroy() error

Open() (*os.File, error)
}

// CgroupPool implements pool of Cgroup
Expand Down
4 changes: 4 additions & 0 deletions env/linuxcontainer/envbuilder_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ type Config struct {
Seccomp []syscall.SockFilter
Cpuset string
CPURate bool
CgroupFd bool // whether to enable cgroup fd with clone3, kernel >= 5.7
}

type environmentBuilder struct {
Expand All @@ -25,6 +26,7 @@ type environmentBuilder struct {
seccomp []syscall.SockFilter
cpuset string
cpuRate bool
cgFd bool
}

// NewEnvBuilder creates builder for linux container pools
Expand All @@ -36,6 +38,7 @@ func NewEnvBuilder(c Config) pool.EnvBuilder {
seccomp: c.Seccomp,
cpuset: c.Cpuset,
cpuRate: c.CPURate,
cgFd: c.CgroupFd,
}
}

Expand All @@ -61,5 +64,6 @@ func (b *environmentBuilder) Build() (pool.Environment, error) {
cpuset: b.cpuset,
cpuRate: b.cpuRate,
seccomp: b.seccomp,
cgFd: b.cgFd,
}, nil
}
15 changes: 14 additions & 1 deletion env/linuxcontainer/environment_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ type environ struct {
cpuset string
seccomp []syscall.SockFilter
cpuRate bool
cgFd bool
}

// Destroy destroys the environment
Expand All @@ -45,6 +46,7 @@ func (c *environ) Execve(ctx context.Context, param envexec.ExecveParam) (envexe
cg Cgroup
syncFunc func(int) error
err error
cgFd uintptr
)

limit := param.Limit
Expand All @@ -56,7 +58,16 @@ func (c *environ) Execve(ctx context.Context, param envexec.ExecveParam) (envexe
if err := c.setCgroupLimit(cg, limit); err != nil {
return nil, err
}
syncFunc = cg.AddProc
if c.cgFd {
f, err := cg.Open()
if err != nil {
return nil, fmt.Errorf("execve: failed to get cgroup fd %v", err)
}
defer f.Close()
cgFd = f.Fd()
} else {
syncFunc = cg.AddProc
}
}

rLimits := rlimit.RLimits{
Expand Down Expand Up @@ -92,6 +103,8 @@ func (c *environ) Execve(ctx context.Context, param envexec.ExecveParam) (envexe
}
return nil
},
SyncAfterExec: syncFunc == nil,
CgroupFD: cgFd,
}
proc := newProcess(func() runner.Result {
return c.Environment.Execve(ctx, p)
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ go 1.23
require (
github.com/coreos/go-systemd/v22 v22.5.0
github.com/creack/pty v1.1.24
github.com/criyle/go-sandbox v0.10.9
github.com/criyle/go-sandbox v0.11.0
github.com/elastic/go-seccomp-bpf v1.5.0
github.com/elastic/go-ucfg v0.8.8
github.com/gin-contrib/zap v1.1.4
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ github.com/coreos/go-systemd/v22 v22.5.0 h1:RrqgGjYQKalulkV8NGVIfkXQf6YYmOyiJKk8
github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
github.com/creack/pty v1.1.24 h1:bJrF4RRfyJnbTJqzRLHzcGaZK1NeM5kTC9jGgovnR1s=
github.com/creack/pty v1.1.24/go.mod h1:08sCNb52WyoAwi2QDyzUCTgcvVFhUzewun7wtTfvcwE=
github.com/criyle/go-sandbox v0.10.9 h1:IYXVrfdSi8GgXlxBDNBINpH4VqnznxHi3R5J17d69fs=
github.com/criyle/go-sandbox v0.10.9/go.mod h1:9IZSv7cxcDkVaPSRufhMPLUg+7M7lTPAt8hjd/iMKFo=
github.com/criyle/go-sandbox v0.11.0 h1:1jkfLigilxQza3zPyF1aIGR5WdDuiMYE1rzVYWlJC9I=
github.com/criyle/go-sandbox v0.11.0/go.mod h1:9IZSv7cxcDkVaPSRufhMPLUg+7M7lTPAt8hjd/iMKFo=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
Expand Down

0 comments on commit ffdf3e3

Please sign in to comment.