277 lines
11 KiB
Diff
277 lines
11 KiB
Diff
From a8d7747e2ebc37b63558475b73e1d0a2dee2625e Mon Sep 17 00:00:00 2001
|
|
From: Cory Snider <csnider@mirantis.com>
|
|
Date: Fri, 10 Mar 2023 15:29:27 -0500
|
|
Subject: [PATCH] libnet/d/overlay: add BPF-powered VNI matcher
|
|
|
|
Some newer distros such as RHEL 9 have stopped making the xt_u32 kernel
|
|
module available with the kernels they ship. They do ship the xt_bpf
|
|
kernel module, which can do everything xt_u32 can and more. Add an
|
|
alternative implementation of the iptables match rule which uses xt_bpf
|
|
to implement exactly the same logic as the u32 filter using a BPF
|
|
program. Try programming the BPF-powered rules as a fallback when
|
|
programming the u32-powered rules fails.
|
|
|
|
Signed-off-by: Cory Snider <csnider@mirantis.com>
|
|
---
|
|
.../docker/libnetwork/drivers/overlay/bpf.go | 47 +++++++++++++++++++
|
|
.../libnetwork/drivers/overlay/bpf_test.go | 14 ++++++
|
|
.../libnetwork/drivers/overlay/encryption.go | 37 +++++++++++++--
|
|
.../drivers/overlay/encryption_bpf.go | 17 +++++++
|
|
.../drivers/overlay/encryption_u32.go | 10 ++--
|
|
.../drivers/overlay/overlayutils/utils.go | 46 ++++++++++++++++++
|
|
6 files changed, 162 insertions(+), 9 deletions(-)
|
|
create mode 100644 components/engine/vendor/github.com/docker/libnetwork/drivers/overlay/bpf.go
|
|
create mode 100644 components/engine/vendor/github.com/docker/libnetwork/drivers/overlay/bpf_test.go
|
|
create mode 100644 components/engine/vendor/github.com/docker/libnetwork/drivers/overlay/encryption_bpf.go
|
|
create mode 100644 components/engine/vendor/github.com/docker/libnetwork/drivers/overlay/overlayutils/utils.go
|
|
|
|
diff --git a/components/engine/vendor/github.com/docker/libnetwork/drivers/overlay/bpf.go b/components/engine/vendor/github.com/docker/libnetwork/drivers/overlay/bpf.go
|
|
new file mode 100644
|
|
index 00000000..cb96fb7a
|
|
--- /dev/null
|
|
+++ b/components/engine/vendor/github.com/docker/libnetwork/drivers/overlay/bpf.go
|
|
@@ -0,0 +1,47 @@
|
|
+package overlay
|
|
+
|
|
+import (
|
|
+ "fmt"
|
|
+ "strings"
|
|
+
|
|
+ "golang.org/x/net/bpf"
|
|
+)
|
|
+
|
|
+// vniMatchBPF returns a BPF program suitable for passing to the iptables bpf
|
|
+// match which matches on the VXLAN Network ID of encapsulated packets. The
|
|
+// program assumes that it will be used in a rule which only matches UDP
|
|
+// datagrams.
|
|
+func vniMatchBPF(vni uint32) []bpf.RawInstruction {
|
|
+ asm, err := bpf.Assemble([]bpf.Instruction{
|
|
+ bpf.LoadMemShift{Off: 0}, // ldx 4*([0] & 0xf) ; Load length of IPv4 header into X
|
|
+ bpf.LoadIndirect{Off: 12, Size: 4}, // ld [x + 12] ; Load VXLAN ID (UDP header + 4 bytes) into A
|
|
+ bpf.ALUOpConstant{Op: bpf.ALUOpAnd, Val: 0xffffff00}, // and #0xffffff00 ; VXLAN ID is in top 24 bits
|
|
+ bpf.JumpIf{Cond: bpf.JumpEqual, Val: vni << 8, SkipTrue: 1}, // jeq ($vni << 8), match
|
|
+ bpf.RetConstant{Val: 0}, // ret #0
|
|
+ bpf.RetConstant{Val: ^uint32(0)}, // match: ret #-1
|
|
+ })
|
|
+ // bpf.Assemble() only errors if an instruction is invalid. As the only variable
|
|
+ // part of the program is an instruction value for which the entire range is
|
|
+ // valid, whether the program can be successfully assembled is independent of
|
|
+ // the input. Given that the only recourse is to fix this function and
|
|
+ // recompile, there's little value in bubbling the error up to the caller.
|
|
+ if err != nil {
|
|
+ panic(err)
|
|
+ }
|
|
+ return asm
|
|
+}
|
|
+
|
|
+// marshalXTBPF marshals a BPF program into the "decimal" byte code format
|
|
+// which is suitable for passing to the [iptables bpf match].
|
|
+//
|
|
+// iptables -m bpf --bytecode
|
|
+//
|
|
+// [iptables bpf match]: https://ipset.netfilter.org/iptables-extensions.man.html#lbAH
|
|
+func marshalXTBPF(prog []bpf.RawInstruction) string { //nolint:unused
|
|
+ var b strings.Builder
|
|
+ fmt.Fprintf(&b, "%d", len(prog))
|
|
+ for _, ins := range prog {
|
|
+ fmt.Fprintf(&b, ",%d %d %d %d", ins.Op, ins.Jt, ins.Jf, ins.K)
|
|
+ }
|
|
+ return b.String()
|
|
+}
|
|
diff --git a/components/engine/vendor/github.com/docker/libnetwork/drivers/overlay/bpf_test.go b/components/engine/vendor/github.com/docker/libnetwork/drivers/overlay/bpf_test.go
|
|
new file mode 100644
|
|
index 00000000..f636d14e
|
|
--- /dev/null
|
|
+++ b/components/engine/vendor/github.com/docker/libnetwork/drivers/overlay/bpf_test.go
|
|
@@ -0,0 +1,14 @@
|
|
+package overlay
|
|
+
|
|
+import (
|
|
+ "testing"
|
|
+)
|
|
+
|
|
+func FuzzVNIMatchBPFDoesNotPanic(f *testing.F) {
|
|
+ for _, seed := range []uint32{0, 1, 42, 0xfffffe, 0xffffff, 0xfffffffe, 0xffffffff} {
|
|
+ f.Add(seed)
|
|
+ }
|
|
+ f.Fuzz(func(t *testing.T, vni uint32) {
|
|
+ _ = vniMatchBPF(vni)
|
|
+ })
|
|
+}
|
|
diff --git a/components/engine/vendor/github.com/docker/libnetwork/drivers/overlay/encryption.go b/components/engine/vendor/github.com/docker/libnetwork/drivers/overlay/encryption.go
|
|
index 20843516..513de71e 100644
|
|
--- a/components/engine/vendor/github.com/docker/libnetwork/drivers/overlay/encryption.go
|
|
+++ b/components/engine/vendor/github.com/docker/libnetwork/drivers/overlay/encryption.go
|
|
@@ -1,3 +1,6 @@
|
|
+//go:build linux
|
|
+// +build linux
|
|
+
|
|
package overlay
|
|
|
|
import (
|
|
@@ -12,9 +15,11 @@ import (
|
|
|
|
"strconv"
|
|
|
|
+ "github.com/docker/libnetwork/drivers/overlay/overlayutils"
|
|
"github.com/docker/libnetwork/iptables"
|
|
"github.com/docker/libnetwork/ns"
|
|
"github.com/docker/libnetwork/types"
|
|
+ "github.com/hashicorp/go-multierror"
|
|
"github.com/sirupsen/logrus"
|
|
"github.com/vishvananda/netlink"
|
|
)
|
|
@@ -226,7 +231,31 @@ func removeEncryption(localIP, remoteIP net.IP, em *encrMap) error {
|
|
return nil
|
|
}
|
|
|
|
-func programMangle(vni uint32, add bool) (err error) {
|
|
+type matchVXLANFunc func(port, vni uint32) []string
|
|
+
|
|
+// programVXLANRuleFunc returns a function which tries calling programWithMatch
|
|
+// with the u32 match, falling back to the BPF match if installing the u32 variant
|
|
+// of the rules fails.
|
|
+func programVXLANRuleFunc(programWithMatch func(matchVXLAN matchVXLANFunc, vni uint32, add bool) error) func(vni uint32, add bool) error {
|
|
+ return func(vni uint32, add bool) error {
|
|
+ if add {
|
|
+ if err := programWithMatch(matchVXLANWithU32, vni, add); err != nil {
|
|
+ // That didn't work. Maybe the xt_u32 module isn't available? Try again with xt_bpf.
|
|
+ err2 := programWithMatch(matchVXLANWithBPF, vni, add)
|
|
+ if err2 != nil {
|
|
+ return multierror.Append(err, err2)
|
|
+ }
|
|
+ }
|
|
+ return nil
|
|
+ } else {
|
|
+ // Delete both flavours.
|
|
+ err := programWithMatch(matchVXLANWithU32, vni, add)
|
|
+ return multierror.Append(err, programWithMatch(matchVXLANWithBPF, vni, add)).ErrorOrNil()
|
|
+ }
|
|
+ }
|
|
+}
|
|
+
|
|
+var programMangle = programVXLANRuleFunc(func(matchVXLAN matchVXLANFunc, vni uint32, add bool) (err error) {
|
|
var (
|
|
m = strconv.FormatUint(mark, 10)
|
|
chain = "OUTPUT"
|
|
@@ -249,9 +278,9 @@ func programMangle(vni uint32, add bool) (err error) {
|
|
}
|
|
|
|
return
|
|
-}
|
|
+})
|
|
|
|
-func programInput(vni uint32, add bool) (err error) {
|
|
+var programInput = programVXLANRuleFunc(func(matchVXLAN matchVXLANFunc, vni uint32, add bool) (err error) {
|
|
var (
|
|
plainVxlan = matchVXLAN(overlayutils.VXLANUDPPort(), vni)
|
|
ipsecVxlan = append([]string{"-m", "policy", "--dir", "in", "--pol", "ipsec"}, plainVxlan...)
|
|
@@ -278,7 +307,7 @@ func programInput(vni uint32, add bool) (err error) {
|
|
}
|
|
|
|
return
|
|
-}
|
|
+})
|
|
|
|
func programSA(localIP, remoteIP net.IP, spi *spi, k *key, dir int, add bool) (fSA *netlink.XfrmState, rSA *netlink.XfrmState, err error) {
|
|
var (
|
|
diff --git a/components/engine/vendor/github.com/docker/libnetwork/drivers/overlay/encryption_bpf.go b/components/engine/vendor/github.com/docker/libnetwork/drivers/overlay/encryption_bpf.go
|
|
new file mode 100644
|
|
index 00000000..de57c217
|
|
--- /dev/null
|
|
+++ b/components/engine/vendor/github.com/docker/libnetwork/drivers/overlay/encryption_bpf.go
|
|
@@ -0,0 +1,17 @@
|
|
+package overlay
|
|
+
|
|
+import (
|
|
+ "strconv"
|
|
+)
|
|
+
|
|
+// matchVXLANWithBPF returns an iptables rule fragment which matches VXLAN
|
|
+// datagrams with the given destination port and VXLAN Network ID utilizing the
|
|
+// xt_bpf netfilter kernel module. The returned slice's backing array is
|
|
+// guaranteed not to alias any other slice's.
|
|
+func matchVXLANWithBPF(port, vni uint32) []string {
|
|
+ dport := strconv.FormatUint(uint64(port), 10)
|
|
+ vniMatch := marshalXTBPF(vniMatchBPF(vni))
|
|
+
|
|
+ // https://ipset.netfilter.org/iptables-extensions.man.html#lbAH
|
|
+ return []string{"-p", "udp", "--dport", dport, "-m", "bpf", "--bytecode", vniMatch}
|
|
+}
|
|
diff --git a/components/engine/vendor/github.com/docker/libnetwork/drivers/overlay/encryption_u32.go b/components/engine/vendor/github.com/docker/libnetwork/drivers/overlay/encryption_u32.go
|
|
index c93f7c96..94a74031 100644
|
|
--- a/components/engine/vendor/github.com/docker/libnetwork/drivers/overlay/encryption_u32.go
|
|
+++ b/components/engine/vendor/github.com/docker/libnetwork/drivers/overlay/encryption_u32.go
|
|
@@ -5,11 +5,11 @@ import (
|
|
"strconv"
|
|
)
|
|
|
|
-// matchVXLAN returns an iptables rule fragment which matches VXLAN datagrams
|
|
-// with the given destination port and VXLAN Network ID utilizing the xt_u32
|
|
-// netfilter kernel module. The returned slice's backing array is guaranteed not
|
|
-// to alias any other slice's.
|
|
-func matchVXLAN(port, vni uint32) []string {
|
|
+// matchVXLANWithU32 returns an iptables rule fragment which matches VXLAN
|
|
+// datagrams with the given destination port and VXLAN Network ID utilizing the
|
|
+// xt_u32 netfilter kernel module. The returned slice's backing array is
|
|
+// guaranteed not to alias any other slice's.
|
|
+func matchVXLANWithU32(port, vni uint32) []string {
|
|
dport := strconv.FormatUint(uint64(port), 10)
|
|
|
|
// The u32 expression language is documented in iptables-extensions(8).
|
|
diff --git a/components/engine/vendor/github.com/docker/libnetwork/drivers/overlay/overlayutils/utils.go b/components/engine/vendor/github.com/docker/libnetwork/drivers/overlay/overlayutils/utils.go
|
|
new file mode 100644
|
|
index 00000000..73136e8e
|
|
--- /dev/null
|
|
+++ b/components/engine/vendor/github.com/docker/libnetwork/drivers/overlay/overlayutils/utils.go
|
|
@@ -0,0 +1,46 @@
|
|
+// Package overlayutils provides utility functions for overlay networks
|
|
+package overlayutils
|
|
+
|
|
+import (
|
|
+ "fmt"
|
|
+ "sync"
|
|
+)
|
|
+
|
|
+var (
|
|
+ mutex sync.RWMutex
|
|
+ vxlanUDPPort uint32
|
|
+)
|
|
+
|
|
+const defaultVXLANUDPPort = 4789
|
|
+
|
|
+func init() {
|
|
+ vxlanUDPPort = defaultVXLANUDPPort
|
|
+}
|
|
+
|
|
+// ConfigVXLANUDPPort configures the VXLAN UDP port (data path port) number.
|
|
+// If no port is set (zero), the default (4789) is used. Valid port numbers are
|
|
+// between 1024 and 49151.
|
|
+func ConfigVXLANUDPPort(vxlanPort uint32) error {
|
|
+ if vxlanPort == 0 {
|
|
+ vxlanPort = defaultVXLANUDPPort
|
|
+ }
|
|
+ // IANA procedures for each range in detail
|
|
+ // The Well Known Ports, aka the System Ports, from 0-1023
|
|
+ // The Registered Ports, aka the User Ports, from 1024-49151
|
|
+ // The Dynamic Ports, aka the Private Ports, from 49152-65535
|
|
+ // So we can allow range between 1024 to 49151
|
|
+ if vxlanPort < 1024 || vxlanPort > 49151 {
|
|
+ return fmt.Errorf("VXLAN UDP port number is not in valid range (1024-49151): %d", vxlanPort)
|
|
+ }
|
|
+ mutex.Lock()
|
|
+ vxlanUDPPort = vxlanPort
|
|
+ mutex.Unlock()
|
|
+ return nil
|
|
+}
|
|
+
|
|
+// VXLANUDPPort returns the configured VXLAN UDP port number.
|
|
+func VXLANUDPPort() uint32 {
|
|
+ mutex.RLock()
|
|
+ defer mutex.RUnlock()
|
|
+ return vxlanUDPPort
|
|
+}
|
|
--
|
|
2.33.0
|
|
|