瀏覽代碼

bpf: new package to assemble and disassemble Berkeley Packet Filter programs.

The package currently implements the operations defined by
https://www.kernel.org/doc/Documentation/networking/filter.txt , which
comprises the base BPF virtual machine plus the Linux kernel's extension
opcodes.

Updates golang/go#14982

Change-Id: Iafb43d80e067040e60465a9bfb7d5f2ca90cc2ae
Reviewed-on: https://go-review.googlesource.com/21212
Reviewed-by: Mikio Hara <mikioh.mikioh@gmail.com>
David Anderson 9 年之前
父節點
當前提交
3c208088b6
共有 7 個文件被更改,包括 942 次插入0 次删除
  1. 41 0
      bpf/asm.go
  2. 207 0
      bpf/constants.go
  3. 11 0
      bpf/doc.go
  4. 419 0
      bpf/instructions.go
  5. 184 0
      bpf/instructions_test.go
  6. 1 0
      bpf/testdata/all_instructions.bpf
  7. 79 0
      bpf/testdata/all_instructions.txt

+ 41 - 0
bpf/asm.go

@@ -0,0 +1,41 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package bpf
+
+import "fmt"
+
+// Assemble converts insts into raw instructions suitable for loading
+// into a BPF virtual machine.
+//
+// Currently, no optimization is attempted, the assembled program flow
+// is exactly as provided.
+func Assemble(insts []Instruction) ([]RawInstruction, error) {
+	ret := make([]RawInstruction, len(insts))
+	var err error
+	for i, inst := range insts {
+		ret[i], err = inst.Assemble()
+		if err != nil {
+			return nil, fmt.Errorf("assembling instruction %d: %s", i+1, err)
+		}
+	}
+	return ret, nil
+}
+
+// Disassemble converts each element of raw into the Instruction it
+// encodes. A RawInstruction that this package does not recognize is
+// assumed to be an extension it does not implement and is copied to
+// the output unchanged. The allDecoded result is true only when every
+// element was recognized, that is, when insts holds no
+// RawInstruction.
+func Disassemble(raw []RawInstruction) (insts []Instruction, allDecoded bool) {
+	insts = make([]Instruction, 0, len(raw))
+	undecoded := 0
+	for _, r := range raw {
+		inst := r.Disassemble()
+		if _, isRaw := inst.(RawInstruction); isRaw {
+			undecoded++
+		}
+		insts = append(insts, inst)
+	}
+	return insts, undecoded == 0
+}

+ 207 - 0
bpf/constants.go

@@ -0,0 +1,207 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package bpf
+
+// A Register is a register of the BPF virtual machine.
+//
+// Instructions that take a Register accept only RegA or RegX; any
+// other value is rejected with an error when the instruction is
+// assembled.
+type Register uint16
+
+const (
+	// RegA is the accumulator register. RegA is always the
+	// destination register of ALU operations.
+	RegA Register = iota
+	// RegX is the indirection register, used by LoadIndirect
+	// operations.
+	RegX
+)
+
+// An ALUOp is an arithmetic or logic operation.
+//
+// The value of an ALUOp is the operator already shifted into the
+// upper four bits of the opcode byte, so it can be OR'ed directly
+// into a RawInstruction's Op.
+type ALUOp uint16
+
+// ALU binary operation types.
+const (
+	ALUOpAdd ALUOp = iota << 4
+	ALUOpSub
+	ALUOpMul
+	ALUOpDiv
+	ALUOpOr
+	ALUOpAnd
+	ALUOpShiftLeft
+	ALUOpShiftRight
+	aluOpNeg // Not exported because it's the only unary ALU operation, and gets its own instruction type.
+	ALUOpMod
+	ALUOpXor
+)
+
+// A JumpTest is a comparison operator used in conditional jumps.
+//
+// In the descriptions below, A is the accumulator register and K is
+// the constant operand of the jump (the Val field of JumpIf). The
+// accumulator is always the left-hand operand of the comparison.
+type JumpTest uint16
+
+// Supported operators for conditional jumps.
+const (
+	// A == K
+	JumpEqual JumpTest = iota
+	// A != K
+	JumpNotEqual
+	// A > K
+	JumpGreaterThan
+	// A < K
+	JumpLessThan
+	// A >= K
+	JumpGreaterOrEqual
+	// A <= K
+	JumpLessOrEqual
+	// A & K != 0
+	JumpBitsSet
+	// A & K == 0
+	JumpBitsNotSet
+)
+
+// An Extension is a function call provided by the kernel that
+// performs advanced operations that are expensive or impossible
+// within the BPF virtual machine.
+//
+// Extensions are only implemented by the Linux kernel.
+//
+// TODO: should we prune this list? Some of these extensions seem
+// either broken or near-impossible to use correctly, whereas others
+// (len, random, ifindex) are quite useful.
+type Extension int
+
+// Extension functions available in the Linux kernel.
+//
+// Every constant is declared with the explicit type Extension. A
+// constant that has its own "= value" does not inherit the type of
+// the preceding declaration, so leaving the type off would make it an
+// untyped integer constant rather than an Extension.
+//
+// Apart from ExtLen, which LoadExtension assembles using its own
+// addressing mode, the value of each constant is the offset that
+// LoadExtension adds to -0x1000 to form the instruction's K operand.
+const (
+	// ExtLen returns the length of the packet.
+	ExtLen Extension = 1
+	// ExtProto returns the packet's L3 protocol type.
+	ExtProto Extension = 0
+	// ExtType returns the packet's type (skb->pkt_type in the kernel)
+	//
+	// TODO: better documentation. How nice an API do we want to
+	// provide for these esoteric extensions?
+	ExtType Extension = 4
+	// ExtPayloadOffset returns the offset of the packet payload, or
+	// the first protocol header that the kernel does not know how to
+	// parse.
+	ExtPayloadOffset Extension = 52
+	// ExtInterfaceIndex returns the index of the interface on which
+	// the packet was received.
+	ExtInterfaceIndex Extension = 8
+	// ExtNetlinkAttr returns the netlink attribute of type X at
+	// offset A.
+	ExtNetlinkAttr Extension = 12
+	// ExtNetlinkAttrNested returns the nested netlink attribute of
+	// type X at offset A.
+	ExtNetlinkAttrNested Extension = 16
+	// ExtMark returns the packet's mark value.
+	ExtMark Extension = 20
+	// ExtQueue returns the packet's assigned hardware queue.
+	ExtQueue Extension = 24
+	// ExtLinkLayerType returns the packet's hardware address type
+	// (e.g. Ethernet, Infiniband).
+	ExtLinkLayerType Extension = 28
+	// ExtRXHash returns the packet's receive hash.
+	//
+	// TODO: figure out what this rxhash actually is.
+	ExtRXHash Extension = 32
+	// ExtCPUID returns the ID of the CPU processing the current
+	// packet.
+	ExtCPUID Extension = 36
+	// ExtVLANTag returns the packet's VLAN tag.
+	ExtVLANTag Extension = 44
+	// ExtVLANTagPresent returns non-zero if the packet has a VLAN
+	// tag.
+	//
+	// TODO: I think this might be a lie: it reads bit 0x1000 of the
+	// VLAN header, which changed meaning in recent revisions of the
+	// spec - this extension may now return meaningless information.
+	ExtVLANTagPresent Extension = 48
+	// ExtVLANProto returns 0x8100 if the frame has a VLAN header,
+	// 0x88a8 if the frame has a "Q-in-Q" double VLAN header, or some
+	// other value if no VLAN information is present.
+	ExtVLANProto Extension = 60
+	// ExtRand returns a uniformly random uint32.
+	ExtRand Extension = 56
+)
+
+// The following gives names to various bit patterns used in opcode construction.
+
+// opClsMask selects the low three bits of an opcode, which hold the
+// instruction class (one of the opCls* values below).
+const opClsMask uint16 = 0x7
+
+// Instruction classes. Each diagram shows how the opcode bits of that
+// class are laid out, most significant bit first.
+const (
+	// +---------------+-----------------+---+---+---+
+	// | AddrMode (3b) | LoadWidth (2b)  | 0 | 0 | 0 |
+	// +---------------+-----------------+---+---+---+
+	opClsLoadA uint16 = iota
+	// +---------------+-----------------+---+---+---+
+	// | AddrMode (3b) | LoadWidth (2b)  | 0 | 0 | 1 |
+	// +---------------+-----------------+---+---+---+
+	opClsLoadX
+	// +---+---+---+---+---+---+---+---+
+	// | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
+	// +---+---+---+---+---+---+---+---+
+	opClsStoreA
+	// +---+---+---+---+---+---+---+---+
+	// | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
+	// +---+---+---+---+---+---+---+---+
+	opClsStoreX
+	// +---------------+-----------------+---+---+---+
+	// | Operator (4b) | OperandSrc (1b) | 1 | 0 | 0 |
+	// +---------------+-----------------+---+---+---+
+	opClsALU
+	// +-----------------------------+---+---+---+---+
+	// |      TestOperator (4b)      | 0 | 1 | 0 | 1 |
+	// +-----------------------------+---+---+---+---+
+	opClsJump
+	// +---+-------------------------+---+---+---+---+
+	// | 0 | 0 | 0 |   RetSrc (1b)   | 0 | 1 | 1 | 0 |
+	// +---+-------------------------+---+---+---+---+
+	opClsReturn
+	// +---+-------------------------+---+---+---+---+
+	// | 0 | 0 | 0 |  TXAorTAX (1b)  | 0 | 1 | 1 | 1 |
+	// +---+-------------------------+---+---+---+---+
+	opClsMisc
+)
+
+// Addressing modes of load instructions. The value is the mode
+// already shifted into bits 5-7 of the opcode.
+const (
+	opAddrModeImmediate uint16 = iota << 5
+	opAddrModeAbsolute
+	opAddrModeIndirect
+	opAddrModeScratch
+	// These are actually extensions, not addressing modes.
+	opAddrModePacketLen
+	opAddrModeIPv4HeaderLen
+)
+
+// Widths of load instructions, in bytes. The value is the width
+// selector already shifted into bits 3-4 of the opcode; note that the
+// 4-byte width is encoded as zero.
+const (
+	opLoadWidth4 uint16 = iota << 3
+	opLoadWidth2
+	opLoadWidth1
+)
+
+// Operator defined by ALUOp*
+//
+// opALUOpMask selects the upper four bits of an ALU opcode, which
+// hold the operator.
+const opALUOpMask = 0xf0
+
+// opALUSrcMask selects the bit of an ALU opcode that says where the
+// second operand comes from (see opALUSrc* below).
+const opALUSrcMask = 0x08
+
+// Second operand of an ALU instruction: the constant K, or register X.
+const (
+	opALUSrcConstant uint16 = iota << 3
+	opALUSrcX
+)
+
+// Jump tests that exist in the machine code. The remaining JumpTest
+// values are assembled by JumpIf as one of these with the true and
+// false skip counts exchanged.
+const (
+	opJumpAlways = iota << 4
+	opJumpEqual
+	opJumpGT
+	opJumpGE
+	opJumpSet
+)
+
+// Source of the value returned by a return instruction: the constant
+// K, or register A.
+const (
+	opRetSrcConstant uint16 = iota << 4
+	opRetSrcA
+)
+
+// Direction of a register transfer in the misc class: TAX copies A to
+// X, TXA copies X to A.
+const (
+	opMiscTAX = 0x00
+	opMiscTXA = 0x80
+)

+ 11 - 0
bpf/doc.go

@@ -0,0 +1,11 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package bpf implements marshalling and unmarshalling of programs
+// for the Berkeley Packet Filter virtual machine.
+//
+// TODO: brief overview of the BPF virtual machine (registers, scratch, packet access, execution constraints)
+//
+// TODO: simple BPF program examples
+package bpf // import "golang.org/x/net/bpf"

+ 419 - 0
bpf/instructions.go

@@ -0,0 +1,419 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package bpf
+
+import "fmt"
+
+// An Instruction is one instruction executed by the BPF virtual
+// machine.
+//
+// Every instruction type in this package, including RawInstruction
+// itself, implements Instruction.
+type Instruction interface {
+	// Assemble assembles the Instruction into a RawInstruction.
+	// It returns an error if the Instruction's fields cannot be
+	// encoded, for example an out-of-range scratch slot.
+	Assemble() (RawInstruction, error)
+}
+
+// A RawInstruction is a raw BPF virtual machine instruction.
+//
+// It is the output of Assemble and the input of Disassemble.
+type RawInstruction struct {
+	// Operation to execute.
+	Op uint16
+	// For conditional jump instructions, the number of instructions
+	// to skip if the condition is true/false.
+	// Jt is the count used when the condition is true, Jf when it
+	// is false.
+	Jt uint8
+	Jf uint8
+	// Constant parameter. The meaning depends on the Op.
+	K uint32
+}
+
+// Assemble implements the Instruction Assemble method. A
+// RawInstruction is already in assembled form, so ri is returned as
+// is and the error is always nil.
+func (ri RawInstruction) Assemble() (RawInstruction, error) {
+	return ri, nil
+}
+
+// Disassemble parses ri into an Instruction and returns it. If ri is
+// not recognized by this package, ri itself is returned.
+//
+// Most instructions are recognized by an exact match on ri.Op. ALU
+// instructions other than negation are decoded afterwards by masking
+// out the operator and operand-source bits.
+func (ri RawInstruction) Disassemble() Instruction {
+	switch ri.Op {
+	case opClsLoadA | opLoadWidth4 | opAddrModeImmediate:
+		return LoadConstant{Dst: RegA, Val: ri.K}
+	case opClsLoadX | opLoadWidth4 | opAddrModeImmediate:
+		return LoadConstant{Dst: RegX, Val: ri.K}
+
+	case opClsLoadA | opLoadWidth4 | opAddrModeScratch:
+		// Scratch slots are numbered 0-15; anything else cannot be
+		// represented as a LoadScratch and is passed through raw.
+		if ri.K > 15 {
+			return ri
+		}
+		return LoadScratch{Dst: RegA, N: int(ri.K)}
+	case opClsLoadX | opLoadWidth4 | opAddrModeScratch:
+		if ri.K > 15 {
+			return ri
+		}
+		return LoadScratch{Dst: RegX, N: int(ri.K)}
+
+	case opClsLoadA | opLoadWidth4 | opAddrModeAbsolute:
+		// LoadExtension assembles to a 4-byte absolute load whose K
+		// is -0x1000 plus the extension number. Adding 0x1000 (with
+		// uint32 wraparound) recovers the candidate extension number.
+		// ExtLen is not listed here because it is assembled with its
+		// own addressing mode and decoded further below.
+		ext := Extension(uint32(ri.K) + 0x1000)
+		switch ext {
+		case ExtProto, ExtType, ExtPayloadOffset, ExtInterfaceIndex, ExtNetlinkAttr, ExtNetlinkAttrNested, ExtMark, ExtQueue, ExtLinkLayerType, ExtRXHash, ExtCPUID, ExtVLANTag, ExtVLANTagPresent, ExtVLANProto, ExtRand:
+			return LoadExtension{Num: ext}
+		default:
+			return LoadAbsolute{Off: ri.K, Size: 4}
+		}
+	case opClsLoadA | opLoadWidth2 | opAddrModeAbsolute:
+		return LoadAbsolute{Off: ri.K, Size: 2}
+	case opClsLoadA | opLoadWidth1 | opAddrModeAbsolute:
+		return LoadAbsolute{Off: ri.K, Size: 1}
+
+	case opClsLoadA | opLoadWidth4 | opAddrModeIndirect:
+		return LoadIndirect{Off: ri.K, Size: 4}
+	case opClsLoadA | opLoadWidth2 | opAddrModeIndirect:
+		return LoadIndirect{Off: ri.K, Size: 2}
+	case opClsLoadA | opLoadWidth1 | opAddrModeIndirect:
+		return LoadIndirect{Off: ri.K, Size: 1}
+
+	case opClsLoadX | opLoadWidth1 | opAddrModeIPv4HeaderLen:
+		return LoadIPv4HeaderLen{Off: ri.K}
+
+	case opClsLoadA | opLoadWidth4 | opAddrModePacketLen:
+		return LoadExtension{Num: ExtLen}
+
+	case opClsStoreA:
+		// As for loads, only scratch slots 0-15 are representable.
+		if ri.K > 15 {
+			return ri
+		}
+		return StoreScratch{Src: RegA, N: int(ri.K)}
+	case opClsStoreX:
+		if ri.K > 15 {
+			return ri
+		}
+		return StoreScratch{Src: RegX, N: int(ri.K)}
+
+	case opClsALU | uint16(aluOpNeg):
+		return NegateA{}
+
+	// Only the four tests that exist in the machine code are decoded.
+	// The negated JumpTest values are assembled as one of these with
+	// swapped skip counts, so they never come back out of
+	// disassembly.
+	case opClsJump | opJumpAlways:
+		return Jump{Skip: ri.K}
+	case opClsJump | opJumpEqual:
+		return JumpIf{
+			Cond:      JumpEqual,
+			Val:       ri.K,
+			SkipTrue:  ri.Jt,
+			SkipFalse: ri.Jf,
+		}
+	case opClsJump | opJumpGT:
+		return JumpIf{
+			Cond:      JumpGreaterThan,
+			Val:       ri.K,
+			SkipTrue:  ri.Jt,
+			SkipFalse: ri.Jf,
+		}
+	case opClsJump | opJumpGE:
+		return JumpIf{
+			Cond:      JumpGreaterOrEqual,
+			Val:       ri.K,
+			SkipTrue:  ri.Jt,
+			SkipFalse: ri.Jf,
+		}
+	case opClsJump | opJumpSet:
+		return JumpIf{
+			Cond:      JumpBitsSet,
+			Val:       ri.K,
+			SkipTrue:  ri.Jt,
+			SkipFalse: ri.Jf,
+		}
+
+	case opClsReturn | opRetSrcA:
+		return RetA{}
+	case opClsReturn | opRetSrcConstant:
+		return RetConstant{Val: ri.K}
+
+	case opClsMisc | opMiscTXA:
+		return TXA{}
+	case opClsMisc | opMiscTAX:
+		return TAX{}
+	}
+
+	// ALU operations require bitmasking to decode, so are done
+	// outside the main switch.
+
+	if ri.Op&opClsMask != opClsALU {
+		return ri
+	}
+
+	// Accept only the exported binary operators; any other operator
+	// bits (including negation, which was matched exactly above) leave
+	// the instruction undecoded.
+	op := ALUOp(ri.Op & opALUOpMask)
+	switch op {
+	case ALUOpAdd, ALUOpSub, ALUOpMul, ALUOpDiv, ALUOpOr, ALUOpAnd, ALUOpShiftLeft, ALUOpShiftRight, ALUOpMod, ALUOpXor:
+	default:
+		return ri
+	}
+	// A set source bit means the second operand is register X;
+	// otherwise it is the constant K.
+	if ri.Op&opALUSrcMask != 0 {
+		return ALUOpX{Op: op}
+	}
+	return ALUOpConstant{Op: op, Val: ri.K}
+}
+
+// LoadConstant loads the immediate value Val into register Dst.
+type LoadConstant struct {
+	Dst Register
+	Val uint32
+}
+
+// Assemble implements the Instruction Assemble method. It fails if
+// Dst is not a valid register.
+func (a LoadConstant) Assemble() (RawInstruction, error) {
+	// Immediates are always encoded with the 4-byte load width.
+	const width = 4
+	return assembleLoad(a.Dst, width, opAddrModeImmediate, a.Val)
+}
+
+// LoadScratch loads the scratch memory slot scratch[N] into register
+// Dst.
+type LoadScratch struct {
+	Dst Register
+	N   int // 0-15
+}
+
+// Assemble implements the Instruction Assemble method. It fails if N
+// is outside 0-15 or if Dst is not a valid register.
+func (a LoadScratch) Assemble() (RawInstruction, error) {
+	if slotOK := 0 <= a.N && a.N <= 15; !slotOK {
+		return RawInstruction{}, fmt.Errorf("invalid scratch slot %d", a.N)
+	}
+	slot := uint32(a.N)
+	return assembleLoad(a.Dst, 4, opAddrModeScratch, slot)
+}
+
+// LoadAbsolute reads the Size bytes packet[Off:Off+Size] and loads
+// them, as an integer value, into register A.
+type LoadAbsolute struct {
+	Off  uint32
+	Size int // 1, 2 or 4
+}
+
+// Assemble implements the Instruction Assemble method. It fails if
+// Size is not 1, 2 or 4.
+func (a LoadAbsolute) Assemble() (RawInstruction, error) {
+	width, offset := a.Size, a.Off
+	return assembleLoad(RegA, width, opAddrModeAbsolute, offset)
+}
+
+// LoadIndirect reads the Size bytes packet[X+Off:X+Off+Size], where
+// X is the current value of register X, and loads them, as an
+// integer value, into register A.
+type LoadIndirect struct {
+	Off  uint32
+	Size int // 1, 2 or 4
+}
+
+// Assemble implements the Instruction Assemble method. It fails if
+// Size is not 1, 2 or 4.
+func (a LoadIndirect) Assemble() (RawInstruction, error) {
+	width, offset := a.Size, a.Off
+	return assembleLoad(RegA, width, opAddrModeIndirect, offset)
+}
+
+// LoadIPv4HeaderLen computes the length of the IPv4 header whose
+// first byte is packet[Off] and loads it into register X.
+type LoadIPv4HeaderLen struct {
+	Off uint32
+}
+
+// Assemble implements the Instruction Assemble method.
+func (a LoadIPv4HeaderLen) Assemble() (RawInstruction, error) {
+	// This instruction is always encoded as a 1-byte load into X.
+	const width = 1
+	return assembleLoad(RegX, width, opAddrModeIPv4HeaderLen, a.Off)
+}
+
+// LoadExtension invokes the Linux-specific extension Num and stores
+// its result in register A.
+type LoadExtension struct {
+	Num Extension
+}
+
+// Assemble implements the Instruction Assemble method.
+//
+// ExtLen has a dedicated addressing mode. Every other extension is
+// encoded as a 4-byte absolute load whose offset is -0x1000 plus the
+// extension number, converted to uint32.
+func (a LoadExtension) Assemble() (RawInstruction, error) {
+	mode, k := opAddrModeAbsolute, uint32(-0x1000+a.Num)
+	if a.Num == ExtLen {
+		mode, k = opAddrModePacketLen, 0
+	}
+	return assembleLoad(RegA, 4, mode, k)
+}
+
+// StoreScratch stores the value of register Src into the scratch
+// memory slot scratch[N].
+type StoreScratch struct {
+	Src Register
+	N   int // 0-15
+}
+
+// Assemble implements the Instruction Assemble method. It fails if N
+// is outside 0-15 or if Src is not a valid register; the slot number
+// is checked first.
+func (a StoreScratch) Assemble() (RawInstruction, error) {
+	if a.N < 0 || a.N > 15 {
+		return RawInstruction{}, fmt.Errorf("invalid scratch slot %d", a.N)
+	}
+
+	// Each source register has its own store opcode class.
+	switch a.Src {
+	case RegA:
+		return RawInstruction{Op: opClsStoreA, K: uint32(a.N)}, nil
+	case RegX:
+		return RawInstruction{Op: opClsStoreX, K: uint32(a.N)}, nil
+	}
+	return RawInstruction{}, fmt.Errorf("invalid source register %v", a.Src)
+}
+
+// ALUOpConstant executes A = A <Op> Val, where Val is a constant.
+type ALUOpConstant struct {
+	Op  ALUOp
+	Val uint32
+}
+
+// Assemble implements the Instruction Assemble method. Op is encoded
+// as given; it is not validated here.
+func (a ALUOpConstant) Assemble() (RawInstruction, error) {
+	raw := RawInstruction{K: a.Val}
+	raw.Op = opClsALU | opALUSrcConstant | uint16(a.Op)
+	return raw, nil
+}
+
+// ALUOpX executes A = A <Op> X, where X is register X.
+type ALUOpX struct {
+	Op ALUOp
+}
+
+// Assemble implements the Instruction Assemble method. Op is encoded
+// as given; it is not validated here.
+func (a ALUOpX) Assemble() (RawInstruction, error) {
+	var raw RawInstruction
+	raw.Op = opClsALU | opALUSrcX | uint16(a.Op)
+	return raw, nil
+}
+
+// NegateA executes A = -A. It is the only unary ALU operation.
+type NegateA struct{}
+
+// Assemble implements the Instruction Assemble method. It never
+// fails.
+func (a NegateA) Assemble() (RawInstruction, error) {
+	var raw RawInstruction
+	raw.Op = opClsALU | uint16(aluOpNeg)
+	return raw, nil
+}
+
+// Jump unconditionally skips the next Skip instructions in the
+// program.
+type Jump struct {
+	Skip uint32
+}
+
+// Assemble implements the Instruction Assemble method. The skip
+// count is carried in the K field of the raw instruction.
+func (a Jump) Assemble() (RawInstruction, error) {
+	raw := RawInstruction{K: a.Skip}
+	raw.Op = opClsJump | opJumpAlways
+	return raw, nil
+}
+
+// JumpIf evaluates A <Cond> Val. If the test is true it skips the
+// next SkipTrue instructions in the program, otherwise it skips the
+// next SkipFalse instructions.
+type JumpIf struct {
+	Cond      JumpTest
+	Val       uint32
+	SkipTrue  uint8
+	SkipFalse uint8
+}
+
+// Assemble implements the Instruction Assemble method. It fails if
+// Cond is not one of the JumpTest constants.
+//
+// The machine code only has the equal, greater-than,
+// greater-or-equal and bits-set tests. Each of the other four tests
+// is the negation of one of those, and is encoded as that test with
+// the true and false skip counts exchanged.
+func (a JumpIf) Assemble() (RawInstruction, error) {
+	raw := RawInstruction{Jt: a.SkipTrue, Jf: a.SkipFalse, K: a.Val}
+	negated := false
+	switch a.Cond {
+	case JumpEqual, JumpNotEqual:
+		raw.Op = opClsJump | opJumpEqual
+		negated = a.Cond == JumpNotEqual
+	case JumpGreaterThan, JumpLessOrEqual:
+		raw.Op = opClsJump | opJumpGT
+		negated = a.Cond == JumpLessOrEqual
+	case JumpGreaterOrEqual, JumpLessThan:
+		raw.Op = opClsJump | opJumpGE
+		negated = a.Cond == JumpLessThan
+	case JumpBitsSet, JumpBitsNotSet:
+		raw.Op = opClsJump | opJumpSet
+		negated = a.Cond == JumpBitsNotSet
+	default:
+		return RawInstruction{}, fmt.Errorf("unknown JumpTest %v", a.Cond)
+	}
+	if negated {
+		raw.Jt, raw.Jf = raw.Jf, raw.Jt
+	}
+	return raw, nil
+}
+
+// RetA exits the BPF program and returns the value held in register
+// A.
+type RetA struct{}
+
+// Assemble implements the Instruction Assemble method. It never
+// fails.
+func (a RetA) Assemble() (RawInstruction, error) {
+	var raw RawInstruction
+	raw.Op = opClsReturn | opRetSrcA
+	return raw, nil
+}
+
+// RetConstant exits the BPF program and returns the constant Val.
+type RetConstant struct {
+	Val uint32
+}
+
+// Assemble implements the Instruction Assemble method. It never
+// fails.
+func (a RetConstant) Assemble() (RawInstruction, error) {
+	raw := RawInstruction{K: a.Val}
+	raw.Op = opClsReturn | opRetSrcConstant
+	return raw, nil
+}
+
+// TXA copies the value of register X into register A.
+type TXA struct{}
+
+// Assemble implements the Instruction Assemble method. It never
+// fails.
+func (a TXA) Assemble() (RawInstruction, error) {
+	var raw RawInstruction
+	raw.Op = opClsMisc | opMiscTXA
+	return raw, nil
+}
+
+// TAX copies the value of register A into register X.
+type TAX struct{}
+
+// Assemble implements the Instruction Assemble method. It never
+// fails.
+func (a TAX) Assemble() (RawInstruction, error) {
+	var raw RawInstruction
+	raw.Op = opClsMisc | opMiscTAX
+	return raw, nil
+}
+
+// assembleLoad builds the RawInstruction for a load.
+//
+// dst selects the load class (into register A or into register X),
+// loadSize is the number of bytes to load (1, 2 or 4), mode is one of
+// the opAddrMode* constants and k is stored unchanged in the K field
+// of the result.
+//
+// It returns an error if dst is neither RegA nor RegX, or if loadSize
+// is not 1, 2 or 4. mode is not validated.
+func assembleLoad(dst Register, loadSize int, mode uint16, k uint32) (RawInstruction, error) {
+	var (
+		cls uint16
+		sz  uint16
+	)
+	switch dst {
+	case RegA:
+		cls = opClsLoadA
+	case RegX:
+		cls = opClsLoadX
+	default:
+		return RawInstruction{}, fmt.Errorf("invalid target register %v", dst)
+	}
+	switch loadSize {
+	case 1:
+		sz = opLoadWidth1
+	case 2:
+		sz = opLoadWidth2
+	case 4:
+		sz = opLoadWidth4
+	default:
+		// Report the size the caller asked for; sz has not been
+		// assigned on this path and is still zero.
+		return RawInstruction{}, fmt.Errorf("invalid load byte length %d", loadSize)
+	}
+	return RawInstruction{
+		Op: cls | sz | mode,
+		K:  k,
+	}, nil
+}

+ 184 - 0
bpf/instructions_test.go

@@ -0,0 +1,184 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package bpf
+
+import (
+	"io/ioutil"
+	"reflect"
+	"strconv"
+	"strings"
+	"testing"
+)
+
+// This is a direct translation of the program in
+// testdata/all_instructions.txt.
+//
+// The order of the entries matters: the tests compare the assembled
+// program position by position, and the skip counts of the jumps
+// depend on where the two return instructions at the end sit.
+var allInstructions = []Instruction{
+	// Load immediate.
+	LoadConstant{Dst: RegA, Val: 42},
+	LoadConstant{Dst: RegX, Val: 42},
+
+	// Load scratch.
+	LoadScratch{Dst: RegA, N: 3},
+	LoadScratch{Dst: RegX, N: 3},
+
+	// Load absolute.
+	LoadAbsolute{Off: 42, Size: 1},
+	LoadAbsolute{Off: 42, Size: 2},
+	LoadAbsolute{Off: 42, Size: 4},
+
+	// Load indirect.
+	LoadIndirect{Off: 42, Size: 1},
+	LoadIndirect{Off: 42, Size: 2},
+	LoadIndirect{Off: 42, Size: 4},
+
+	// Load IPv4 header length.
+	LoadIPv4HeaderLen{Off: 42},
+
+	// Run extension function.
+	LoadExtension{Num: ExtLen},
+	LoadExtension{Num: ExtProto},
+	LoadExtension{Num: ExtType},
+	LoadExtension{Num: ExtRand},
+
+	// Store scratch.
+	StoreScratch{Src: RegA, N: 3},
+	StoreScratch{Src: RegX, N: 3},
+
+	// A <op> constant.
+	ALUOpConstant{Op: ALUOpAdd, Val: 42},
+	ALUOpConstant{Op: ALUOpSub, Val: 42},
+	ALUOpConstant{Op: ALUOpMul, Val: 42},
+	ALUOpConstant{Op: ALUOpDiv, Val: 42},
+	ALUOpConstant{Op: ALUOpOr, Val: 42},
+	ALUOpConstant{Op: ALUOpAnd, Val: 42},
+	ALUOpConstant{Op: ALUOpShiftLeft, Val: 42},
+	ALUOpConstant{Op: ALUOpShiftRight, Val: 42},
+	ALUOpConstant{Op: ALUOpMod, Val: 42},
+	ALUOpConstant{Op: ALUOpXor, Val: 42},
+
+	// A <op> X.
+	ALUOpX{Op: ALUOpAdd},
+	ALUOpX{Op: ALUOpSub},
+	ALUOpX{Op: ALUOpMul},
+	ALUOpX{Op: ALUOpDiv},
+	ALUOpX{Op: ALUOpOr},
+	ALUOpX{Op: ALUOpAnd},
+	ALUOpX{Op: ALUOpShiftLeft},
+	ALUOpX{Op: ALUOpShiftRight},
+	ALUOpX{Op: ALUOpMod},
+	ALUOpX{Op: ALUOpXor},
+
+	// Negation.
+	NegateA{},
+
+	// Jumps. The skip counts are relative to the instruction that
+	// follows the jump and land on the two returns at the end.
+	Jump{Skip: 10},
+	JumpIf{Cond: JumpEqual, Val: 42, SkipTrue: 8, SkipFalse: 9},
+	JumpIf{Cond: JumpNotEqual, Val: 42, SkipTrue: 8},
+	JumpIf{Cond: JumpLessThan, Val: 42, SkipTrue: 7},
+	JumpIf{Cond: JumpLessOrEqual, Val: 42, SkipTrue: 6},
+	JumpIf{Cond: JumpGreaterThan, Val: 42, SkipTrue: 4, SkipFalse: 5},
+	JumpIf{Cond: JumpGreaterOrEqual, Val: 42, SkipTrue: 3, SkipFalse: 4},
+	JumpIf{Cond: JumpBitsSet, Val: 42, SkipTrue: 2, SkipFalse: 3},
+
+	// Register transfers.
+	TAX{},
+	TXA{},
+
+	// Returns.
+	RetA{},
+	RetConstant{Val: 42},
+}
+
+// allInstructionsExpected is the path of the file that TestInterop
+// reads as the expected encoding of allInstructions.
+var allInstructionsExpected = "testdata/all_instructions.bpf"
+
+// Check that we produce the same output as the canonical bpf_asm
+// linux kernel tool.
+//
+// The expected file is a single line of comma-separated statements:
+// the statement count first, then one "op jt jf k" statement per
+// instruction, with a trailing comma after the last one. Every
+// instruction statement is compared field by field against the
+// corresponding assembled instruction.
+func TestInterop(t *testing.T) {
+	out, err := Assemble(allInstructions)
+	if err != nil {
+		t.Fatalf("assembly of allInstructions program failed: %s", err)
+	}
+	t.Logf("Assembled program is %d instructions long", len(out))
+
+	bs, err := ioutil.ReadFile(allInstructionsExpected)
+	if err != nil {
+		t.Fatalf("reading %s: %s", allInstructionsExpected, err)
+	}
+	// First statement is the number of statements, last statement is
+	// empty. We just ignore both and rely on slice length.
+	stmts := strings.Split(string(bs), ",")
+	if len(stmts)-2 != len(out) {
+		t.Fatalf("test program lengths don't match: %s has %d, Go implementation has %d", allInstructionsExpected, len(stmts)-2, len(out))
+	}
+
+	// Drop exactly one element at each end (the count and the empty
+	// tail) so that all len(out) instruction statements, including the
+	// last one, are checked.
+	for i, stmt := range stmts[1 : len(stmts)-1] {
+		nums := strings.Split(stmt, " ")
+		if len(nums) != 4 {
+			t.Fatalf("malformed instruction %d in %s: %s", i+1, allInstructionsExpected, stmt)
+		}
+
+		actual := out[i]
+
+		op, err := strconv.ParseUint(nums[0], 10, 16)
+		if err != nil {
+			t.Fatalf("malformed opcode %s in instruction %d of %s", nums[0], i+1, allInstructionsExpected)
+		}
+		if actual.Op != uint16(op) {
+			t.Errorf("opcode mismatch on instruction %d (%#v): got 0x%02x, want 0x%02x", i+1, allInstructions[i], actual.Op, op)
+		}
+
+		jt, err := strconv.ParseUint(nums[1], 10, 8)
+		if err != nil {
+			t.Fatalf("malformed jt offset %s in instruction %d of %s", nums[1], i+1, allInstructionsExpected)
+		}
+		if actual.Jt != uint8(jt) {
+			t.Errorf("jt mismatch on instruction %d (%#v): got %d, want %d", i+1, allInstructions[i], actual.Jt, jt)
+		}
+
+		jf, err := strconv.ParseUint(nums[2], 10, 8)
+		if err != nil {
+			t.Fatalf("malformed jf offset %s in instruction %d of %s", nums[2], i+1, allInstructionsExpected)
+		}
+		if actual.Jf != uint8(jf) {
+			t.Errorf("jf mismatch on instruction %d (%#v): got %d, want %d", i+1, allInstructions[i], actual.Jf, jf)
+		}
+
+		k, err := strconv.ParseUint(nums[3], 10, 32)
+		if err != nil {
+			t.Fatalf("malformed constant %s in instruction %d of %s", nums[3], i+1, allInstructionsExpected)
+		}
+		if actual.K != uint32(k) {
+			t.Errorf("constant mismatch on instruction %d (%#v): got %d, want %d", i+1, allInstructions[i], actual.K, k)
+		}
+	}
+}
+
+// Check that assembly and disassembly match each other.
+//
+// JumpIf offers jump conditions that have no machine-code
+// counterpart, so the disassembled program is not guaranteed to equal
+// the original source instruction for instruction, even though it
+// behaves the same. What must hold is that assembling the
+// disassembly reproduces the assembled program exactly.
+func TestAsmDisasm(t *testing.T) {
+	assembled, err := Assemble(allInstructions)
+	if err != nil {
+		t.Fatalf("assembly of allInstructions program failed: %s", err)
+	}
+	t.Logf("Assembled program is %d instructions long", len(assembled))
+
+	decoded, ok := Disassemble(assembled)
+	if !ok {
+		t.Errorf("Disassemble(Assemble(allInstructions)) produced unrecognized instructions:")
+		for idx := range decoded {
+			raw, isRaw := decoded[idx].(RawInstruction)
+			if !isRaw {
+				continue
+			}
+			t.Logf("  insn %d, %#v --> %#v", idx+1, allInstructions[idx], raw)
+		}
+	}
+
+	reassembled, err := Assemble(decoded)
+	if err != nil {
+		t.Fatalf("assembly of Disassemble(Assemble(allInstructions)) failed: %s", err)
+	}
+
+	if len(reassembled) != len(assembled) {
+		t.Fatalf("disassembly changed program size: %d insns before, %d insns after", len(assembled), len(reassembled))
+	}
+	if reflect.DeepEqual(assembled, reassembled) {
+		return
+	}
+	t.Errorf("program mutated by disassembly:")
+	for idx := range reassembled {
+		if reflect.DeepEqual(assembled[idx], reassembled[idx]) {
+			continue
+		}
+		t.Logf("  insn %d, s: %#v, p1: %#v, p2: %#v", idx+1, allInstructions[idx], assembled[idx], reassembled[idx])
+	}
+}

+ 1 - 0
bpf/testdata/all_instructions.bpf

@@ -0,0 +1 @@
+50,0 0 0 42,1 0 0 42,96 0 0 3,97 0 0 3,48 0 0 42,40 0 0 42,32 0 0 42,80 0 0 42,72 0 0 42,64 0 0 42,177 0 0 42,128 0 0 0,32 0 0 4294963200,32 0 0 4294963204,32 0 0 4294963256,2 0 0 3,3 0 0 3,4 0 0 42,20 0 0 42,36 0 0 42,52 0 0 42,68 0 0 42,84 0 0 42,100 0 0 42,116 0 0 42,148 0 0 42,164 0 0 42,12 0 0 0,28 0 0 0,44 0 0 0,60 0 0 0,76 0 0 0,92 0 0 0,108 0 0 0,124 0 0 0,156 0 0 0,172 0 0 0,132 0 0 0,5 0 0 10,21 8 9 42,21 0 8 42,53 0 7 42,37 0 6 42,37 4 5 42,53 3 4 42,69 2 3 42,7 0 0 0,135 0 0 0,22 0 0 0,6 0 0 42,

+ 79 - 0
bpf/testdata/all_instructions.txt

@@ -0,0 +1,79 @@
+# This filter is compiled to all_instructions.bpf by the `bpf_asm`
+# tool, which can be found in the linux kernel source tree under
+# tools/net.
+
+# Load immediate
+ld #42
+ldx #42
+
+# Load scratch
+ld M[3]
+ldx M[3]
+
+# Load absolute
+ldb [42]
+ldh [42]
+ld [42]
+
+# Load indirect
+ldb [x + 42]
+ldh [x + 42]
+ld [x + 42]
+
+# Load IPv4 header length
+ldx 4*([42]&0xf)
+
+# Run extension function
+ld #len
+ld #proto
+ld #type
+ld #rand
+
+# Store scratch
+st M[3]
+stx M[3]
+
+# A <op> constant
+add #42
+sub #42
+mul #42
+div #42
+or #42
+and #42
+lsh #42
+rsh #42
+mod #42
+xor #42
+
+# A <op> X
+add x
+sub x
+mul x
+div x
+or x
+and x
+lsh x
+rsh x
+mod x
+xor x
+
+# A = -A
+neg
+
+# Jumps
+ja end
+jeq #42,prev,end
+jne #42,end
+jlt #42,end
+jle #42,end
+jgt #42,prev,end
+jge #42,prev,end
+jset #42,prev,end
+
+# Register transfers
+tax
+txa
+
+# Returns
+prev: ret a
+end: ret #42