123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452 |
- // Copyright 2016 The Go Authors. All rights reserved.
- // Use of this source code is governed by a BSD-style
- // license that can be found in the LICENSE file.
- package http2
- import (
- "fmt"
- "math"
- "sort"
- )
- // RFC 7540, Section 5.3.5: the default weight is 16.
- const priorityDefaultWeight = 15 // 16 = 15 + 1
- // PriorityWriteSchedulerConfig configures a priorityWriteScheduler.
- type PriorityWriteSchedulerConfig struct {
- // MaxClosedNodesInTree controls the maximum number of closed streams to
- // retain in the priority tree. Setting this to zero saves a small amount
- // of memory at the cost of performance.
- //
- // See RFC 7540, Section 5.3.4:
- // "It is possible for a stream to become closed while prioritization
- // information ... is in transit. ... This potentially creates suboptimal
- // prioritization, since the stream could be given a priority that is
- // different from what is intended. To avoid these problems, an endpoint
- // SHOULD retain stream prioritization state for a period after streams
- // become closed. The longer state is retained, the lower the chance that
- // streams are assigned incorrect or default priority values."
- MaxClosedNodesInTree int
- // MaxIdleNodesInTree controls the maximum number of idle streams to
- // retain in the priority tree. Setting this to zero saves a small amount
- // of memory at the cost of performance.
- //
- // See RFC 7540, Section 5.3.4:
- // Similarly, streams that are in the "idle" state can be assigned
- // priority or become a parent of other streams. This allows for the
- // creation of a grouping node in the dependency tree, which enables
- // more flexible expressions of priority. Idle streams begin with a
- // default priority (Section 5.3.5).
- MaxIdleNodesInTree int
- // ThrottleOutOfOrderWrites enables write throttling to help ensure that
- // data is delivered in priority order. This works around a race where
- // stream B depends on stream A and both streams are about to call Write
- // to queue DATA frames. If B wins the race, a naive scheduler would eagerly
- // write as much data from B as possible, but this is suboptimal because A
- // is a higher-priority stream. With throttling enabled, we write a small
- // amount of data from B to minimize the amount of bandwidth that B can
- // steal from A.
- ThrottleOutOfOrderWrites bool
- }
- // NewPriorityWriteScheduler constructs a WriteScheduler that schedules
- // frames by following HTTP/2 priorities as described in RFC 7540 Section 5.3.
- // If cfg is nil, default options are used.
- func NewPriorityWriteScheduler(cfg *PriorityWriteSchedulerConfig) WriteScheduler {
- if cfg == nil {
- // For justification of these defaults, see:
- // https://docs.google.com/document/d/1oLhNg1skaWD4_DtaoCxdSRN5erEXrH-KnLrMwEpOtFY
- cfg = &PriorityWriteSchedulerConfig{
- MaxClosedNodesInTree: 10,
- MaxIdleNodesInTree: 10,
- ThrottleOutOfOrderWrites: false,
- }
- }
- ws := &priorityWriteScheduler{
- nodes: make(map[uint32]*priorityNode),
- maxClosedNodesInTree: cfg.MaxClosedNodesInTree,
- maxIdleNodesInTree: cfg.MaxIdleNodesInTree,
- enableWriteThrottle: cfg.ThrottleOutOfOrderWrites,
- }
- ws.nodes[0] = &ws.root
- if cfg.ThrottleOutOfOrderWrites {
- ws.writeThrottleLimit = 1024
- } else {
- ws.writeThrottleLimit = math.MaxInt32
- }
- return ws
- }
- type priorityNodeState int
- const (
- priorityNodeOpen priorityNodeState = iota
- priorityNodeClosed
- priorityNodeIdle
- )
- // priorityNode is a node in an HTTP/2 priority tree.
- // Each node is associated with a single stream ID.
- // See RFC 7540, Section 5.3.
- type priorityNode struct {
- q writeQueue // queue of pending frames to write
- id uint32 // id of the stream, or 0 for the root of the tree
- weight uint8 // the actual weight is weight+1, so the value is in [1,256]
- state priorityNodeState // open | closed | idle
- bytes int64 // number of bytes written by this node, or 0 if closed
- subtreeBytes int64 // sum(node.bytes) of all nodes in this subtree
- // These links form the priority tree.
- parent *priorityNode
- kids *priorityNode // start of the kids list
- prev, next *priorityNode // doubly-linked list of siblings
- }
- func (n *priorityNode) setParent(parent *priorityNode) {
- if n == parent {
- panic("setParent to self")
- }
- if n.parent == parent {
- return
- }
- // Unlink from current parent.
- if parent := n.parent; parent != nil {
- if n.prev == nil {
- parent.kids = n.next
- } else {
- n.prev.next = n.next
- }
- if n.next != nil {
- n.next.prev = n.prev
- }
- }
- // Link to new parent.
- // If parent=nil, remove n from the tree.
- // Always insert at the head of parent.kids (this is assumed by walkReadyInOrder).
- n.parent = parent
- if parent == nil {
- n.next = nil
- n.prev = nil
- } else {
- n.next = parent.kids
- n.prev = nil
- if n.next != nil {
- n.next.prev = n
- }
- parent.kids = n
- }
- }
- func (n *priorityNode) addBytes(b int64) {
- n.bytes += b
- for ; n != nil; n = n.parent {
- n.subtreeBytes += b
- }
- }
- // walkReadyInOrder iterates over the tree in priority order, calling f for each node
- // with a non-empty write queue. When f returns true, this function returns true and the
- // walk halts. tmp is used as scratch space for sorting.
- //
- // f(n, openParent) takes two arguments: the node to visit, n, and a bool that is true
- // if any ancestor p of n is still open (ignoring the root node).
- func (n *priorityNode) walkReadyInOrder(openParent bool, tmp *[]*priorityNode, f func(*priorityNode, bool) bool) bool {
- if !n.q.empty() && f(n, openParent) {
- return true
- }
- if n.kids == nil {
- return false
- }
- // Don't consider the root "open" when updating openParent since
- // we can't send data frames on the root stream (only control frames).
- if n.id != 0 {
- openParent = openParent || (n.state == priorityNodeOpen)
- }
- // Common case: only one kid or all kids have the same weight.
- // Some clients don't use weights; other clients (like web browsers)
- // use mostly-linear priority trees.
- w := n.kids.weight
- needSort := false
- for k := n.kids.next; k != nil; k = k.next {
- if k.weight != w {
- needSort = true
- break
- }
- }
- if !needSort {
- for k := n.kids; k != nil; k = k.next {
- if k.walkReadyInOrder(openParent, tmp, f) {
- return true
- }
- }
- return false
- }
- // Uncommon case: sort the child nodes. We remove the kids from the parent,
- // then re-insert after sorting so we can reuse tmp for future sort calls.
- *tmp = (*tmp)[:0]
- for n.kids != nil {
- *tmp = append(*tmp, n.kids)
- n.kids.setParent(nil)
- }
- sort.Sort(sortPriorityNodeSiblings(*tmp))
- for i := len(*tmp) - 1; i >= 0; i-- {
- (*tmp)[i].setParent(n) // setParent inserts at the head of n.kids
- }
- for k := n.kids; k != nil; k = k.next {
- if k.walkReadyInOrder(openParent, tmp, f) {
- return true
- }
- }
- return false
- }
- type sortPriorityNodeSiblings []*priorityNode
- func (z sortPriorityNodeSiblings) Len() int { return len(z) }
- func (z sortPriorityNodeSiblings) Swap(i, k int) { z[i], z[k] = z[k], z[i] }
- func (z sortPriorityNodeSiblings) Less(i, k int) bool {
- // Prefer the subtree that has sent fewer bytes relative to its weight.
- // See sections 5.3.2 and 5.3.4.
- wi, bi := float64(z[i].weight+1), float64(z[i].subtreeBytes)
- wk, bk := float64(z[k].weight+1), float64(z[k].subtreeBytes)
- if bi == 0 && bk == 0 {
- return wi >= wk
- }
- if bk == 0 {
- return false
- }
- return bi/bk <= wi/wk
- }
- type priorityWriteScheduler struct {
- // root is the root of the priority tree, where root.id = 0.
- // The root queues control frames that are not associated with any stream.
- root priorityNode
- // nodes maps stream ids to priority tree nodes.
- nodes map[uint32]*priorityNode
- // maxID is the maximum stream id in nodes.
- maxID uint32
- // lists of nodes that have been closed or are idle, but are kept in
- // the tree for improved prioritization. When the lengths exceed either
- // maxClosedNodesInTree or maxIdleNodesInTree, old nodes are discarded.
- closedNodes, idleNodes []*priorityNode
- // From the config.
- maxClosedNodesInTree int
- maxIdleNodesInTree int
- writeThrottleLimit int32
- enableWriteThrottle bool
- // tmp is scratch space for priorityNode.walkReadyInOrder to reduce allocations.
- tmp []*priorityNode
- // pool of empty queues for reuse.
- queuePool writeQueuePool
- }
- func (ws *priorityWriteScheduler) OpenStream(streamID uint32, options OpenStreamOptions) {
- // The stream may be currently idle but cannot be opened or closed.
- if curr := ws.nodes[streamID]; curr != nil {
- if curr.state != priorityNodeIdle {
- panic(fmt.Sprintf("stream %d already opened", streamID))
- }
- curr.state = priorityNodeOpen
- return
- }
- // RFC 7540, Section 5.3.5:
- // "All streams are initially assigned a non-exclusive dependency on stream 0x0.
- // Pushed streams initially depend on their associated stream. In both cases,
- // streams are assigned a default weight of 16."
- parent := ws.nodes[options.PusherID]
- if parent == nil {
- parent = &ws.root
- }
- n := &priorityNode{
- q: *ws.queuePool.get(),
- id: streamID,
- weight: priorityDefaultWeight,
- state: priorityNodeOpen,
- }
- n.setParent(parent)
- ws.nodes[streamID] = n
- if streamID > ws.maxID {
- ws.maxID = streamID
- }
- }
- func (ws *priorityWriteScheduler) CloseStream(streamID uint32) {
- if streamID == 0 {
- panic("violation of WriteScheduler interface: cannot close stream 0")
- }
- if ws.nodes[streamID] == nil {
- panic(fmt.Sprintf("violation of WriteScheduler interface: unknown stream %d", streamID))
- }
- if ws.nodes[streamID].state != priorityNodeOpen {
- panic(fmt.Sprintf("violation of WriteScheduler interface: stream %d already closed", streamID))
- }
- n := ws.nodes[streamID]
- n.state = priorityNodeClosed
- n.addBytes(-n.bytes)
- q := n.q
- ws.queuePool.put(&q)
- n.q.s = nil
- if ws.maxClosedNodesInTree > 0 {
- ws.addClosedOrIdleNode(&ws.closedNodes, ws.maxClosedNodesInTree, n)
- } else {
- ws.removeNode(n)
- }
- }
- func (ws *priorityWriteScheduler) AdjustStream(streamID uint32, priority PriorityParam) {
- if streamID == 0 {
- panic("adjustPriority on root")
- }
- // If streamID does not exist, there are two cases:
- // - A closed stream that has been removed (this will have ID <= maxID)
- // - An idle stream that is being used for "grouping" (this will have ID > maxID)
- n := ws.nodes[streamID]
- if n == nil {
- if streamID <= ws.maxID || ws.maxIdleNodesInTree == 0 {
- return
- }
- ws.maxID = streamID
- n = &priorityNode{
- q: *ws.queuePool.get(),
- id: streamID,
- weight: priorityDefaultWeight,
- state: priorityNodeIdle,
- }
- n.setParent(&ws.root)
- ws.nodes[streamID] = n
- ws.addClosedOrIdleNode(&ws.idleNodes, ws.maxIdleNodesInTree, n)
- }
- // Section 5.3.1: A dependency on a stream that is not currently in the tree
- // results in that stream being given a default priority (Section 5.3.5).
- parent := ws.nodes[priority.StreamDep]
- if parent == nil {
- n.setParent(&ws.root)
- n.weight = priorityDefaultWeight
- return
- }
- // Ignore if the client tries to make a node its own parent.
- if n == parent {
- return
- }
- // Section 5.3.3:
- // "If a stream is made dependent on one of its own dependencies, the
- // formerly dependent stream is first moved to be dependent on the
- // reprioritized stream's previous parent. The moved dependency retains
- // its weight."
- //
- // That is: if parent depends on n, move parent to depend on n.parent.
- for x := parent.parent; x != nil; x = x.parent {
- if x == n {
- parent.setParent(n.parent)
- break
- }
- }
- // Section 5.3.3: The exclusive flag causes the stream to become the sole
- // dependency of its parent stream, causing other dependencies to become
- // dependent on the exclusive stream.
- if priority.Exclusive {
- k := parent.kids
- for k != nil {
- next := k.next
- if k != n {
- k.setParent(n)
- }
- k = next
- }
- }
- n.setParent(parent)
- n.weight = priority.Weight
- }
- func (ws *priorityWriteScheduler) Push(wr FrameWriteRequest) {
- var n *priorityNode
- if id := wr.StreamID(); id == 0 {
- n = &ws.root
- } else {
- n = ws.nodes[id]
- if n == nil {
- // id is an idle or closed stream. wr should not be a HEADERS or
- // DATA frame. However, wr can be a RST_STREAM. In this case, we
- // push wr onto the root, rather than creating a new priorityNode,
- // since RST_STREAM is tiny and the stream's priority is unknown
- // anyway. See issue #17919.
- if wr.DataSize() > 0 {
- panic("add DATA on non-open stream")
- }
- n = &ws.root
- }
- }
- n.q.push(wr)
- }
- func (ws *priorityWriteScheduler) Pop() (wr FrameWriteRequest, ok bool) {
- ws.root.walkReadyInOrder(false, &ws.tmp, func(n *priorityNode, openParent bool) bool {
- limit := int32(math.MaxInt32)
- if openParent {
- limit = ws.writeThrottleLimit
- }
- wr, ok = n.q.consume(limit)
- if !ok {
- return false
- }
- n.addBytes(int64(wr.DataSize()))
- // If B depends on A and B continuously has data available but A
- // does not, gradually increase the throttling limit to allow B to
- // steal more and more bandwidth from A.
- if openParent {
- ws.writeThrottleLimit += 1024
- if ws.writeThrottleLimit < 0 {
- ws.writeThrottleLimit = math.MaxInt32
- }
- } else if ws.enableWriteThrottle {
- ws.writeThrottleLimit = 1024
- }
- return true
- })
- return wr, ok
- }
- func (ws *priorityWriteScheduler) addClosedOrIdleNode(list *[]*priorityNode, maxSize int, n *priorityNode) {
- if maxSize == 0 {
- return
- }
- if len(*list) == maxSize {
- // Remove the oldest node, then shift left.
- ws.removeNode((*list)[0])
- x := (*list)[1:]
- copy(*list, x)
- *list = (*list)[:len(x)]
- }
- *list = append(*list, n)
- }
- func (ws *priorityWriteScheduler) removeNode(n *priorityNode) {
- for k := n.kids; k != nil; k = k.next {
- k.setParent(n.parent)
- }
- n.setParent(nil)
- delete(ws.nodes, n.id)
- }
|