Go: Performance Hit of Waiting for Multiple Channels

I discovered something today that threw me for a bit of a loop, and I wanted to run it by the community to see if I'm missing something, or perhaps just designing things poorly.

Use case: I have an input channel, and I want a Go routine to wait for values on that channel. If the context is canceled, exit out. Optionally, also run a callback if it's been waiting a certain amount of time for an input without receiving one.

I started with code like this:

func myRoutine(ctx context.Context, c <-chan int, callbackInterval *time.Duration, callback func() error) {
    var timeoutChan <-chan time.Time
    var timer *time.Timer
    if callbackInterval != nil {
        // If we have a callback interval set, create a timer for it
        timer = time.NewTimer(*callbackInterval)
        timeoutChan = timer.C
    } else {
        // If we don't have a callback interval set, create
        // a channel that will never provide a value.
        timeoutChan = make(<-chan time.Time, 0)
    }

    for {
        // Track whether the timer case fired this iteration, so we know
        // whether its channel has already been drained.
        timerFired := false

        select {

        // Handle context cancellation
        case <-ctx.Done():
            return

        // Handle timeouts
        case <-timeoutChan:
            timerFired = true
            callback()

        // Handle a value in the channel
        case v, ok := <-c:
            if !ok {
                // Channel is closed, exit out
                return
            }

            // Do something with v
            fmt.Println(v)
        }

        // Reset the timeout timer, if there is one
        if timer != nil {
            // Only drain the channel if the timer case didn't fire this
            // iteration; otherwise <-timer.C would block forever because
            // the select above already consumed the value.
            if !timerFired && !timer.Stop() {
                <-timer.C
            }
            // Reset the timer
            timer.Reset(*callbackInterval)
            timeoutChan = timer.C
        }
    }
}

This design seemed nice because, as far as I can tell, there's no way to make a case in a select conditional (it's possible with reflect, but that's usually very slow). So instead of writing two different selects (one with the timer when a timer is needed, one without) and an if to choose between them, I used a single select in which the timer channel simply never provides a value when no timer is wanted. Keep it DRY.
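
For reference, the duplicated alternative I wanted to avoid would look something like this (rough sketch only; the hypothetical myRoutineTwoSelects mirrors myRoutine above, with the timer-reset logic elided):

func myRoutineTwoSelects(ctx context.Context, c <-chan int, callbackInterval *time.Duration, callback func() error) {
    var timer *time.Timer
    if callbackInterval != nil {
        timer = time.NewTimer(*callbackInterval)
    }

    for {
        if timer != nil {
            // Variant with the timer case.
            select {
            case <-ctx.Done():
                return
            case <-timer.C:
                callback()
            case v, ok := <-c:
                if !ok {
                    return
                }
                fmt.Println(v)
            }
        } else {
            // Variant without the timer case; the value handling is duplicated.
            select {
            case <-ctx.Done():
                return
            case v, ok := <-c:
                if !ok {
                    return
                }
                fmt.Println(v)
            }
        }
        // The timer-reset logic from the version above would also need to
        // be repeated or hoisted here, adding yet more branching.
    }
}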

But then I started wondering about the performance impacts of this. When we're not using a timer, would it slow down the application to have this extra channel in the select that never gets a value (in place of the timer channel)?

So, I decided to do some testing to compare.

package main

import (
    "context"
    "fmt"
    "reflect"
    "time"
)

func prepareChan() chan int {
    var count int = 10000000

    c := make(chan int, count)

    for i := 0; i < count; i++ {
        c <- i
    }
    close(c)
    return c
}

func oneChan() int64 {
    c := prepareChan()

    foundVal := true
    start := time.Now()
    for {
        select {
        case _, foundVal = <-c:
            break
        }
        if !foundVal {
            break
        }
    }
    ms := time.Since(start).Milliseconds()
    fmt.Printf("1 Chan - Standard: %dms\n", ms)
    return ms
}

func twoChan() int64 {
    c := prepareChan()

    neverchan1 := make(chan struct{}, 0)

    foundVal := true
    start := time.Now()
    for {
        select {
        case _, foundVal = <-c:
            break
        case <-neverchan1:
            break
        }
        if !foundVal {
            break
        }
    }
    ms := time.Since(start).Milliseconds()
    fmt.Printf("2 Chan - Standard: %dms\n", ms)
    return ms
}

func threeChan() int64 {
    c := prepareChan()

    neverchan1 := make(chan struct{}, 0)
    neverchan2 := make(chan struct{}, 0)

    foundVal := true
    start := time.Now()
    for {
        select {
        case _, foundVal = <-c:
            break
        case <-neverchan1:
            break
        case <-neverchan2:
            break
        }
        if !foundVal {
            break
        }
    }
    ms := time.Since(start).Milliseconds()
    fmt.Printf("3 Chan - Standard: %dms\n", ms)
    return ms
}

func fourChan() int64 {
    c := prepareChan()

    neverchan1 := make(chan struct{}, 0)
    neverchan2 := make(chan struct{}, 0)
    neverchan3 := make(chan struct{}, 0)

    foundVal := true
    start := time.Now()
    for {
        select {
        case _, foundVal = <-c:
            break
        case <-neverchan1:
            break
        case <-neverchan2:
            break
        case <-neverchan3:
            break
        }
        if !foundVal {
            break
        }
    }
    ms := time.Since(start).Milliseconds()
    fmt.Printf("4 Chan - Standard: %dms\n", ms)
    return ms
}

func oneChanReflect() int64 {
    c := reflect.ValueOf(prepareChan())

    branches := []reflect.SelectCase{
        {Dir: reflect.SelectRecv, Chan: c, Send: reflect.Value{}},
    }

    start := time.Now()
    for {
        _, _, recvOK := reflect.Select(branches)
        if !recvOK {
            break
        }
    }
    ms := time.Since(start).Milliseconds()
    fmt.Printf("1 Chan - Reflect: %dms\n", ms)
    return ms
}

func twoChanReflect() int64 {
    c := reflect.ValueOf(prepareChan())
    neverchan1 := reflect.ValueOf(make(chan struct{}, 0))

    branches := []reflect.SelectCase{
        {Dir: reflect.SelectRecv, Chan: c, Send: reflect.Value{}},
        {Dir: reflect.SelectRecv, Chan: neverchan1, Send: reflect.Value{}},
    }

    start := time.Now()
    for {
        _, _, recvOK := reflect.Select(branches)
        if !recvOK {
            break
        }
    }
    ms := time.Since(start).Milliseconds()
    fmt.Printf("2 Chan - Reflect: %dms\n", ms)
    return ms
}

func threeChanReflect() int64 {
    c := reflect.ValueOf(prepareChan())
    neverchan1 := reflect.ValueOf(make(chan struct{}, 0))
    neverchan2 := reflect.ValueOf(make(chan struct{}, 0))

    branches := []reflect.SelectCase{
        {Dir: reflect.SelectRecv, Chan: c, Send: reflect.Value{}},
        {Dir: reflect.SelectRecv, Chan: neverchan1, Send: reflect.Value{}},
        {Dir: reflect.SelectRecv, Chan: neverchan2, Send: reflect.Value{}},
    }

    start := time.Now()
    for {
        _, _, recvOK := reflect.Select(branches)
        if !recvOK {
            break
        }
    }
    ms := time.Since(start).Milliseconds()
    fmt.Printf("3 Chan - Reflect: %dms\n", ms)
    return ms
}

func fourChanReflect() int64 {
    c := reflect.ValueOf(prepareChan())
    neverchan1 := reflect.ValueOf(make(chan struct{}, 0))
    neverchan2 := reflect.ValueOf(make(chan struct{}, 0))
    neverchan3 := reflect.ValueOf(make(chan struct{}, 0))

    branches := []reflect.SelectCase{
        {Dir: reflect.SelectRecv, Chan: c, Send: reflect.Value{}},
        {Dir: reflect.SelectRecv, Chan: neverchan1, Send: reflect.Value{}},
        {Dir: reflect.SelectRecv, Chan: neverchan2, Send: reflect.Value{}},
        {Dir: reflect.SelectRecv, Chan: neverchan3, Send: reflect.Value{}},
    }

    start := time.Now()
    for {
        _, _, recvOK := reflect.Select(branches)
        if !recvOK {
            break
        }
    }
    ms := time.Since(start).Milliseconds()
    fmt.Printf("4 Chan - Reflect: %dms\n", ms)
    return ms
}

func main() {
    oneChan()
    oneChanReflect()
    twoChan()
    twoChanReflect()
    threeChan()
    threeChanReflect()
    fourChan()
    fourChanReflect()
}

And the results:

1 Chan - Standard: 169ms
1 Chan - Reflect: 1017ms
2 Chan - Standard: 460ms
2 Chan - Reflect: 1593ms
3 Chan - Standard: 682ms
3 Chan - Reflect: 2041ms
4 Chan - Standard: 950ms
4 Chan - Reflect: 2423ms

It scales roughly linearly with the number of channels. In hindsight, I suppose this makes sense: the runtime presumably has to check each channel in the select to see whether one is ready. As expected, using reflect is far slower.

In any case, my questions are:

  1. Does this result surprise anyone else? I would have expected an interrupt-based design that maintains the same performance regardless of the number of channels in the select, since it wouldn't need to poll each channel.

  2. Given the original problem that I was trying to solve (an "optional" case in a select), what would be the best/preferred design? Is the answer just to have two different selects, one with the timer and one without? That gets awfully messy when I have 2 or 3 conditional/optional timers for various things.

EDIT: @Brits suggested using a nil channel for "never returning a value" instead of an initialized channel, i.e. using var neverchan1 chan struct{} instead of neverchan1 := make(chan struct{}, 0). Here are the new performance results:

1 Chan - Standard: 221ms
1 Chan - Reflect: 1639ms
2 Chan - Standard: 362ms
2 Chan - Reflect: 2544ms
3 Chan - Standard: 376ms
3 Chan - Reflect: 3359ms
4 Chan - Standard: 394ms
4 Chan - Reflect: 4123ms

There's still an effect, most noticeably going from one channel in the select to two, but beyond the second channel the impact is much smaller than with an initialized channel.
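
For completeness, applying the nil-channel suggestion back to my original function just means dropping the make(...) branch; a receive from a nil channel blocks forever, so that case is effectively disabled when no interval is set (sketch of the relevant part only):

func myRoutine(ctx context.Context, c <-chan int, callbackInterval *time.Duration, callback func() error) {
    var timeoutChan <-chan time.Time // stays nil when no callback interval is set
    var timer *time.Timer
    if callbackInterval != nil {
        timer = time.NewTimer(*callbackInterval)
        timeoutChan = timer.C
    }

    // ... the for/select loop is unchanged: the <-timeoutChan case simply
    // never fires while timeoutChan is nil ...
}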

Still wondering if this is the best possible solution though...

CodePudding user response:

As per the comments, an alternative to a channel that never provides a value is a nil channel, which is "never ready for communication". Replace neverchan1 := make(chan struct{}, 0) with var neverchan1 chan struct{} (or neverchan1 := (chan struct{})(nil)), as in the following example:

func twoChan() int64 {
    c := prepareChan()

    var neverchan1 chan struct{} // was neverchan1 := make(chan struct{}, 0)

    foundVal := true
    start := time.Now()
    for {
        select {
        case _, foundVal = <-c:
            break
        case <-neverchan1:
            break
        }
        if !foundVal {
            break
        }
    }
    ms := time.Since(start).Milliseconds()
    fmt.Printf("2 Chan - Standard: %dms\n", ms)
    return ms
}

This significantly narrows the gap (using the 4-channel version, where the difference is greatest; my machine is a bit slower than yours):

4 Chan - Standard: 1281ms
4 Chan - Nil: 394ms

"is the best possible solution"

No; but that would probably involve some assembler! There are a number of things you could do that may improve on this (one very rough example follows); however, their effectiveness is going to depend on a range of factors (real-life performance often differs significantly from a contrived test case).
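
For instance, one kind of tweak that sometimes helps (very rough sketch; handle() is just a placeholder for whatever you do with each value): once the multi-way select has delivered a value, cheaply drain anything already buffered on c with a receive/default loop, so the expensive select runs less often.

func drainAfterSelect(ctx context.Context, c <-chan int, timeoutChan <-chan time.Time, handle func(int)) {
    for {
        select {
        case <-ctx.Done():
            return
        case <-timeoutChan:
            // timeout handling elided
        case v, ok := <-c:
            if !ok {
                return
            }
            handle(v)

            // Cheap inner loop: keep receiving while values are immediately
            // available, without re-running the multi-case select.
        drain:
            for {
                select {
                case v, ok := <-c:
                    if !ok {
                        return
                    }
                    handle(v)
                default:
                    break drain
                }
            }
        }
    }
}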

At this point I would be asking "what impact is optimising this function going to have on the overall application?". Unless saving a few nanoseconds will make a material difference (i.e. improve profit!), I'd suggest stopping here until profiling shows that this code actually matters.
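
If you do want to keep comparing variants, a benchmark written with the standard testing package is usually more trustworthy than timing things in main(); a minimal sketch (the package and benchmark names are mine):

package selectbench

import "testing"

// fill returns a closed channel pre-loaded with n values.
func fill(n int) chan int {
    c := make(chan int, n)
    for i := 0; i < n; i++ {
        c <- i
    }
    close(c)
    return c
}

// One real case plus one nil-channel case.
func BenchmarkSelectTwoCases(b *testing.B) {
    c := fill(b.N)
    var never chan struct{} // nil: never ready for communication
    b.ResetTimer()
    for {
        select {
        case _, ok := <-c:
            if !ok {
                return
            }
        case <-never:
        }
    }
}

// One real case plus three nil-channel cases.
func BenchmarkSelectFourCases(b *testing.B) {
    c := fill(b.N)
    var never1, never2, never3 chan struct{}
    b.ResetTimer()
    for {
        select {
        case _, ok := <-c:
            if !ok {
                return
            }
        case <-never1:
        case <-never2:
        case <-never3:
        }
    }
}

Run with: go test -bench=Select -benchmem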
