I am learning Go using their "A Tour of Go". I managed to do all exercises but the last one has me frustrated. It is dying with fatal error: all goroutines are asleep - deadlock!
package main
import (
"fmt"
"sync"
)
type Fetcher interface {
// Fetch returns the body of URL and
// a slice of URLs found on that page.
Fetch(url string) (body string, urls []string, err error)
}
var urlCache = make(map[string]bool)
var mutex = sync.Mutex{}
// Crawl uses fetcher to recursively crawl
// pages starting with url, to a maximum of depth.
func Crawl(url string, depth int, fetcher Fetcher, wg *sync.WaitGroup) {
defer wg.Done()
fmt.Printf("Crawl: %v \n", url)
if depth <= 0 {
fmt.Println("Crawl: reached depth")
return
}
mutex.Lock()
if alreadyFetched := urlCache[url]; alreadyFetched {
fmt.Printf("Crawl: %v already fetched\n", url)
return
}
urlCache[url] = true
mutex.Unlock()
body, urls, err := fetcher.Fetch(url)
if err != nil {
fmt.Println(err)
return
}
fmt.Printf("Crawl: found %s %q\n", url, body)
for _, u := range urls {
wg.Add(1)
go Crawl(u, depth-1, fetcher, wg)
}
return
}
func main() {
var wg sync.WaitGroup
fmt.Println("Main: Starting worker")
wg.Add(1)
go Crawl("https://golang.org/", 4, fetcher, &wg)
fmt.Println("Main: Waiting for workers to finish")
fmt.Println("Main: Completed")
wg.Wait()
}
// fakeFetcher is Fetcher that returns canned results.
type fakeFetcher map[string]*fakeResult
type fakeResult struct {
body string
urls []string
}
func (f fakeFetcher) Fetch(url string) (string, []string, error) {
if res, ok := f[url]; ok {
return res.body, res.urls, nil
}
return "", nil, fmt.Errorf("not found: %s", url)
}
// fetcher is a populated fakeFetcher.
var fetcher = fakeFetcher{
"https://golang.org/": &fakeResult{
"The Go Programming Language",
[]string{
"https://golang.org/pkg/",
"https://golang.org/cmd/",
},
},
"https://golang.org/pkg/": &fakeResult{
"Packages",
[]string{
"https://golang.org/",
"https://golang.org/cmd/",
"https://golang.org/pkg/fmt/",
"https://golang.org/pkg/os/",
},
},
"https://golang.org/pkg/fmt/": &fakeResult{
"Package fmt",
[]string{
"https://golang.org/",
"https://golang.org/pkg/",
},
},
"https://golang.org/pkg/os/": &fakeResult{
"Package os",
[]string{
"https://golang.org/",
"https://golang.org/pkg/",
},
},
}
Any tips and help will be greatly appreciated. Thank you.
Edit1: In-lined code
CodePudding user response:
Here:
mutex.Lock()
if alreadyFetched := urlCache[url]; alreadyFetched {
fmt.Printf("Crawl: %v already fetched\n", url)
return
}
urlCache[url] = true
mutex.Unlock()
When the if
condition is true, you return without unlocking the shared mutex.
So eventually the other goroutines will deadlock on mutex.Lock()
, because the goroutine which acquired it never released.
Call mutex.Unlock()
also in the if
block before returning.
You could also use defer mutex.Unlock()
right after locking, and before the if
statement, and in a trivial application this won't make an appreciable difference, but in a real-world scenario you want to keep the resource locked for the shortest time possible. If you have a function body with other long-running operations, unlocking immediately after the if
is acceptable. But then you must remember to release the lock if the if
can return control flow to the caller.