// main is the program entry point. Its body is elided in this snippet; only
// the placeholder comment below remains.
func main() {
// A switch statement goes here that runs grabusernames().
}
// grabusernames reads longlist.txt line by line and calls tellonym once for
// every non-empty, whitespace-trimmed line.
//
// The process exits via log.Fatalf when the file cannot be opened or when a
// read fails with anything other than io.EOF.
func grabusernames() {
	f, err := os.Open("longlist.txt")
	if err != nil {
		log.Fatalf("open file error: %v", err)
	}
	defer f.Close()

	rd := bufio.NewReader(f)
	for {
		line, err := rd.ReadString('\n')
		if err != nil && err != io.EOF {
			log.Fatalf("read file line error: %v", err)
		}
		// ReadString returns the data read so far together with io.EOF when
		// the file does not end in a newline, so the line is handled before
		// the EOF check; otherwise the final username would be dropped.
		if name := strings.TrimSpace(line); name != "" {
			tellonym(name)
		}
		if err == io.EOF {
			break
		}
	}
}
// tellonym requests https://tellonym.me/<line2> and prints a message based on
// the HTTP status code: 404 is reported as "not taken", 200 as "taken", and
// anything else is printed with its status code.
//
// Every goroutine started by the loop below uses the same line2, so a single
// call repeats the identical check once per loop iteration.
//
// NOTE(review): this snippet looks damaged by extraction - the string
// concatenations have no `+`, the loop post statement reads `i` rather than
// `i++`, and the function's closing brace (and any wait on swg) is not
// visible. Confirm against the original before relying on it.
func tellonym(line2 string) {
threads := 10
// presumably caps the number of goroutines running at once at `threads` -
// confirm against the sizedwaitgroup documentation
swg := sizedwaitgroup.New(threads)
for i := 0; i < 1000; i {
swg.Add()
go func(i int) {
defer swg.Done()
var client http.Client
resp, err := client.Get("https://tellonym.me/" line2)
if err != nil {
// log.Fatal terminates the whole process, not just this goroutine.
log.Fatal(err)
}
defer resp.Body.Close()
//fmt.Println("Response code: ", resp.StatusCode)
if resp.StatusCode == 404 {
fmt.Println("Username" line2 "not taken")
} else if resp.StatusCode == 200 {
fmt.Println("username " line2 " taken")
} else {
fmt.Println("Something else, response code: ", resp.StatusCode)
}
}(i)
}
The issue with the code above is that it checks the same username 1,000 times. I'd like it to check each username in longlist.txt once, but I want to do it concurrently (it's a long list and I'd like it to be fast).
Current output:
Username causenot taken
Username causenot taken
Username causenot taken
Username causenot taken
Desired output:
Username causenot taken
Username billybob taken
Username something taken
Username stacker taken
CodePudding user response:
You have to run the tellonym(line2) call in a goroutine. The for loop inside tellonym uses the same username 1,000 times.
// main is the program entry point. Its body is elided in this snippet; only
// the placeholder comment below remains.
func main() {
// A switch statement goes here that runs grabusernames().
}
// grabusernames reads longlist.txt line by line and runs tellonym in its own
// goroutine for every non-empty, whitespace-trimmed line.
//
// At most maxInFlight checks run at the same time, and the function returns
// only after every started check has finished. The process exits via
// log.Fatalf when the file cannot be opened or when a read fails with anything
// other than io.EOF.
func grabusernames() {
	// maxInFlight bounds the number of concurrent tellonym calls.
	const maxInFlight = 10

	f, err := os.Open("longlist.txt")
	if err != nil {
		log.Fatalf("open file error: %v", err)
	}
	defer f.Close()

	rd := bufio.NewReader(f)
	slots := make(chan struct{}, maxInFlight)
	var wg sync.WaitGroup
	for {
		line, err := rd.ReadString('\n')
		if err != nil && err != io.EOF {
			log.Fatalf("read file line error: %v", err)
		}
		// ReadString returns the data read so far together with io.EOF when
		// the file does not end in a newline, so the line is handled before
		// the EOF check; otherwise the final username would be dropped.
		if name := strings.TrimSpace(line); name != "" {
			slots <- struct{}{} // blocks while maxInFlight checks are running
			wg.Add(1)
			go func(name string) { // use go routines in here
				defer func() {
					<-slots
					wg.Done()
				}()
				tellonym(name)
			}(name)
		}
		if err == io.EOF {
			break
		}
	}
	// Without this wait the function could return, and the program exit,
	// before any of the started goroutines has completed.
	wg.Wait()
}
Also keep these details in mind:
If you're reading from an io.Reader, think of it as reading from a stream. It's a single input source that you can't 'read in parallel' because of its nature: under the hood, you get a byte, wait for the next one, get one more, and so on. Tokenizing it into words comes later, in the buffer.
Second, I hope you're not trying to use goroutines as a 'silver bullet' in a 'let's add goroutines and everything will just speed up' manner. Just because Go gives you such an easy way to use concurrency doesn't mean you should use it everywhere.
And finally, if you really need to split a huge file into words in parallel and you think the splitting part will be the bottleneck (I don't know your case, but I really doubt it), then you have to invent your own algorithm and use the 'os' package to Seek()/Read() parts of the file, each processed by its own goroutine, and somehow track which parts were already processed.
CodePudding user response:
Try this
// grabusernames reads longlist.txt line by line and starts one tellonym
// goroutine for every non-empty, whitespace-trimmed line.
//
// A buffered channel of capacity 10 acts as a semaphore, so at most ten checks
// are in flight at once; the function returns only after all started checks
// have finished. The process exits via log.Fatalf when the file cannot be
// opened or when a read fails with anything other than io.EOF.
func grabusernames() {
	f, err := os.Open("longlist.txt")
	if err != nil {
		log.Fatalf("open file error: %v", err)
	}
	defer f.Close()

	rd := bufio.NewReader(f)
	ch := make(chan struct{}, 10)
	var sem sync.WaitGroup
	for {
		line, err := rd.ReadString('\n')
		if err != nil && err != io.EOF {
			log.Fatalf("read file line error: %v", err)
		}
		// ReadString returns the data read so far together with io.EOF when
		// the file does not end in a newline, so the line is handled before
		// the EOF check; otherwise the final username would be dropped.
		if name := strings.TrimSpace(line); name != "" {
			ch <- struct{}{} // blocks while ten checks are already running
			sem.Add(1)
			go tellonym(name, ch, &sem)
		}
		if err == io.EOF {
			break
		}
	}
	sem.Wait()
}
// tellonym checks whether the username line2 exists on tellonym.me and prints
// the result.
//
// It is written to run as a goroutine: ch is a semaphore channel from which
// one value is received on return, and sem is a WaitGroup on which Done is
// called on return. Both happen on every return path.
//
// A 404 response is reported as "not taken" and a 200 as "taken"; any other
// status code is printed as-is. A failed request is logged and skipped.
func tellonym(line2 string, ch chan struct{}, sem *sync.WaitGroup) {
	defer func() {
		sem.Done()
		<-ch
	}()

	var client http.Client
	resp, err := client.Get("https://tellonym.me/" + line2)
	if err != nil {
		// Log and return instead of log.Fatal: Fatal exits the process, which
		// would abort every other in-flight check because of one failure.
		log.Printf("check %q: %v", line2, err)
		return
	}
	defer resp.Body.Close()

	switch resp.StatusCode {
	case http.StatusNotFound:
		fmt.Println("Username " + line2 + " not taken")
	case http.StatusOK:
		fmt.Println("username " + line2 + " taken")
	default:
		fmt.Println("Something else, response code: ", resp.StatusCode)
	}
}