I am new to Go and am working on an assignment where I have to write code that returns the word frequencies of a text. The words 'Hello', 'HELLO' and 'hello' should all be counted as 'hello', so I need to convert all strings to lower case.
I know that I should use strings.ToLower(), but I don't know where to include it in the code. Can someone please help me?
package main
import (
"fmt"
"io/ioutil"
"log"
"strings"
"time"
)
// DataFile is the name of the text file read as input.
const DataFile = "loremipsum.txt"
// WordCount returns the word frequencies of the text argument.
//
// Words are the whitespace-separated fields of text (see strings.Fields).
// Keys are used exactly as they appear in text, so "Hello" and "hello" are
// counted separately.
func WordCount(text string) map[string]int {
	fregs := make(map[string]int)
	for _, word := range strings.Fields(text) {
		// Increment rather than assign, so repeated words accumulate.
		fregs[word]++
	}
	return fregs
}
// Benchmark how long it takes to count word frequencies in text numRuns times.
//
// Return the total time elapsed.
func benchmark(text string, numRuns int) int64 {
start := time.Now()
for i := 0; i < numRuns; i {
WordCount(text)
}
runtimeMillis := time.Since(start).Nanoseconds() / 1e6
return runtimeMillis
}
// printResults writes a benchmark summary to standard output: the number of
// runs, the total time in milliseconds, and the mean time per run.
func printResults(runtimeMillis int64, numRuns int) {
	// Convert both operands so the mean keeps its fractional part.
	perRun := float64(runtimeMillis) / float64(numRuns)

	fmt.Printf("amount of runs: %d\n", numRuns)
	fmt.Printf("total time: %d ms\n", runtimeMillis)
	fmt.Printf("average time/run: %.2f ms\n", perRun)
}
func main() {
// read in DataFile as a string called data
data, err:= ioutil.ReadFile("loremipsum.txt")
if err != nil {
log.Fatal(err)
}
// Convert []byte to string and print to screen
text := string(data)
fmt.Println(text)
fmt.Printf("%#v",WordCount(string(data)))
numRuns := 100
runtimeMillis := benchmark(string(data), numRuns)
printResults(runtimeMillis, numRuns)
}
CodePudding user response:
You should convert the words to lowercase when you use them as map keys:
for _, word := range words {
	// Lowercase the key so "Hello", "HELLO" and "hello" share one counter,
	// and increment (not assign) so repeated words accumulate.
	fregs[strings.ToLower(word)]++
}
CodePudding user response:
I get [a:822 a.:110 ...]. I want every "a" to be counted under the same key. How do I change the code so that "a" and "a." are treated as the same word? – hello123
You need to carefully define a word. For example, a string of consecutive letters and numbers converted to lowercase.
// WordCount returns how many times each word occurs in s.
//
// A word is a maximal run of Unicode letters and numbers; every other rune
// (whitespace, punctuation, and so on) acts as a separator. Words are
// lowercased before counting, so "A", "a" and "a." all count toward "a".
func WordCount(s string) map[string]int {
	// isSeparator reports whether r ends the current word.
	isSeparator := func(r rune) bool {
		return !unicode.IsLetter(r) && !unicode.IsNumber(r)
	}

	counts := make(map[string]int)
	for _, word := range strings.FieldsFunc(s, isSeparator) {
		counts[strings.ToLower(word)]++
	}
	return counts
}
CodePudding user response:
To remove all non-word characters, you could use a regular expression:
package main
import (
"bufio"
"fmt"
"log"
"regexp"
"strings"
)
// main strips non-word characters from a sample sentence and prints each
// remaining word, lowercased, on its own line.
func main() {
	str1 := "This is some text! I want to count each word. Is it cool?"

	// The pattern is a constant, so MustCompile is appropriate. It matches any
	// character that is not an ASCII letter, digit or underscore; each match is
	// replaced with a space so punctuation turns into a word separator.
	re := regexp.MustCompile(`[^\w]`)
	str1 = re.ReplaceAllString(str1, " ")

	scanner := bufio.NewScanner(strings.NewReader(str1))
	scanner.Split(bufio.ScanWords)
	for scanner.Scan() {
		fmt.Println(strings.ToLower(scanner.Text()))
	}
	// Scan returns false on error as well as at end of input, so check which.
	if err := scanner.Err(); err != nil {
		log.Fatal(err)
	}
}