Home > Enterprise >  add colly package output text to map in golang
add colly package output text to map in golang

Time:11-17

I was making a web scraper with the colly package that collects the ContestName and ContestTime from a website and writes them to a JSON file.

So I did it like this:


    // Build the nested result map (site -> category -> index -> field) in one
    // literal; the inner "FutureContests" map is empty but non-nil.
    Contests := map[string]map[string]map[string]map[string]string{
        "AtCoder": {
            "FutureContests": {},
        },
    }

    // Hand the prepared map to the scraper.
    AtcoderFunc(Contests)


.................code..........

func AtcoderFunc(Contests map[string]map[string]map[string]map[string]string) {
    collector := colly.NewCollector(
        colly.AllowedDomains("atcoder.jp", "www.atcoder.jp"),
    )

    // loc, _ := time.LoadLocation("Asia/Calcutta")
    // format := "2006-01-02 15:04:05"
    // var i int
    format := "2006-01-02 15:04:05-0700"
    loc, _ := time.LoadLocation("Asia/Calcutta")


    for i := 1; i < 10; i   {
        ContestSelTime := fmt.Sprintf("#contest-table-upcoming  div  div  table  tbody  tr:nth-child(%d)  td:nth-child(1)  a", i 1)
        ContestSelName := fmt.Sprintf("#contest-table-upcoming  div  div  table  tbody  tr:nth-child(%d)  td:nth-child(2)", i)

        // for contest name
        collector.OnHTML(ContestSelName, func(element *colly.HTMLElement) {
            ContestName := element.ChildText("a")
            fmt.Printf("%T \n", ContestName)
            fmt.Println(ContestName) // instead of printing i want to add it to the Contests["AtCoder"]["FutureContests"] map and print like json 
            

        })

        // for contestTime
        collector.OnHTML(ContestSelTime, func(element *colly.HTMLElement) {
            ContestStartTime := element.ChildText("time")
            parsed_time, _ := time.Parse(format, ContestStartTime)
            IST_time := parsed_time.In(loc)
            fmt.Println("Time in IST", IST_time) // instead of printing i want to add it to the Contests["AtCoder"]["FutureContests"] map.
        })

    }

    collector.OnRequest(func(request *colly.Request) {
        fmt.Println("Visiting", request.URL.String())
    })

    collector.Visit("https://atcoder.jp/contests")

}


Any ideas? I tried adding the value to the map like this:

            // This assignment does not compile: the element on the left has type
            // map[string]map[string]string, but the literal on the right is a
            // plain map[string]string (one nesting level short).
            Contests["AtCoder"]["FutureContests"] = map[string]string{
                "Name": string(ContestName),
            }

I want to produce JSON like this:

{
  "AtCoder": {
    "FutureContests": {
      "1": {
        "Name": "Contest name",
        "Start": "time here"
      },
      "2": {
        "Name": "Contest name",
        "Start": "time here"
      }
    }
  }
}

But it gives the error: cannot use (map[string]string literal) (value of type map[string]string) as map[string]map[string]string value in assignment

any idea?

CodePudding user response:

The error was in the map assignment. Such a deeply nested structure is hard to manage, but I found a way to deal with it successfully. Here is the code:

package main

import (
    "encoding/json"
    "fmt"
    "strconv"
    "time"

    "github.com/gocolly/colly/v2"
)

// contest is an empty struct type; none of the code shown here refers to it.
type contest struct{}

func AtcoderFunc(contests map[string]map[string]map[string]string) {
    collector := colly.NewCollector(
        colly.AllowedDomains("atcoder.jp", "www.atcoder.jp"),
    )

    format := "2006-01-02 15:04:05-0700"
    loc, _ := time.LoadLocation("Asia/Calcutta")

    contests["UpcomingContest"] = make(map[string]map[string]string)

    for i := 1; i < 3; i   {
        rawI := strconv.Itoa(i)
        contests["UpcomingContest"][rawI] = make(map[string]string)

        contestSelTime := fmt.Sprintf("#contest-table-upcoming  div  div  table  tbody  tr:nth-child(%d)  td:nth-child(1)  a", i 1)
        contestSelName := fmt.Sprintf("#contest-table-upcoming  div  div  table  tbody  tr:nth-child(%d)  td:nth-child(2)", i)

        // for contest name
        collector.OnHTML(contestSelName, func(element *colly.HTMLElement) {
            contestName := element.ChildText("a")
            contests["UpcomingContest"][rawI]["Name"] = contestName
        })

        // for contestTime
        collector.OnHTML(contestSelTime, func(element *colly.HTMLElement) {
            ContestStartTime := element.ChildText("time")
            parsed_time, _ := time.Parse(format, ContestStartTime)
            IST_time := parsed_time.In(loc)
            contests["UpcomingContest"][rawI]["Time"] = fmt.Sprint(IST_time)
        })
    }

    collector.OnRequest(func(r *colly.Request) {
        fmt.Println("Visiting", r.URL.String())
    })

    collector.Visit("https://atcoder.jp/contests")
}

// main builds the nested result map, lets AtcoderFunc fill the "AtCoder"
// entry, and prints the whole structure as indented JSON.
func main() {
    contests := make(map[string]map[string]map[string]map[string]string)
    contests["AtCoder"] = make(map[string]map[string]map[string]string)

    AtcoderFunc(contests["AtCoder"])

    data, err := json.MarshalIndent(contests, "", "  ")
    if err != nil {
        // Report the encoding failure instead of printing an empty line.
        fmt.Println("encoding contests as JSON:", err)
        return
    }
    fmt.Println(string(data))
}

I kept your structure more or less as it was. Besides fixing the issue, I refactored your example a little by renaming some identifiers and removing unused statements. Lastly, I used the MarshalIndent function to pretty-print the JSON string on the terminal.
Let me know if it works for you too!

  • Related