Home > OS >  How can I destruct this in Golang?
How can I destruct this in Golang?

Time:07-08

It may be a stupid question because I just learned Golang. I hope you understand.

I am making a program to extract data from the homepage using the goquery package:

package main

import (
    "fmt"
    "log"
    "net/http"

    "github.com/PuerkitoBio/goquery"
)

// url is the job-search results page that getPages downloads.
var url = "https://www.jobkorea.co.kr/Search/?stext=golang&tabType=recruit&Page_No=3"

// main runs the scraper once. The int returned by getPages is not needed
// here, so it is discarded explicitly.
func main() {
    _ = getPages()
}

// getPages downloads the page at url, parses it with goquery and, for every
// element matching ".tplPagination", prints the selection of <a> elements
// found inside it.
//
// A request error, a non-200 status or a parse error ends the program through
// checkErr / checkCode. The function currently always returns 0.
func getPages() int {
    resp, err := http.Get(url)
    checkErr(err)
    checkCode(resp)

    defer resp.Body.Close()

    document, err := goquery.NewDocumentFromReader(resp.Body)
    checkErr(err)

    pagination := document.Find(".tplPagination")
    pagination.Each(func(_ int, block *goquery.Selection) {
        // Println receives the *goquery.Selection itself, so the whole
        // struct (not just its node slice) is printed.
        links := block.Find("a")
        fmt.Println(links)
    })

    return 0
}

// checkErr aborts the program when err is non-nil and does nothing otherwise.
//
// log.Fatalln writes err to the standard logger and then exits the process,
// so no statement placed after it can ever run; the error is therefore
// reported exactly once, by the logger.
func checkErr(err error) {
    if err != nil {
        log.Fatalln(err)
    }
}

// checkCode aborts the program through log.Fatalln unless the response
// carries HTTP status 200 (OK).
func checkCode(res *http.Response) {
    if res.StatusCode == http.StatusOK {
        return
    }
    log.Fatalln("Request failed with statusCode:", res.StatusCode)
}

It prints below:

&{[0x140002db0a0 0x140002db570 0x140002db810 0x140002dbd50 0x140002dc000 0x140002dc2a0 0x140002dc540 0x140002dc850] 0x140000b2438 0x14000305680}
&{[0x140002dcd90 0x140002dd810] 0x140000b2438 0x14000305710}

But I just want to print only the first array out. Like this:

[0x140002dcd90 0x140002dd810]

How can I destructure the result to get just that slice?

CodePudding user response:

The problem is that you are printing each result as soon as it is matched.

You can save the *goquery.Selection in a new slice and print only the last element. This example works because you want the last occurrence, but in real code you should query for something specific so that you do not depend on the order of the results.

// type Selection struct {
//  Nodes    []*html.Node
//  document *Document
//  prevSel  *Selection
// }

// temp collects the selections to inspect once the query has run.
var temp []*goquery.Selection

// NOTE(review): per the goquery documentation, Each returns the Selection it
// was called on (here the ".tplPagination" matches), and the value of
// s.Find("a") inside the callback is discarded. A single element is therefore
// appended to temp — confirm this is the selection you intend to keep.
temp = append(temp, doc.Find(".tplPagination").Each(func(i int, s *goquery.Selection) {
    s.Find("a")
}))

// Print the last selection stored in temp.
fmt.Printf("last: %v\n", temp[len(temp)-1])
temp[len(temp)-1]: &{[0xc0002dcd90 0xc0002e0a80] 0xc00000e3f0 0xc000309770}

The Nodes field ([]*html.Node) can be accessed in the same way:

// Nodes is the exported []*html.Node field of goquery.Selection; printing it
// shows only the node slice instead of the whole struct.
fmt.Printf("last: %v\n", temp[len(temp)-1].Nodes)

CodePudding user response:

As per your comment you were looking to parse the page and get the number of pages and number of posts. Here is my attempt:

package main

import (
    "fmt"
    "github.com/PuerkitoBio/goquery"
    "log"
    "math"
    "net/http"
    "strconv"
    "strings"
)

// errCheck terminates the program via log.Fatal when err is non-nil and
// returns immediately otherwise.
func errCheck(err error) {
    if err == nil {
        return
    }
    log.Fatal(err)
}

// ExampleScrape fetches page 3 of the jobkorea.co.kr search results for
// "golang" and prints the total number of postings, the number of postings on
// that page, the derived number of pages, and the title of every posting on
// the page.
//
// Any failure (request error, non-200 status, unparsable HTML, missing
// listing container, malformed total-count attribute) ends the program via
// log.Fatal / log.Fatalf.
func ExampleScrape() {
    // %d, not %s: page is an int, and %s would render it as "%!s(int=3)"
    // inside the query string.
    const urlFormat = "https://www.jobkorea.co.kr/Search/?stext=golang&tabType=recruit&Page_No=%d"
    const listSelector = ".recruit-info div.dev_list.lists-cnt"

    page := 3
    fmt.Println("Current page:", page)

    res, err := http.Get(fmt.Sprintf(urlFormat, page))
    errCheck(err)

    defer res.Body.Close()

    if res.StatusCode != http.StatusOK {
        log.Fatalf("status code error: %d %s", res.StatusCode, res.Status)
    }

    doc, err := goquery.NewDocumentFromReader(res.Body)
    errCheck(err)

    postsDiv := doc.Find(listSelector)
    if postsDiv.Length() == 0 {
        // Fail with a clear message instead of panicking on an
        // out-of-range index when the page layout does not match.
        log.Fatalf("no element matches %q", listSelector)
    }

    // Attr reads the attribute from the first matched element. When the
    // attribute is absent the total stays 0.
    var totalCount int
    if raw, ok := postsDiv.Attr("total-count"); ok {
        totalCount, err = strconv.Atoi(raw)
        errCheck(err)
    }
    fmt.Println("Total count:", totalCount)

    titles := postsDiv.Find(".list-post .title")
    perPage := titles.Length()
    fmt.Println("On this page:", perPage)

    // Guard the division: with no titles on the page the quotient would be
    // +Inf or NaN rather than a meaningful page count.
    var pages float64
    if perPage > 0 {
        pages = math.Ceil(float64(totalCount) / float64(perPage))
    }
    fmt.Println("Pages:", pages)

    fmt.Println("\nTitles on this page:")
    titles.Each(func(_ int, s *goquery.Selection) {
        fmt.Println("\t-", strings.TrimSpace(s.Text()))
    })
}

// main runs ExampleScrape once.
func main() {
    ExampleScrape()
}
  • Related