Reading in a webpage

package main

import (
	"fmt"
	"io/ioutil"
	"log"
	"net/http"
)

// main downloads a fixed web page and prints its HTML to standard output.
//
// Any failure (request error, non-200 status, or an error while reading the
// body) is reported through the log package, which terminates the program.
func main() {
	const pageURL = "https://www.w3schools.com/html/html_tables.asp"

	resp, err := http.Get(pageURL)
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()

	// http.Get reports only transport-level failures through err; an HTTP
	// error such as 404 or 500 still produces a response, so the status has
	// to be checked explicitly to avoid printing an error page.
	if resp.StatusCode != http.StatusOK {
		log.Fatalf("fetching %s: unexpected status %s", pageURL, resp.Status)
	}

	// Read the whole response body into memory and convert it to a string.
	// NOTE: ioutil.ReadAll is deprecated since Go 1.16 in favour of
	// io.ReadAll; it is kept here because the file imports io/ioutil.
	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		log.Fatal(err)
	}

	fmt.Println(string(body))
}


Install goquery

go get github.com/PuerkitoBio/goquery 


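Now read the response body into goquery. Do this instead of calling ioutil.ReadAll: resp.Body is a stream that can only be read once, so a body that has already been read would give goquery an empty document. Remember to add "github.com/PuerkitoBio/goquery" to the imports.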

// Build a goquery document from the HTTP response body so that it can be
// queried with doc.Find.
// NOTE(review): resp is presumably the *http.Response obtained from http.Get
// in the first snippet. Its Body must not already have been consumed (for
// example by ioutil.ReadAll), because a reader can only be read once — confirm
// when combining the snippets.
doc, err := goquery.NewDocumentFromReader(resp.Body)

if err != nil {

    // Abort the program if the document could not be built.
    log.Fatal(err)

}


Using Find:

// Print the first <table> of the document as plain text: the header cells on
// one line, followed by the data cells with a line break after every full row.
doc.Find("table").Each(func(i int, table *goquery.Selection) {
	// For the sake of simplicity only the first table of the page is handled.
	if i != 0 {
		return
	}

	// Print the header cells, separated by spaces.
	headers := table.Find("th")
	headers.Each(func(_ int, th *goquery.Selection) {
		fmt.Print(th.Text())
		fmt.Print(" ")
	})
	fmt.Println()

	// The number of header cells is used as the column count. It is zero for
	// a table without <th> cells, in which case the modulo below would panic
	// with an integer divide by zero, so row breaks are skipped instead.
	columns := headers.Length()

	// Print the data cells, starting a new line after each complete row.
	table.Find("td").Each(func(index int, td *goquery.Selection) {
		fmt.Print(td.Text())
		fmt.Print(" ")

		if columns > 0 && (index+1)%columns == 0 {
			fmt.Println()
		}
	})
})



References

  • No labels