You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

45 lines
1.1 KiB
Go

package scrapeutils
import (
"context"
"fmt"
"strings"
"github.com/PuerkitoBio/goquery"
"github.com/imroc/req/v3"
)
// userAgent is the User-Agent string installed on ReqClient. It presents the
// client as a desktop browser (Chrome/Edge 111 on macOS).
const userAgent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36 Edg/111.0.0.0"
// ReqClient is the package-level HTTP client that GetHTML issues its requests
// through. It is exported, so other packages can use or reconfigure it.
//
// Configuration applied at initialisation:
//   - DisableAutoReadResponse: the body is presumably not read automatically;
//     GetHTML reads it explicitly via ToString.
//   - EnableInsecureSkipVerify: NOTE(review): presumably turns off TLS
//     certificate verification for every request made through this client.
//     Confirm that this is intentional.
//   - SetUserAgent: sends userAgent as the User-Agent.
var ReqClient = req.NewClient().
DisableAutoReadResponse().
EnableInsecureSkipVerify().
SetUserAgent(userAgent)
// GetHTML performs a GET request for url through ReqClient and returns the
// response body as a string.
//
// The request is bound to ctx and configured with a retry count of 3. A
// failure to perform the request or to read the body is returned wrapped, and
// the returned string is empty in that case.
//
// NOTE(review): no status-code check is performed here, so a non-2xx response
// is presumably returned like any other body. Confirm against callers.
func GetHTML(ctx context.Context, url string) (string, error) {
	request := ReqClient.R().
		SetContext(ctx).
		SetRetryCount(3)

	resp, reqErr := request.Get(url)
	if reqErr != nil {
		return "", fmt.Errorf("failed to get url: %w", reqErr)
	}

	// The body is read explicitly here, into a string.
	body, readErr := resp.ToString()
	if readErr != nil {
		return "", fmt.Errorf("failed to read response body: %w", readErr)
	}

	return body, nil
}
// GetParsed downloads the page at url via GetHTML and parses the returned
// markup into a goquery document.
//
// It returns a nil document together with a wrapped error when either the
// download or the parse step fails.
func GetParsed(ctx context.Context, url string) (*goquery.Document, error) {
	markup, fetchErr := GetHTML(ctx, url)
	if fetchErr != nil {
		return nil, fmt.Errorf("failed to get html: %w", fetchErr)
	}

	// goquery consumes an io.Reader, so wrap the in-memory markup.
	reader := strings.NewReader(markup)
	parsed, parseErr := goquery.NewDocumentFromReader(reader)
	if parseErr != nil {
		return nil, fmt.Errorf("failed to parse html: %w", parseErr)
	}

	return parsed, nil
}
// Fetcher is declared without any fields, so it holds no state of its own.
// NOTE(review): presumably it serves as a method receiver for fetch helpers
// defined outside this section. Confirm.
type Fetcher struct{}