use goquery instead of colly

This commit is contained in:
Aleksandr Trushkin
2024-02-06 19:14:55 +03:00
parent 60e9de0275
commit 2352ebb942
3 changed files with 110 additions and 112 deletions

View File

@ -18,8 +18,8 @@ import (
"git.loyso.art/frx/eway/internal/crypto"
"git.loyso.art/frx/eway/internal/entity"
"github.com/PuerkitoBio/goquery"
"github.com/go-resty/resty/v2"
"github.com/gocolly/colly"
"github.com/rs/zerolog"
)
@ -191,12 +191,12 @@ func (c *client) GetGoodsRemnants(
productsStr = append(productsStr, strconv.Itoa(sku))
}
resp, err := c.http.R().
req := c.http.R().
SetFormData(map[string]string{
"products": strings.Join(productsStr, ","),
}).
SetDoNotParseResponse(true).
Post("/goods_remnants")
SetDoNotParseResponse(true)
resp, err := c.do(ctx, "GetGoodsRemnants", req, resty.MethodPost, "/goods_remnants")
if err != nil {
return nil, fmt.Errorf("getting goods new: %w", err)
}
@ -230,22 +230,36 @@ func (c *client) GetGoodsNew(
params GetGoodsNewParams,
) (items []entity.GoodsItemRaw, total int, err error) {
var response getGoodsNewResponse
resp, err := c.http.R().
SetFormData(map[string]string{
"draw": strconv.Itoa(params.Draw),
"start": strconv.Itoa(params.Start),
"length": strconv.Itoa(params.Length),
"order[0][column]": "14",
"order[0][dir]": "desc",
"search[value]": "",
"search[regex]": "false",
"search_in_stocks": "on",
"remnants_atleast": "5",
}).
formData := map[string]string{
"draw": strconv.Itoa(params.Draw),
"start": strconv.Itoa(params.Start),
"length": strconv.Itoa(params.Length),
"order[0][column]": "14",
"order[0][dir]": "desc",
"search[value]": "",
"search[regex]": "false",
}
if params.SearchInStocks {
stocksNum := strconv.Itoa(params.RemmantsAtleast)
formData["search_in_stocks"] = "on"
formData["remnants_atleast"] = stocksNum
}
c.log.Debug().
Int("remnants", params.RemmantsAtleast).
Bool("search_in_stocks", params.SearchInStocks).
Int("draw", params.Draw).
Int("start", params.Start).
Int("length", params.Length).
Msg("sending request")
req := c.http.R().
SetFormData(formData).
SetQueryParam("category_id", "0").
SetQueryParam("own", c.ownerID). // user id?
SetDoNotParseResponse(true).
Post("/goods_new")
SetDoNotParseResponse(true)
resp, err := c.do(ctx, "GetGoodsNew", req, resty.MethodPost, "/goods_new")
if err != nil {
return nil, -1, fmt.Errorf("getting goods new: %w", err)
}
@ -255,7 +269,6 @@ func (c *client) GetGoodsNew(
c.log.Error().Err(err).Msg("unable to close body")
}
}()
if resp.IsError() {
return nil, -1, errors.New("request was not successful")
}
@ -269,12 +282,14 @@ func (c *client) GetGoodsNew(
}
func (c *client) login(ctx context.Context, user, pass string) error {
resp, err := c.http.R().
req := c.http.R().
SetDoNotParseResponse(true).
SetFormData(map[string]string{
"username": user,
"password": pass,
}).Post("https://eway.elevel.ru/")
})
resp, err := c.do(ctx, "login", req, resty.MethodPost, "https://eway.elevel.ru/")
if err != nil {
return fmt.Errorf("sending request: %w", err)
}
@ -288,14 +303,27 @@ func (c *client) login(ctx context.Context, user, pass string) error {
return nil
}
// ProductInfo groups image links and a string-to-string parameter map.
// NOTE(review): nothing in the visible part of this file references this
// type — confirm whether it is still needed.
type ProductInfo struct {
// ImageLinks holds links to images.
ImageLinks []string
// Parameters holds string key/value pairs; presumably keyed by the
// parameter name — verify against the code that fills it.
Parameters map[string]string
}
func (c *client) do(ctx context.Context, name string, req *resty.Request, method string, url string) (resp *resty.Response, err error) {
resp, err = req.
EnableTrace().
Execute(method, url)
type parameterSelector struct {
Name string `selector:"div"`
Value string `selector:"div.text-right"`
traceInfo := resp.Request.TraceInfo()
c.log.Debug().
Str("name", name).
Str("path", url).
Str("method", method).
Float64("elapsed", traceInfo.TotalTime.Seconds()).
Float64("response_time", traceInfo.ResponseTime.Seconds()).
Int("attempt", traceInfo.RequestAttempt).
Bool("success", resp.IsSuccess()).
Msg("request processed")
if err != nil {
return nil, fmt.Errorf("executing request: %w", err)
}
return resp, err
}
func (c *client) GetProductInfo(ctx context.Context, cart int64) (pi entity.GoodsItemInfo, err error) {
@ -317,69 +345,73 @@ func (c *client) GetProductInfo(ctx context.Context, cart int64) (pi entity.Good
}
}
func (c *client) getProductInfo(ctx context.Context, cart int64) (pi entity.GoodsItemInfo, err error) {
collector := colly.NewCollector(
colly.AllowedDomains("eway.elevel.ru"),
colly.AllowURLRevisit(),
)
func (c *client) getProductInfo(ctx context.Context, cartID int64) (pi entity.GoodsItemInfo, err error) {
reqpath := "https://eway.elevel.ru/product/" + strconv.Itoa(int(cartID)) + "/"
req := c.http.R().SetDoNotParseResponse(true).AddRetryCondition(func(r *resty.Response, err error) bool {
if r.Request.Attempt > 3 {
return false
}
return strings.Contains(err.Error(), "pipe")
})
c.log.Debug().Msg("using go query")
pi.Parameters = map[string]string{}
start := time.Now()
resp, err := c.do(ctx, "getProductInfo", req, resty.MethodGet, reqpath)
if err != nil {
return pi, fmt.Errorf("getting product info: %w", err)
}
defer func() {
elapsed := time.Since(start).Seconds()
c.log.Info().
Float64("elapsed", elapsed).
Int64("cart", cart).
Msg("request processed")
errClose := resp.RawBody().Close()
if errClose == nil {
return
}
if err == nil {
err = errClose
return
}
c.log.Warn().Err(errClose).Msg("unable to close body")
}()
if resp.IsError() {
return pi, errors.New("request was not successful")
}
collector.OnHTML("body > div.page-container > div.page-content > div.content-wrapper > div.content > div.row > div.col-md-4 > div > div > div:nth-child(6)", func(e *colly.HTMLElement) {
e.ForEach("div.display-flex", func(i int, h *colly.HTMLElement) {
var s parameterSelector
err = h.Unmarshal(&s)
if err != nil {
c.log.Warn().Err(err).Msg("unable to unmarshal")
return
}
doc, err := goquery.NewDocumentFromReader(resp.RawBody())
if err != nil {
return pi, fmt.Errorf("makind new document: %w", err)
}
if s.Name == "" || s.Value == "" {
c.log.Warn().Msg("got empty key or value, skipping")
return
}
cleanText := func(t string) string {
return strings.TrimSuffix(strings.TrimSpace(t), ":")
}
pi.Parameters[s.Name] = s.Value
const parametersSelector = "body > div.page-container > div.page-content > div.content-wrapper > div.content > div.row > div.col-md-4 > div > div > div:nth-child(6)"
const parametersInnerNode = "div.display-flex"
doc.
Find(parametersSelector).
Find(parametersInnerNode).
Each(func(i int, s *goquery.Selection) {
name := cleanText(s.Find("div").Eq(0).Text())
value := cleanText(s.Find("div.text-right").Text())
pi.Parameters[name] = value
})
})
collector.OnHTML("div.gallery_panel", func(h *colly.HTMLElement) {
h.ForEach("div.gallery_thumbnail > img", func(i int, h *colly.HTMLElement) {
imageURL := h.Attr("src")
if imageURL == "" {
const galleryPanelSelector = "div.gallery_panel"
const galleryImageSelector = "div.gallery_thumbnail > img"
doc.
Find(galleryPanelSelector).
Find(galleryImageSelector).
Each(func(i int, s *goquery.Selection) {
imageURL, ok := s.Attr("src")
if !ok || len(imageURL) == 0 {
return
}
pi.PhotoURLs = append(pi.PhotoURLs, imageURL)
})
})
for i := 0; i < 3; i++ {
err = collector.Visit("https://eway.elevel.ru/product/" + strconv.Itoa(int(cart)) + "/")
if err != nil {
c.log.Warn().Err(err).Msg("unable to visit site, retrying...")
select {
case <-time.After(time.Second * 2):
continue
case <-ctx.Done():
return pi, ctx.Err()
}
}
break
}
if err != nil {
return pi, fmt.Errorf("visiting site: %w", err)
}
return pi, nil
}