378 lines
8.6 KiB
Go
378 lines
8.6 KiB
Go
package sravni
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
"io"
|
|
"log/slog"
|
|
"strconv"
|
|
"strings"
|
|
|
|
"git.loyso.art/frx/kurious/internal/domain"
|
|
"git.loyso.art/frx/kurious/pkg/utilities/slices"
|
|
|
|
"github.com/go-resty/resty/v2"
|
|
"golang.org/x/net/html"
|
|
"golang.org/x/net/html/atom"
|
|
)
|
|
|
|
// baseURL is the address of the sravni.ru courses section. NewClient installs
// it as the base URL of the underlying resty client.
const baseURL = "https://www.sravni.ru/kursy"
|
|
|
|
type Client interface {
|
|
GetMainPageState() *PageState
|
|
ListEducationalProducts(
|
|
ctx context.Context,
|
|
params ListEducationProductsParams,
|
|
) (result ListEducationProductsResponse, err error)
|
|
}
|
|
|
|
func NewClient(ctx context.Context, log *slog.Logger, debug bool) (c *client, err error) {
|
|
c = &client{
|
|
log: log.With(slog.String("client", "sravni")),
|
|
http: resty.New().
|
|
SetBaseURL(baseURL).
|
|
SetDebug(debug),
|
|
}
|
|
|
|
c.cachedMainPageInfo, err = c.getMainPageState(ctx)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
getQuerySet := func(fields []field) querySet {
|
|
items := slices.Map(fields, func(f field) string {
|
|
return f.Value
|
|
})
|
|
|
|
return newQuerySet(items...)
|
|
}
|
|
|
|
dicts := c.cachedMainPageInfo.Props.InitialReduxState.Dictionaries.Data
|
|
c.validLearningTypes = getQuerySet(dicts.LearningType.Fields)
|
|
c.validCourseThematics = getQuerySet(dicts.CourseThematics.Fields)
|
|
|
|
return c, nil
|
|
}
|
|
|
|
type client struct {
|
|
log *slog.Logger
|
|
http *resty.Client
|
|
|
|
cachedMainPageInfo *PageState
|
|
validLearningTypes querySet
|
|
validCourseThematics querySet
|
|
}
|
|
|
|
func (c *client) GetMainPageState() *PageState {
|
|
return c.cachedMainPageInfo.Clone()
|
|
}
|
|
|
|
// ListEducationProductsParams carries the caller-supplied filters for
// ListEducationalProducts.
type ListEducationProductsParams struct {
	// LearningType must be one of the learning type values known to the client.
	LearningType string
	// CoursesThematics must be one of the course thematics values known to the
	// client.
	CoursesThematics string

	// Limit and Offset are presumably pagination controls; confirm against
	// ListEducationalProducts before relying on them.
	Limit  int
	Offset int
}
|
|
|
|
// ListEducationProductsRequest is the JSON body posted to the education
// products endpoint. Field order is significant only for the order of keys in
// the encoded JSON.
type ListEducationProductsRequest struct {
	// Fingerprint and ProductName are omitted from the JSON when empty.
	Fingerprint string `json:"fingerPrint,omitempty"`
	ProductName string `json:"productName,omitempty"`

	// AdvertisingOnly presumably restricts the result to paid items.
	AdvertisingOnly bool     `json:"advertisingOnly"`
	Location        string   `json:"location"`
	OfferTypes      []string `json:"offerTypes"`
	IsMix           bool     `json:"isMix"`
	MixRepeated     bool     `json:"mixRepeated"`

	// Fields lists the product fields requested from the endpoint.
	Fields        []string `json:"fields"`
	SortProperty  string   `json:"sortProperty"`
	SortDirection string   `json:"sortDirection"`

	// LearningType and CoursesThematics are the filters taken from the
	// caller's parameters.
	LearningType     []string `json:"learningtype"`
	CoursesThematics []string `json:"coursesThematics"`

	// NotSubIsWebinar and NotB2B are sent as strings, not booleans.
	NotSubIsWebinar string `json:"not-sub-isWebinar"`
	NotB2B          string `json:"not-b2b"`

	Limit  int `json:"limit"`
	Offset int `json:"offset"`
}
|
|
|
|
type ListEducationProductsResponse struct {
|
|
Items []Course `json:"items"`
|
|
Organizations map[string]Organization `json:"organizations"`
|
|
|
|
TotalCount int `json:"totalCount"`
|
|
TotalCountAdv int `json:"totalCountAdv"`
|
|
}
|
|
|
|
func (c *client) ListEducationalProducts(
|
|
ctx context.Context,
|
|
params ListEducationProductsParams,
|
|
) (result ListEducationProductsResponse, err error) {
|
|
const urlPath = "/v1/education/products"
|
|
const defaultLimit = 1
|
|
const defaultSortProp = "advertising.position"
|
|
const defaultSortDirection = "asc"
|
|
if err = c.checkClientInited(); err != nil {
|
|
return result, err
|
|
}
|
|
|
|
if !c.validLearningTypes.hasValue(params.LearningType) {
|
|
return result, domain.NewValidationError("learning_type", "bad value")
|
|
}
|
|
if !c.validCourseThematics.hasValue(params.CoursesThematics) {
|
|
return result, domain.NewValidationError("courses_thematics", "bad value")
|
|
}
|
|
|
|
reqParams := ListEducationProductsRequest{
|
|
LearningType: []string{
|
|
params.LearningType,
|
|
},
|
|
CoursesThematics: []string{
|
|
params.CoursesThematics,
|
|
},
|
|
|
|
Fields: defaultProductFields,
|
|
SortProperty: defaultSortProp, // mayber sort by price?
|
|
SortDirection: defaultSortDirection,
|
|
NotSubIsWebinar: strconv.FormatBool(true),
|
|
NotB2B: strconv.FormatBool(true),
|
|
IsMix: true, // not sure why, but for better parsing
|
|
MixRepeated: true, // looks like this option should force to exclude duplicates
|
|
AdvertisingOnly: false, // If true, it will show only paid items.
|
|
Location: "", // TODO: get and fill location?
|
|
Fingerprint: "", // not sure it should be set.
|
|
ProductName: "", // looks like it does not affects anything
|
|
OfferTypes: nil, // for more precise filter but not needed.
|
|
|
|
Limit: defaultLimit,
|
|
Offset: 0,
|
|
}
|
|
|
|
req := c.http.R().
|
|
SetBody(reqParams).
|
|
SetResult(&result).
|
|
EnableTrace()
|
|
|
|
resp, err := req.Post(c.makeEducationURL(urlPath))
|
|
if err != nil {
|
|
return result, fmt.Errorf("making request: %w", err)
|
|
}
|
|
|
|
if resp.IsError() {
|
|
return result, fmt.Errorf("bad status code %d: %w", resp.StatusCode(), domain.ErrUnexpectedStatus)
|
|
}
|
|
|
|
return result, nil
|
|
}
|
|
|
|
func (c *client) makeEducationURL(path string) string {
|
|
if c.cachedMainPageInfo == nil {
|
|
return ""
|
|
}
|
|
|
|
return c.cachedMainPageInfo.RuntimeConfig.EducationURL + path
|
|
}
|
|
|
|
func (c *client) checkClientInited() error {
|
|
if c.cachedMainPageInfo == nil {
|
|
return ErrClientNotInited
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (c *client) getMainPageState(ctx context.Context) (*PageState, error) {
|
|
ctxLogger := restyCtxLogger{
|
|
ctx: ctx,
|
|
log: c.log,
|
|
}
|
|
|
|
req := c.http.R().
|
|
SetContext(ctx).
|
|
SetLogger(ctxLogger).
|
|
SetDoNotParseResponse(true).
|
|
EnableTrace()
|
|
|
|
resp, err := req.Get("/")
|
|
if err != nil {
|
|
return nil, fmt.Errorf("getting request: %w", err)
|
|
}
|
|
|
|
if resp.IsError() {
|
|
c.log.ErrorContext(ctx, "unable to proceed request", slog.String("body", string(resp.Body())))
|
|
return nil, fmt.Errorf("got %d, but expected success: %w", resp.StatusCode(), domain.ErrUnexpectedStatus)
|
|
}
|
|
|
|
traceInfo := resp.Request.TraceInfo()
|
|
c.log.InfoContext(ctx, "request proceeded", slog.Any("trace", traceInfo))
|
|
|
|
return c.parsePageState(ctx, resp.RawBody())
|
|
}
|
|
|
|
func (c *client) parsePageState(ctx context.Context, body io.Reader) (*PageState, error) {
|
|
page, err := html.Parse(body)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("parsing body: %w", err)
|
|
}
|
|
|
|
c.log.DebugContext(ctx, "finding page state")
|
|
|
|
var (
|
|
foundHtml bool
|
|
foundBody bool
|
|
)
|
|
findFunc := func(node *html.Node) (found, deeper bool) {
|
|
if node == nil {
|
|
c.log.DebugContext(ctx, "node is null, skipping")
|
|
return false, false
|
|
}
|
|
|
|
if !foundHtml && node.Type == html.ElementNode {
|
|
c.log.DebugContext(ctx, "found html node")
|
|
foundHtml = true
|
|
return false, true
|
|
}
|
|
|
|
if foundHtml && !foundBody && node.DataAtom == atom.Body {
|
|
c.log.DebugContext(ctx, "found body node")
|
|
foundBody = true
|
|
return false, true
|
|
}
|
|
|
|
if foundHtml && foundBody && node.DataAtom == atom.Script {
|
|
for _, attr := range node.Attr {
|
|
if attr.Key == "id" && attr.Val == "__NEXT_DATA__" {
|
|
c.log.DebugContext(ctx, "found script node with next_data")
|
|
return true, false
|
|
}
|
|
}
|
|
}
|
|
|
|
return false, false
|
|
}
|
|
|
|
nextData := findNode(page, findFunc)
|
|
if nextData == nil {
|
|
return nil, nil
|
|
}
|
|
|
|
var out PageState
|
|
dataReader := strings.NewReader(nextData.FirstChild.Data)
|
|
err = json.NewDecoder(dataReader).Decode(&out)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("decoding html data: %w", err)
|
|
}
|
|
return &out, nil
|
|
}
|
|
|
|
var educationProductFields = newQuerySet(
|
|
"id",
|
|
"name",
|
|
"organization",
|
|
"advertising",
|
|
"discount",
|
|
"link",
|
|
"learningtype",
|
|
"dateStart",
|
|
"timeStart",
|
|
"timeAllHour",
|
|
"timeAllDay",
|
|
"timeAllMonth",
|
|
"isTermApproximately",
|
|
"dictionaryFormatFilterNew",
|
|
"dictionaryLevelFilterNew",
|
|
"price",
|
|
"priceAll",
|
|
"priceInstallment",
|
|
"courseImage",
|
|
"price",
|
|
"withoutDiscountPrice",
|
|
)
|
|
|
|
var defaultProductFields = must(educationProductFields.exactSubset(
|
|
"id",
|
|
"name",
|
|
"organization",
|
|
"advertising",
|
|
"discount",
|
|
"link",
|
|
"learningtype",
|
|
"dateStart",
|
|
"timeStart",
|
|
"timeAllHour",
|
|
"timeAllDay",
|
|
"timeAllMonth",
|
|
"price",
|
|
"priceAll",
|
|
"priceInstallment",
|
|
"courseImage",
|
|
"price",
|
|
"withoutDiscountPrice",
|
|
))
|
|
|
|
// must unwraps a (value, error) pair: it returns t when err is nil and panics
// with the error's message otherwise. It is meant for package-level
// initialisation where a failure is a programming mistake.
func must[T any](t T, err error) T {
	if err == nil {
		return t
	}

	panic(err.Error())
}
|
|
|
|
// querySet is a collection of allowed string values that supports both
// ordered listing and membership checks.
type querySet struct {
	// values keeps the values in the order they were supplied.
	values []string
	// mappedValues indexes the same values for membership lookups.
	mappedValues map[string]struct{}
}
|
|
|
|
func (qs querySet) Values() []string {
|
|
out := make([]string, len(qs.values))
|
|
copy(out, qs.values)
|
|
|
|
return out
|
|
}
|
|
|
|
func (qs querySet) hasValue(value string) bool {
|
|
_, ok := qs.mappedValues[value]
|
|
return ok
|
|
}
|
|
|
|
func (qs querySet) exactSubset(values ...string) ([]string, error) {
|
|
out := make([]string, 0, len(values))
|
|
for _, value := range values {
|
|
if !qs.hasValue(value) {
|
|
return nil, fmt.Errorf("value %s was not found in set", value)
|
|
}
|
|
|
|
out = append(out, value)
|
|
}
|
|
|
|
return out, nil
|
|
|
|
}
|
|
|
|
// func (qs querySet) subset(values ...string) []string {
// 	out := make([]string, 0, len(values))
// 	for _, value := range values {
// 		if qs.hasValue(value) {
// 			out = append(out, value)
// 		}
// 	}
//
// 	return out
// }
|
|
|
|
func newQuerySet(values ...string) querySet {
|
|
qs := querySet{
|
|
values: make([]string, len(values)),
|
|
mappedValues: make(map[string]struct{}, len(values)),
|
|
}
|
|
|
|
for i, v := range values {
|
|
qs.values[i] = v
|
|
qs.mappedValues[v] = struct{}{}
|
|
}
|
|
|
|
return qs
|
|
}
|