package sravni

import (
	"context"
	"encoding/json"
	"fmt"
	"io"
	"log/slog"
	"strconv"
	"strings"

	"git.loyso.art/frx/kurious/internal/common/errors"
	"git.loyso.art/frx/kurious/pkg/slices"

	"github.com/go-resty/resty/v2"
	"golang.org/x/net/html"
	"golang.org/x/net/html/atom"
)

const (
	// baseURL is the base address every relative request of the client is
	// resolved against.
	baseURL = "https://www.sravni.ru/kursy"
)

// Client is the set of operations exposed by the sravni client.
//
//go:generate mockery --name Client
type Client interface {
	GetMainPageState() (*PageState, error)
	ListEducationalProducts(
		ctx context.Context,
		params ListEducationProductsParams,
	) (result ListEducationProductsResponse, err error)
}

// Compile-time check that *client satisfies Client.
var _ Client = (*client)(nil)

// NewClient builds a client, fetches the main page once and caches its parsed
// state. The dictionaries found in that state are used later to validate the
// learning type and course thematics passed to ListEducationalProducts.
//
// It returns an error if the main page cannot be fetched or parsed, or if the
// page does not contain the embedded state.
func NewClient(ctx context.Context, log *slog.Logger, debug bool) (c *client, err error) {
	c = &client{
		log: log.With(slog.String("client", "sravni")),
		http: resty.New().
			SetBaseURL(baseURL).
			SetDebug(debug),
	}

	c.cachedMainPageInfo, err = c.getMainPageState(ctx)
	if err != nil {
		return nil, err
	}
	// parsePageState reports a missing state as (nil, nil); without this guard
	// the dictionary lookup below would dereference a nil pointer.
	if c.cachedMainPageInfo == nil {
		return nil, fmt.Errorf("main page state was not found: %w", ErrClientNotInited)
	}

	getQuerySet := func(fields []field) querySet {
		items := slices.Map(fields, func(f field) string {
			return f.Value
		})

		return newQuerySet(items...)
	}

	dicts := c.cachedMainPageInfo.Props.InitialReduxState.Dictionaries.Data
	c.validLearningTypes = getQuerySet(dicts.LearningType.Fields)
	c.validCourseThematics = getQuerySet(dicts.CourseThematics.Fields)

	return c, nil
}

// client is the resty-backed implementation of Client.
type client struct {
	log  *slog.Logger
	http *resty.Client

	// cachedMainPageInfo is the main page state loaded by NewClient.
	cachedMainPageInfo *PageState
	// validLearningTypes and validCourseThematics hold the dictionary values
	// accepted by ListEducationalProducts.
	validLearningTypes   querySet
	validCourseThematics querySet
}

// GetMainPageState returns a copy of the cached main page state, or
// ErrClientNotInited if the state has not been loaded.
func (c *client) GetMainPageState() (*PageState, error) {
	if err := c.checkClientInited(); err != nil {
		return nil, err
	}

	return c.cachedMainPageInfo.Clone(), nil
}

// ListEducationProductsParams are the caller-facing parameters of
// ListEducationalProducts.
type ListEducationProductsParams struct {
	LearningType     string
	CoursesThematics string

	// Limit is the page size; a non-positive value selects the default.
	Limit int
	// Offset is the number of items to skip; a negative value is treated as 0.
	Offset int
}

// ListEducationProductsRequest is the JSON body posted to the education
// products endpoint.
type ListEducationProductsRequest struct {
	Fingerprint      string   `json:"fingerPrint,omitempty"`
	ProductName      string   `json:"productName,omitempty"`
	AdvertisingOnly  bool     `json:"advertisingOnly"`
	Location         string   `json:"location"`
	OfferTypes       []string `json:"offerTypes"`
	IsMix            bool     `json:"isMix"`
	MixRepeated      bool     `json:"mixRepeated"`
	Fields           []string `json:"fields"`
	SortProperty     string   `json:"sortProperty"`
	SortDirection    string   `json:"sortDirection"`
	LearningType     []string `json:"learningtype"`
	CoursesThematics []string `json:"coursesThematics"`
	NotSubIsWebinar  string   `json:"not-sub-isWebinar"`
	NotB2B           string   `json:"not-b2b"`
	Limit            int      `json:"limit"`
	Offset           int      `json:"offset"`
}

// ListEducationProductsResponse is the JSON body decoded from a successful
// response of the education products endpoint.
type ListEducationProductsResponse struct {
	Items         []Course                `json:"items"`
	Organizations map[string]Organization `json:"organizations"`
	TotalCount    int                     `json:"totalCount"`
	TotalCountAdv int                     `json:"totalCountAdv"`
}

// ListEducationalProducts posts a product listing request for the given
// learning type and course thematics and decodes the response into result.
//
// Both params.LearningType and params.CoursesThematics must be present in the
// dictionaries loaded by NewClient, otherwise a validation error is returned.
// A non-positive params.Limit falls back to the default page size and a
// negative params.Offset is treated as 0. The request is bound to ctx.
func (c *client) ListEducationalProducts(
	ctx context.Context,
	params ListEducationProductsParams,
) (result ListEducationProductsResponse, err error) {
	const urlPath = "/v1/education/products"
	const defaultLimit = 1
	const defaultSortProp = "advertising.position"
	const defaultSortDirection = "asc"

	if err = c.checkClientInited(); err != nil {
		return result, err
	}

	if !c.validLearningTypes.hasValue(params.LearningType) {
		return result, errors.NewValidationError("learning_type", "bad value")
	}
	if !c.validCourseThematics.hasValue(params.CoursesThematics) {
		return result, errors.NewValidationError("courses_thematics", "bad value")
	}

	// Honour the caller's pagination, keeping the previous values as defaults.
	limit := params.Limit
	if limit <= 0 {
		limit = defaultLimit
	}
	offset := params.Offset
	if offset < 0 {
		offset = 0
	}

	reqParams := ListEducationProductsRequest{
		LearningType:     []string{params.LearningType},
		CoursesThematics: []string{params.CoursesThematics},

		Fields:        defaultProductFields,
		SortProperty:  defaultSortProp, // maybe sort by price?
		SortDirection: defaultSortDirection,

		NotSubIsWebinar: strconv.FormatBool(true),
		NotB2B:          strconv.FormatBool(true),
		IsMix:           true,  // not sure why, but for better parsing
		MixRepeated:     true,  // looks like this option should force to exclude duplicates
		AdvertisingOnly: false, // If true, it will show only paid items.
		Location:        "",    // TODO: get and fill location?
		Fingerprint:     "",    // not sure it should be set.
		ProductName:     "",    // looks like it does not affect anything
		OfferTypes:      nil,   // for more precise filter but not needed.

		Limit:  limit,
		Offset: offset,
	}

	req := c.http.R().
		SetContext(ctx). // propagate cancellation and deadlines to the HTTP call
		SetBody(reqParams).
		SetResult(&result).
		EnableTrace()

	resp, err := req.Post(c.makeEducationURL(urlPath))
	if err != nil {
		return result, fmt.Errorf("making request: %w", err)
	}

	if resp.IsError() {
		return result, fmt.Errorf("bad status code %d: %w", resp.StatusCode(), errors.ErrUnexpectedStatus)
	}

	return result, nil
}

// makeEducationURL joins the education service URL taken from the cached page
// state with path. It returns an empty string if the state is not loaded.
func (c *client) makeEducationURL(path string) string {
	if c.cachedMainPageInfo == nil {
		return ""
	}

	return c.cachedMainPageInfo.RuntimeConfig.EducationURL + path
}

// checkClientInited returns ErrClientNotInited if the main page state has not
// been loaded.
func (c *client) checkClientInited() error {
	if c.cachedMainPageInfo == nil {
		return ErrClientNotInited
	}

	return nil
}

// getMainPageState requests the main page and parses the state embedded in it.
//
// The response is requested unparsed, so this function owns the raw body and
// closes it before returning.
func (c *client) getMainPageState(ctx context.Context) (*PageState, error) {
	// maxLoggedBodySize bounds how much of an error response is logged.
	const maxLoggedBodySize = 4 << 10

	ctxLogger := restyCtxLogger{
		ctx: ctx,
		log: c.log,
	}

	req := c.http.R().
		SetContext(ctx).
		SetLogger(ctxLogger).
		SetDoNotParseResponse(true).
		EnableTrace()

	resp, err := req.Get("/")
	if err != nil {
		return nil, fmt.Errorf("getting request: %w", err)
	}

	rawBody := resp.RawBody()
	if rawBody == nil {
		return nil, fmt.Errorf("getting request: response has no body")
	}
	// With SetDoNotParseResponse resty neither reads nor closes the body.
	defer rawBody.Close()

	if resp.IsError() {
		// resp.Body() is empty for unparsed responses, so read the raw body.
		// Best effort: a read failure only shortens the logged payload.
		payload, _ := io.ReadAll(io.LimitReader(rawBody, maxLoggedBodySize))
		c.log.ErrorContext(ctx, "unable to proceed request", slog.String("body", string(payload)))
		return nil, fmt.Errorf("got %d, but expected success: %w", resp.StatusCode(), errors.ErrUnexpectedStatus)
	}

	traceInfo := resp.Request.TraceInfo()
	c.log.InfoContext(ctx, "request proceeded", slog.Any("trace", traceInfo))

	return c.parsePageState(ctx, rawBody)
}

// parsePageState parses body as HTML, looks for the script element whose id is
// "__NEXT_DATA__" and decodes its text content as JSON into a PageState.
//
// It returns (nil, nil) when no such script element is found, so callers must
// check the result for nil. An empty script element is reported as an error.
func (c *client) parsePageState(ctx context.Context, body io.Reader) (*PageState, error) {
	page, err := html.Parse(body)
	if err != nil {
		return nil, fmt.Errorf("parsing body: %w", err)
	}

	c.log.DebugContext(ctx, "finding page state")

	var (
		foundHTML bool
		foundBody bool
	)
	findFunc := func(node *html.Node) (found, deeper bool) {
		if node == nil {
			c.log.DebugContext(ctx, "node is null, skipping")
			return false, false
		}

		// The first element node encountered is taken as the html element.
		if !foundHTML && node.Type == html.ElementNode {
			c.log.DebugContext(ctx, "found html node")
			foundHTML = true
			return false, true
		}

		if foundHTML && !foundBody && node.DataAtom == atom.Body {
			c.log.DebugContext(ctx, "found body node")
			foundBody = true
			return false, true
		}

		if foundHTML && foundBody && node.DataAtom == atom.Script {
			for _, attr := range node.Attr {
				if attr.Key == "id" && attr.Val == "__NEXT_DATA__" {
					c.log.DebugContext(ctx, "found script node with next_data")
					return true, false
				}
			}
		}

		return false, false
	}

	nextData := findNode(page, findFunc)
	if nextData == nil {
		return nil, nil
	}
	// A script element without content has no child text node.
	if nextData.FirstChild == nil {
		return nil, fmt.Errorf("decoding html data: script node is empty")
	}

	var out PageState
	dataReader := strings.NewReader(nextData.FirstChild.Data)
	err = json.NewDecoder(dataReader).Decode(&out)
	if err != nil {
		return nil, fmt.Errorf("decoding html data: %w", err)
	}

	return &out, nil
}

// educationProductFields is the set of product field names known to this
// package; defaultProductFields must be a subset of it.
var educationProductFields = newQuerySet(
	"id",
	"name",
	"organization",
	"advertising",
	"discount",
	"link",
	"learningtype",
	"dateStart",
	"timeStart",
	"timeAllHour",
	"timeAllDay",
	"timeAllMonth",
	"isTermApproximately",
	"dictionaryFormatFilterNew",
	"dictionaryLevelFilterNew",
	"price",
	"priceAll",
	"priceInstallment",
	"courseImage",
	"withoutDiscountPrice",
)

// defaultProductFields is the field list sent with every product listing
// request. Initialization panics if a name is missing from
// educationProductFields.
var defaultProductFields = must(educationProductFields.exactSubset(
	"id",
	"name",
	"organization",
	"advertising",
	"discount",
	"link",
	"learningtype",
	"dateStart",
	"timeStart",
	"timeAllHour",
	"timeAllDay",
	"timeAllMonth",
	"price",
	"priceAll",
	"priceInstallment",
	"courseImage",
	"withoutDiscountPrice",
))

// must returns t, panicking with the error message if err is not nil.
func must[T any](t T, err error) T {
	if err != nil {
		panic(err.Error())
	}

	return t
}

// querySet is a list of string values paired with a map for membership checks.
type querySet struct {
	values       []string
	mappedValues map[string]struct{}
}

// Values returns a copy of the values in the order they were added.
func (qs querySet) Values() []string {
	out := make([]string, len(qs.values))
	copy(out, qs.values)
	return out
}

// hasValue reports whether value is a member of the set.
func (qs querySet) hasValue(value string) bool {
	_, ok := qs.mappedValues[value]
	return ok
}

// exactSubset returns values unchanged and in the same order if every one of
// them is a member of the set; otherwise it returns an error naming the first
// value that is not.
func (qs querySet) exactSubset(values ...string) ([]string, error) {
	out := make([]string, 0, len(values))
	for _, value := range values {
		if !qs.hasValue(value) {
			return nil, fmt.Errorf("value %s was not found in set", value)
		}

		out = append(out, value)
	}

	return out, nil
}

// func (qs querySet) subset(values ...string) []string {
// 	out := make([]string, 0, len(values))
// 	for _, value := range values {
// 		if qs.hasValue(value) {
// 			out = append(out, value)
// 		}
// 	}
//
// 	return out
// }

// newQuerySet builds a querySet from values, keeping their order in the list
// and indexing each of them in the membership map.
func newQuerySet(values ...string) querySet {
	qs := querySet{
		values:       make([]string, len(values)),
		mappedValues: make(map[string]struct{}, len(values)),
	}

	for i, v := range values {
		qs.values[i] = v
		qs.mappedValues[v] = struct{}{}
	}

	return qs
}