package sravni import ( "context" "encoding/json" "fmt" "io" "log/slog" "strconv" "strings" "time" "git.loyso.art/frx/kurious/internal/common/errors" "git.loyso.art/frx/kurious/internal/common/xslices" "git.loyso.art/frx/kurious/pkg/xdefault" "github.com/go-resty/resty/v2" "golang.org/x/net/html" "golang.org/x/net/html/atom" "golang.org/x/time/rate" ) const ( baseURL = "https://www.sravni.ru/kursy" ) //go:generate mockery --name Client type Client interface { GetMainPageState() (*PageState, error) ListEducationalProducts( ctx context.Context, params ListEducationProductsParams, ) (result listEducationProductsResponse, err error) ListEducationalProductsFilterCount( ctx context.Context, params ListEducationProductsParams, ) (result ProductsFilterCount, err error) } func NewClient(ctx context.Context, log *slog.Logger, debug bool) (c *client, err error) { c = &client{ log: log.With(slog.String("client", "sravni")), limiter: rate.NewLimiter(rate.Every(time.Millisecond*100), 1), http: resty.New(). SetBaseURL(baseURL). SetDebug(debug), } c.cachedMainPageInfo, err = c.getMainPageState(ctx) if err != nil { return nil, err } getQuerySet := func(fields []Field) querySet { items := xslices.Map(fields, func(f Field) string { return f.Value }) return newQuerySet(items...) } dicts := c.cachedMainPageInfo.Props.InitialReduxState.Dictionaries.Data c.validLearningTypes = getQuerySet(dicts.LearningType.Fields) c.validCourseThematics = getQuerySet(dicts.CourseThematics.Fields) return c, nil } type client struct { log *slog.Logger http *resty.Client cachedMainPageInfo *PageState validLearningTypes querySet validCourseThematics querySet limiter *rate.Limiter } func (c *client) GetMainPageState() (*PageState, error) { return c.cachedMainPageInfo.Clone(), nil } type ListEducationProductsParams struct { LearningType string CoursesThematics []string CourseGraphics []string CourseLevels []string CourseFormats []string CourseDurations []string CourseTypes []string SortBy string Limit int Offset int } type stringifiedBool string func AsStringifiedBool(b bool) stringifiedBool { return stringifiedBool(strconv.FormatBool(b)) } // FilterLevel is a Уровень сложности type FilterLevel string const ( FilterLevelJunior FilterLevel = "levelJuniorNew" FilterLevelMiddle FilterLevel = "levelMiddleNew" FilterLevelChildren FilterLevel = "levelChildNew" ) // FilterTime is a срок обучения type FilterTime string const ( FilterTimeLessMonth FilterTime = "1" // less than month FilterTimeFrom1To3Month FilterTime = "2" // from month to three month FilterTimeFrom3To6 FilterTime = "3" // from three to six months FilterTimeFrom6To12 FilterTime = "4" // from six to twelve months FilterTimeFrom12 FilterTime = "5" // from twelve months ) // FilterFormat is a Форма обучение type FilterFormat string const ( FilterFormatRecord FilterFormat = "formatRecordNew" FilterFormatOnline FilterFormat = "formatOnlineNew" FilterFormatOffline FilterFormat = "formatOfflineNew" ) // FilterGraphic is a График прохождения type FilterGraphic string const ( FilterGraphicTimeLength FilterGraphic = "courseTimeLengthNew" FilterGraphicTerm FilterGraphic = "courseTimeTermNew" ) type listEducationProductsRequest struct { Fingerprint string `json:"fingerPrint,omitempty"` ProductName string `json:"productName,omitempty"` Location string `json:"location"` OfferTypes []string `json:"offerTypes"` IsMix bool `json:"isMix"` MixRepeated bool `json:"mixRepeated"` Fields []string `json:"fields"` // Filters LearningType []string `json:"learningtype"` CoursesThematics []string `json:"coursesThematics"` Organizations []string `json:"organizations,omitempty"` // list of ids DictionatyFormatFilterNew []FilterFormat `json:"dictionaryFormatFilterNew,omitempty"` DictionaryTimeFilter []FilterTime `json:"dictionaryTimeFilter,omitempty"` DictionaryGraphicFilterNew []FilterGraphic `json:"dictionaryGraphicFilterNew,omitempty"` DictionatyLevelFilterNew []FilterLevel `json:"dictionaryLevelFilterNew,omitempty"` // Options SubMentor []stringifiedBool `json:"sub-mentor,omitempty"` // option with mentor SubTimeFree []stringifiedBool `json:"sub-timeFree,omitempty"` // option with trial SubJobGarantSub []stringifiedBool `json:"sub-jobGarantsub,omitempty"` // option for job garantee SubPriceFree []stringifiedBool `json:"sub-priceFree,omitempty"` // only free SubInstallment []stringifiedBool `json:"sub-installment,omitempty"` // with credit SubIsCourseProfession []stringifiedBool `json:"sub-isCourseProfession,omitempty"` // освоить профессию с нуля DevelopSkills []stringifiedBool `json:"developSkills,omitempty"` // развить навыки NotSubIsWebinar string `json:"not-sub-isWebinar,omitempty"` NotB2B string `json:"not-b2b,omitempty"` AdvertisingOnly bool `json:"advertisingOnly,omitempty"` // Pagination and sorting Limit int `json:"limit"` Offset int `json:"offset"` SortProperty string `json:"sortProperty"` SortDirection string `json:"sortDirection"` } type listEducationProductsResponse struct { Items []Course `json:"items"` Organizations map[string]Organization `json:"organizations"` TotalCount int `json:"totalCount"` TotalCountAdv int `json:"totalCountAdv"` } func (c *client) ListEducationalProducts( ctx context.Context, params ListEducationProductsParams, ) (result listEducationProductsResponse, err error) { const urlPath = "/v1/education/products" const defaultLimit = 1 const defaultSortProp = "advertising.position" const defaultSortDirection = "asc" // TODO: find out should it be settable const productName = "learning-courses" if err = c.checkClientInited(); err != nil { return result, err } if !c.validLearningTypes.hasValue(params.LearningType) { return result, errors.NewValidationError("learning_type", "unknown value") } reqParams := listEducationProductsRequest{ LearningType: valueAsArray(params.LearningType), CoursesThematics: params.CoursesThematics, ProductName: productName, Fields: defaultProductFields, SortProperty: defaultSortProp, // mayber sort by price? SortDirection: defaultSortDirection, NotSubIsWebinar: strconv.FormatBool(true), NotB2B: strconv.FormatBool(true), IsMix: false, // not sure why, but for better parsing MixRepeated: true, // looks like this option should force to exclude duplicates AdvertisingOnly: false, // If true, it will show only paid items. Location: "", // TODO: get and fill location? Fingerprint: "", // not sure it should be set. OfferTypes: []string{}, // for more precise filter but not needed. Limit: xdefault.WithFallback(params.Limit, defaultLimit), Offset: params.Offset, } if err = c.limiter.Wait(ctx); err != nil { return result, fmt.Errorf("waiting for limit: %w", err) } resp, err := c.http.R(). SetBody(reqParams). SetResult(&result). EnableTrace(). Post(c.makeEducationURL(urlPath)) if err != nil { return result, fmt.Errorf("making request: %w", err) } if resp.IsError() { return result, fmt.Errorf("bad status code %d: %w", resp.StatusCode(), errors.ErrUnexpectedStatus) } return result, nil } type educationProductFilterCountRequest struct { Filters educationProductFilter `json:"filters"` } type educationProductFilter struct { AdvertisingOnly bool `json:"advertisingOnly"` Location string `json:"location"` LearningTypes []string `json:"learningTypes"` CoursesThematics []string `json:"coursesThematics"` CourseGraphics []string `json:"courseGraphics"` CourseLevels []string `json:"courseLevels"` CourseFormats []string `json:"courseFormats"` CourseDurations []string `json:"courseDurations"` CourseTypes []string `json:"courseTypes"` } type boolableDict map[int]int type nameableDict map[string]int type ProductsFilterCount struct { IsCourseProfession boolableDict `json:"isCourseProfession"` // 0: count, 1: count eq to false + true CourseLevels nameableDict `json:"courseLevels"` CourseGraphics nameableDict `json:"courseGraphics"` OrganizationIDs nameableDict `json:"organizationIds"` HasTrialPeriod boolableDict `json:"hasTrialPeriod"` HasMentor boolableDict `json:"hasMentor"` HasJobGuarantee boolableDict `json:"hasJobGuarantee"` CourseFormats nameableDict `json:"courseFormats"` CourseDurations nameableDict `json:"courseDurations"` CoursesThematics nameableDict `json:"coursesThematics"` LearningTypes nameableDict `json:"learningTypes"` } func (c *client) ListEducationalProductsFilterCount( ctx context.Context, params ListEducationProductsParams, ) (result ProductsFilterCount, err error) { const urlPath = "/v2/education/products/filter/count" if err = c.checkClientInited(); err != nil { return result, err } if !c.validLearningTypes.hasValue(params.LearningType) { return result, errors.NewValidationError("learning_type", "unknown value") } for _, ct := range params.CoursesThematics { if !c.validCourseThematics.hasValue(ct) { return result, errors.NewValidationError("courses_thematics", "unknown value "+ct) } } reqParams := educationProductFilterCountRequest{ Filters: educationProductFilter{ AdvertisingOnly: false, Location: "", LearningTypes: valueAsArray(params.LearningType), CoursesThematics: params.CoursesThematics, CourseGraphics: params.CourseGraphics, CourseLevels: params.CourseLevels, CourseFormats: params.CourseFormats, CourseDurations: params.CourseDurations, CourseTypes: params.CourseTypes, }, } if err = c.limiter.Wait(ctx); err != nil { return result, fmt.Errorf("waiting for limit: %w", err) } var respData DataContainer[ProductsFilterCount] resp, err := c.http.R(). SetBody(reqParams). SetResult(&respData). EnableTrace(). Post(c.makeEducationURL(urlPath)) if err != nil { return result, fmt.Errorf("making request: %w", err) } if resp.IsError() { return result, fmt.Errorf("bad status code %d: %w", resp.StatusCode(), errors.ErrUnexpectedStatus) } return respData.Data, nil } func (c *client) makeEducationURL(path string) string { if c.cachedMainPageInfo == nil { return "" } return c.cachedMainPageInfo.RuntimeConfig.EducationURL + path } func (c *client) checkClientInited() error { if c.cachedMainPageInfo == nil { return ErrClientNotInited } return nil } func (c *client) getMainPageState(ctx context.Context) (*PageState, error) { ctxLogger := restyCtxLogger{ ctx: ctx, log: c.log, } req := c.http.R(). SetContext(ctx). SetLogger(ctxLogger). SetDoNotParseResponse(true). EnableTrace() resp, err := req.Get("/") if err != nil { return nil, fmt.Errorf("getting request: %w", err) } if resp.IsError() { c.log.ErrorContext(ctx, "unable to proceed request", slog.String("body", string(resp.Body()))) return nil, fmt.Errorf("got %d, but expected success: %w", resp.StatusCode(), errors.ErrUnexpectedStatus) } traceInfo := resp.Request.TraceInfo() c.log.InfoContext(ctx, "request proceeded", slog.Any("trace", traceInfo)) return c.parsePageState(ctx, resp.RawBody()) } func (c *client) parsePageState(ctx context.Context, body io.Reader) (*PageState, error) { page, err := html.Parse(body) if err != nil { return nil, fmt.Errorf("parsing body: %w", err) } c.log.DebugContext(ctx, "finding page state") var ( foundHtml bool foundBody bool ) findFunc := func(node *html.Node) (found, deeper bool) { if node == nil { c.log.DebugContext(ctx, "node is null, skipping") return false, false } if !foundHtml && node.Type == html.ElementNode { c.log.DebugContext(ctx, "found html node") foundHtml = true return false, true } if foundHtml && !foundBody && node.DataAtom == atom.Body { c.log.DebugContext(ctx, "found body node") foundBody = true return false, true } if foundHtml && foundBody && node.DataAtom == atom.Script { for _, attr := range node.Attr { if attr.Key == "id" && attr.Val == "__NEXT_DATA__" { c.log.DebugContext(ctx, "found script node with next_data") return true, false } } } return false, false } nextData := findNode(page, findFunc) if nextData == nil { return nil, nil } var out PageState dataReader := strings.NewReader(nextData.FirstChild.Data) err = json.NewDecoder(dataReader).Decode(&out) if err != nil { return nil, fmt.Errorf("decoding html data: %w", err) } return &out, nil } var educationProductFields = newQuerySet( "id", "name", "organization", "advertising", "discount", "link", "learningtype", "dateStart", "timeStart", "timeAllHour", "timeAllDay", "timeAllMonth", "isTermApproximately", "dictionaryFormatFilterNew", "dictionaryLevelFilterNew", "price", "priceAll", "priceInstallment", "courseImage", "price", "withoutDiscountPrice", ) var defaultProductFields = must(educationProductFields.exactSubset( "id", "name", "organization", "advertising", "discount", "link", "learningtype", "dateStart", "timeStart", "timeAllHour", "timeAllDay", "timeAllMonth", "price", "priceAll", "priceInstallment", "courseImage", "price", "withoutDiscountPrice", )) func must[T any](t T, err error) T { if err != nil { panic(err.Error()) } return t } type querySet struct { values []string mappedValues map[string]struct{} } func (qs querySet) Values() []string { out := make([]string, len(qs.values)) copy(out, qs.values) return out } func (qs querySet) hasValue(value string) bool { _, ok := qs.mappedValues[value] return ok } func (qs querySet) exactSubset(values ...string) ([]string, error) { out := make([]string, 0, len(values)) for _, value := range values { if !qs.hasValue(value) { return nil, fmt.Errorf("value %s was not found in set", value) } out = append(out, value) } return out, nil } func newQuerySet(values ...string) querySet { qs := querySet{ values: make([]string, len(values)), mappedValues: make(map[string]struct{}, len(values)), } for i, v := range values { qs.values[i] = v qs.mappedValues[v] = struct{}{} } return qs } func valueAsArray(value string) []string { if value == "" { return nil } return []string{value} }