542 lines
15 KiB
Go
542 lines
15 KiB
Go
package sravni
|
||
|
||
import (
|
||
"context"
|
||
"encoding/json"
|
||
"fmt"
|
||
"io"
|
||
"log/slog"
|
||
"strconv"
|
||
"strings"
|
||
"time"
|
||
|
||
"git.loyso.art/frx/kurious/internal/common/errors"
|
||
"git.loyso.art/frx/kurious/pkg/slices"
|
||
"git.loyso.art/frx/kurious/pkg/xdefault"
|
||
|
||
"github.com/go-resty/resty/v2"
|
||
"golang.org/x/net/html"
|
||
"golang.org/x/net/html/atom"
|
||
"golang.org/x/time/rate"
|
||
)
|
||
|
||
// baseURL is the root address configured on the HTTP client; the main page
// is requested relative to it.
const baseURL = "https://www.sravni.ru/kursy"
|
||
|
||
//go:generate mockery --name Client
|
||
type Client interface {
|
||
GetMainPageState() (*PageState, error)
|
||
|
||
ListEducationalProducts(
|
||
ctx context.Context,
|
||
params ListEducationProductsParams,
|
||
) (result listEducationProductsResponse, err error)
|
||
ListEducationalProductsFilterCount(
|
||
ctx context.Context,
|
||
params ListEducationProductsParams,
|
||
) (result ProductsFilterCount, err error)
|
||
}
|
||
|
||
func NewClient(ctx context.Context, log *slog.Logger, debug bool) (c *client, err error) {
|
||
c = &client{
|
||
log: log.With(slog.String("client", "sravni")),
|
||
limiter: rate.NewLimiter(rate.Every(time.Millisecond*100), 1),
|
||
http: resty.New().
|
||
SetBaseURL(baseURL).
|
||
SetDebug(debug),
|
||
}
|
||
|
||
c.cachedMainPageInfo, err = c.getMainPageState(ctx)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
|
||
getQuerySet := func(fields []field) querySet {
|
||
items := slices.Map(fields, func(f field) string {
|
||
return f.Value
|
||
})
|
||
|
||
return newQuerySet(items...)
|
||
}
|
||
|
||
dicts := c.cachedMainPageInfo.Props.InitialReduxState.Dictionaries.Data
|
||
c.validLearningTypes = getQuerySet(dicts.LearningType.Fields)
|
||
c.validCourseThematics = getQuerySet(dicts.CourseThematics.Fields)
|
||
|
||
return c, nil
|
||
}
|
||
|
||
type client struct {
|
||
log *slog.Logger
|
||
http *resty.Client
|
||
|
||
cachedMainPageInfo *PageState
|
||
validLearningTypes querySet
|
||
validCourseThematics querySet
|
||
|
||
limiter *rate.Limiter
|
||
}
|
||
|
||
func (c *client) GetMainPageState() (*PageState, error) {
|
||
return c.cachedMainPageInfo.Clone(), nil
|
||
}
|
||
|
||
// ListEducationProductsParams carries the filters and pagination options
// accepted by the product listing and filter-count calls.
type ListEducationProductsParams struct {
	// LearningType and CoursesThematics are validated against the
	// dictionaries loaded from the main page.
	LearningType     string
	CoursesThematics []string

	// The course filters below are forwarded only by
	// ListEducationalProductsFilterCount.
	CourseGraphics  []string
	CourseLevels    []string
	CourseFormats   []string
	CourseDurations []string
	CourseTypes     []string

	SortBy string
	Limit  int
	Offset int
}
|
||
|
||
// stringifiedBool is a boolean rendered as the string "true" or "false".
type stringifiedBool string

// AsStringifiedBool converts b into its stringifiedBool form.
func AsStringifiedBool(b bool) stringifiedBool {
	if b {
		return "true"
	}

	return "false"
}
|
||
|
||
// FilterLevel is a difficulty level filter value.
type FilterLevel string

// Known difficulty levels.
const (
	FilterLevelJunior   = FilterLevel("levelJuniorNew")
	FilterLevelMiddle   = FilterLevel("levelMiddleNew")
	FilterLevelChildren = FilterLevel("levelChildNew")
)
|
||
|
||
// FilterTime is a study duration filter value.
type FilterTime string

// Known study durations.
const (
	// FilterTimeLessMonth is less than a month.
	FilterTimeLessMonth = FilterTime("1")
	// FilterTimeFrom1To3Month is from one to three months.
	FilterTimeFrom1To3Month = FilterTime("2")
	// FilterTimeFrom3To6 is from three to six months.
	FilterTimeFrom3To6 = FilterTime("3")
	// FilterTimeFrom6To12 is from six to twelve months.
	FilterTimeFrom6To12 = FilterTime("4")
	// FilterTimeFrom12 is from twelve months.
	FilterTimeFrom12 = FilterTime("5")
)
|
||
|
||
// FilterFormat is a form-of-study filter value.
type FilterFormat string

// Known forms of study.
const (
	FilterFormatRecord  = FilterFormat("formatRecordNew")
	FilterFormatOnline  = FilterFormat("formatOnlineNew")
	FilterFormatOffline = FilterFormat("formatOfflineNew")
)
|
||
|
||
// FilterGraphic is a course schedule filter value.
type FilterGraphic string

// Known course schedules.
const (
	FilterGraphicTimeLength = FilterGraphic("courseTimeLengthNew")
	FilterGraphicTerm       = FilterGraphic("courseTimeTermNew")
)
|
||
|
||
type listEducationProductsRequest struct {
|
||
Fingerprint string `json:"fingerPrint,omitempty"`
|
||
ProductName string `json:"productName,omitempty"`
|
||
Location string `json:"location"`
|
||
OfferTypes []string `json:"offerTypes"`
|
||
IsMix bool `json:"isMix"`
|
||
MixRepeated bool `json:"mixRepeated"`
|
||
Fields []string `json:"fields"`
|
||
|
||
// Filters
|
||
LearningType []string `json:"learningtype"`
|
||
CoursesThematics []string `json:"coursesThematics"`
|
||
Organizations []string `json:"organizations,omitempty"` // list of ids
|
||
DictionatyFormatFilterNew []FilterFormat `json:"dictionaryFormatFilterNew,omitempty"`
|
||
DictionaryTimeFilter []FilterTime `json:"dictionaryTimeFilter,omitempty"`
|
||
DictionaryGraphicFilterNew []FilterGraphic `json:"dictionaryGraphicFilterNew,omitempty"`
|
||
DictionatyLevelFilterNew []FilterLevel `json:"dictionaryLevelFilterNew,omitempty"`
|
||
|
||
// Options
|
||
SubMentor []stringifiedBool `json:"sub-mentor,omitempty"` // option with mentor
|
||
SubTimeFree []stringifiedBool `json:"sub-timeFree,omitempty"` // option with trial
|
||
SubJobGarantSub []stringifiedBool `json:"sub-jobGarantsub,omitempty"` // option for job garantee
|
||
SubPriceFree []stringifiedBool `json:"sub-priceFree,omitempty"` // only free
|
||
SubInstallment []stringifiedBool `json:"sub-installment,omitempty"` // with credit
|
||
SubIsCourseProfession []stringifiedBool `json:"sub-isCourseProfession,omitempty"` // освоить профессию с нуля
|
||
DevelopSkills []stringifiedBool `json:"developSkills,omitempty"` // развить навыки
|
||
|
||
NotSubIsWebinar string `json:"not-sub-isWebinar,omitempty"`
|
||
NotB2B string `json:"not-b2b,omitempty"`
|
||
AdvertisingOnly bool `json:"advertisingOnly,omitempty"`
|
||
|
||
// Pagination and sorting
|
||
Limit int `json:"limit"`
|
||
Offset int `json:"offset"`
|
||
SortProperty string `json:"sortProperty"`
|
||
SortDirection string `json:"sortDirection"`
|
||
}
|
||
|
||
type listEducationProductsResponse struct {
|
||
Items []Course `json:"items"`
|
||
Organizations map[string]Organization `json:"organizations"`
|
||
|
||
TotalCount int `json:"totalCount"`
|
||
TotalCountAdv int `json:"totalCountAdv"`
|
||
}
|
||
|
||
func (c *client) ListEducationalProducts(
|
||
ctx context.Context,
|
||
params ListEducationProductsParams,
|
||
) (result listEducationProductsResponse, err error) {
|
||
const urlPath = "/v1/education/products"
|
||
const defaultLimit = 1
|
||
const defaultSortProp = "advertising.position"
|
||
const defaultSortDirection = "asc"
|
||
// TODO: find out should it be settable
|
||
const productName = "learning-courses"
|
||
if err = c.checkClientInited(); err != nil {
|
||
return result, err
|
||
}
|
||
|
||
if !c.validLearningTypes.hasValue(params.LearningType) {
|
||
return result, errors.NewValidationError("learning_type", "unknown value")
|
||
}
|
||
for _, ct := range params.CoursesThematics {
|
||
if !c.validCourseThematics.hasValue(ct) {
|
||
return result, errors.NewValidationError("courses_thematics", "unknown value "+ct)
|
||
}
|
||
}
|
||
|
||
reqParams := listEducationProductsRequest{
|
||
LearningType: valueAsArray(params.LearningType),
|
||
CoursesThematics: params.CoursesThematics,
|
||
ProductName: productName,
|
||
Fields: defaultProductFields,
|
||
SortProperty: defaultSortProp, // mayber sort by price?
|
||
SortDirection: defaultSortDirection,
|
||
NotSubIsWebinar: strconv.FormatBool(true),
|
||
NotB2B: strconv.FormatBool(true),
|
||
IsMix: false, // not sure why, but for better parsing
|
||
MixRepeated: true, // looks like this option should force to exclude duplicates
|
||
AdvertisingOnly: false, // If true, it will show only paid items.
|
||
Location: "", // TODO: get and fill location?
|
||
Fingerprint: "", // not sure it should be set.
|
||
OfferTypes: []string{}, // for more precise filter but not needed.
|
||
|
||
Limit: xdefault.WithFallback(params.Limit, defaultLimit),
|
||
Offset: params.Offset,
|
||
}
|
||
|
||
if err = c.limiter.Wait(ctx); err != nil {
|
||
return result, fmt.Errorf("waiting for limit: %w", err)
|
||
}
|
||
|
||
resp, err := c.http.R().
|
||
SetBody(reqParams).
|
||
SetResult(&result).
|
||
EnableTrace().
|
||
Post(c.makeEducationURL(urlPath))
|
||
if err != nil {
|
||
return result, fmt.Errorf("making request: %w", err)
|
||
}
|
||
|
||
if resp.IsError() {
|
||
return result, fmt.Errorf("bad status code %d: %w", resp.StatusCode(), errors.ErrUnexpectedStatus)
|
||
}
|
||
|
||
return result, nil
|
||
}
|
||
|
||
type educationProductFilterCountRequest struct {
|
||
Filters educationProductFilter `json:"filters"`
|
||
}
|
||
|
||
// educationProductFilter lists the filters sent with a filter count request.
type educationProductFilter struct {
	AdvertisingOnly bool   `json:"advertisingOnly"`
	Location        string `json:"location"`

	LearningTypes    []string `json:"learningTypes"`
	CoursesThematics []string `json:"coursesThematics"`
	CourseGraphics   []string `json:"courseGraphics"`
	CourseLevels     []string `json:"courseLevels"`
	CourseFormats    []string `json:"courseFormats"`
	CourseDurations  []string `json:"courseDurations"`
	CourseTypes      []string `json:"courseTypes"`
}
|
||
|
||
type (
	// boolableDict maps an integer key to a counter.
	boolableDict map[int]int
	// nameableDict maps a string key to a counter.
	nameableDict map[string]int
)
|
||
|
||
type ProductsFilterCount struct {
|
||
IsCourseProfession boolableDict `json:"isCourseProfession"` // 0: count, 1: count eq to false + true
|
||
CourseLevels nameableDict `json:"courseLevels"`
|
||
CourseGraphics nameableDict `json:"courseGraphics"`
|
||
OrganizationIDs nameableDict `json:"organizationIds"`
|
||
HasTrialPeriod boolableDict `json:"hasTrialPeriod"`
|
||
HasMentor boolableDict `json:"hasMentor"`
|
||
HasJobGuarantee boolableDict `json:"hasJobGuarantee"`
|
||
CourseFormats nameableDict `json:"courseFormats"`
|
||
CourseDurations nameableDict `json:"courseDurations"`
|
||
CoursesThematics nameableDict `json:"coursesThematics"`
|
||
LearningTypes nameableDict `json:"learningTypes"`
|
||
}
|
||
|
||
func (c *client) ListEducationalProductsFilterCount(
|
||
ctx context.Context,
|
||
params ListEducationProductsParams,
|
||
) (result ProductsFilterCount, err error) {
|
||
const urlPath = "/v2/education/products/filter/count"
|
||
if err = c.checkClientInited(); err != nil {
|
||
return result, err
|
||
}
|
||
|
||
if !c.validLearningTypes.hasValue(params.LearningType) {
|
||
return result, errors.NewValidationError("learning_type", "unknown value")
|
||
}
|
||
for _, ct := range params.CoursesThematics {
|
||
if !c.validCourseThematics.hasValue(ct) {
|
||
return result, errors.NewValidationError("courses_thematics", "unknown value "+ct)
|
||
}
|
||
}
|
||
|
||
reqParams := educationProductFilterCountRequest{
|
||
Filters: educationProductFilter{
|
||
AdvertisingOnly: false,
|
||
Location: "",
|
||
LearningTypes: valueAsArray(params.LearningType),
|
||
CoursesThematics: params.CoursesThematics,
|
||
CourseGraphics: params.CourseGraphics,
|
||
CourseLevels: params.CourseLevels,
|
||
CourseFormats: params.CourseFormats,
|
||
CourseDurations: params.CourseDurations,
|
||
CourseTypes: params.CourseTypes,
|
||
},
|
||
}
|
||
|
||
if err = c.limiter.Wait(ctx); err != nil {
|
||
return result, fmt.Errorf("waiting for limit: %w", err)
|
||
}
|
||
|
||
var respData DataContainer[ProductsFilterCount]
|
||
resp, err := c.http.R().
|
||
SetBody(reqParams).
|
||
SetResult(&respData).
|
||
EnableTrace().
|
||
Post(c.makeEducationURL(urlPath))
|
||
if err != nil {
|
||
return result, fmt.Errorf("making request: %w", err)
|
||
}
|
||
|
||
if resp.IsError() {
|
||
return result, fmt.Errorf("bad status code %d: %w", resp.StatusCode(), errors.ErrUnexpectedStatus)
|
||
}
|
||
|
||
return respData.Data, nil
|
||
}
|
||
|
||
func (c *client) makeEducationURL(path string) string {
|
||
if c.cachedMainPageInfo == nil {
|
||
return ""
|
||
}
|
||
|
||
return c.cachedMainPageInfo.RuntimeConfig.EducationURL + path
|
||
}
|
||
|
||
func (c *client) checkClientInited() error {
|
||
if c.cachedMainPageInfo == nil {
|
||
return ErrClientNotInited
|
||
}
|
||
|
||
return nil
|
||
}
|
||
|
||
func (c *client) getMainPageState(ctx context.Context) (*PageState, error) {
|
||
ctxLogger := restyCtxLogger{
|
||
ctx: ctx,
|
||
log: c.log,
|
||
}
|
||
|
||
req := c.http.R().
|
||
SetContext(ctx).
|
||
SetLogger(ctxLogger).
|
||
SetDoNotParseResponse(true).
|
||
EnableTrace()
|
||
|
||
resp, err := req.Get("/")
|
||
if err != nil {
|
||
return nil, fmt.Errorf("getting request: %w", err)
|
||
}
|
||
|
||
if resp.IsError() {
|
||
c.log.ErrorContext(ctx, "unable to proceed request", slog.String("body", string(resp.Body())))
|
||
return nil, fmt.Errorf("got %d, but expected success: %w", resp.StatusCode(), errors.ErrUnexpectedStatus)
|
||
}
|
||
|
||
traceInfo := resp.Request.TraceInfo()
|
||
c.log.InfoContext(ctx, "request proceeded", slog.Any("trace", traceInfo))
|
||
|
||
return c.parsePageState(ctx, resp.RawBody())
|
||
}
|
||
|
||
func (c *client) parsePageState(ctx context.Context, body io.Reader) (*PageState, error) {
|
||
page, err := html.Parse(body)
|
||
if err != nil {
|
||
return nil, fmt.Errorf("parsing body: %w", err)
|
||
}
|
||
|
||
c.log.DebugContext(ctx, "finding page state")
|
||
|
||
var (
|
||
foundHtml bool
|
||
foundBody bool
|
||
)
|
||
findFunc := func(node *html.Node) (found, deeper bool) {
|
||
if node == nil {
|
||
c.log.DebugContext(ctx, "node is null, skipping")
|
||
return false, false
|
||
}
|
||
|
||
if !foundHtml && node.Type == html.ElementNode {
|
||
c.log.DebugContext(ctx, "found html node")
|
||
foundHtml = true
|
||
return false, true
|
||
}
|
||
|
||
if foundHtml && !foundBody && node.DataAtom == atom.Body {
|
||
c.log.DebugContext(ctx, "found body node")
|
||
foundBody = true
|
||
return false, true
|
||
}
|
||
|
||
if foundHtml && foundBody && node.DataAtom == atom.Script {
|
||
for _, attr := range node.Attr {
|
||
if attr.Key == "id" && attr.Val == "__NEXT_DATA__" {
|
||
c.log.DebugContext(ctx, "found script node with next_data")
|
||
return true, false
|
||
}
|
||
}
|
||
}
|
||
|
||
return false, false
|
||
}
|
||
|
||
nextData := findNode(page, findFunc)
|
||
if nextData == nil {
|
||
return nil, nil
|
||
}
|
||
|
||
var out PageState
|
||
dataReader := strings.NewReader(nextData.FirstChild.Data)
|
||
err = json.NewDecoder(dataReader).Decode(&out)
|
||
if err != nil {
|
||
return nil, fmt.Errorf("decoding html data: %w", err)
|
||
}
|
||
return &out, nil
|
||
}
|
||
|
||
var educationProductFields = newQuerySet(
|
||
"id",
|
||
"name",
|
||
"organization",
|
||
"advertising",
|
||
"discount",
|
||
"link",
|
||
"learningtype",
|
||
"dateStart",
|
||
"timeStart",
|
||
"timeAllHour",
|
||
"timeAllDay",
|
||
"timeAllMonth",
|
||
"isTermApproximately",
|
||
"dictionaryFormatFilterNew",
|
||
"dictionaryLevelFilterNew",
|
||
"price",
|
||
"priceAll",
|
||
"priceInstallment",
|
||
"courseImage",
|
||
"price",
|
||
"withoutDiscountPrice",
|
||
)
|
||
|
||
var defaultProductFields = must(educationProductFields.exactSubset(
|
||
"id",
|
||
"name",
|
||
"organization",
|
||
"advertising",
|
||
"discount",
|
||
"link",
|
||
"learningtype",
|
||
"dateStart",
|
||
"timeStart",
|
||
"timeAllHour",
|
||
"timeAllDay",
|
||
"timeAllMonth",
|
||
"price",
|
||
"priceAll",
|
||
"priceInstallment",
|
||
"courseImage",
|
||
"price",
|
||
"withoutDiscountPrice",
|
||
))
|
||
|
||
// must returns t when err is nil and panics with the error's message
// otherwise.
func must[T any](t T, err error) T {
	if err == nil {
		return t
	}

	panic(err.Error())
}
|
||
|
||
// querySet is an ordered list of string values paired with a lookup index.
type querySet struct {
	values       []string
	mappedValues map[string]struct{}
}

// Values returns a copy of the values in their original order.
func (qs querySet) Values() []string {
	out := make([]string, len(qs.values))
	for i := range qs.values {
		out[i] = qs.values[i]
	}

	return out
}

// hasValue reports whether value belongs to the set.
func (qs querySet) hasValue(value string) bool {
	_, found := qs.mappedValues[value]
	return found
}

// exactSubset returns values as a new slice when every one of them belongs
// to the set. It returns an error naming the first value that does not.
func (qs querySet) exactSubset(values ...string) ([]string, error) {
	for _, candidate := range values {
		if _, known := qs.mappedValues[candidate]; !known {
			return nil, fmt.Errorf("value %s was not found in set", candidate)
		}
	}

	out := make([]string, 0, len(values))
	out = append(out, values...)

	return out, nil
}

// newQuerySet builds a querySet from values, keeping their order (and any
// duplicates) in the value list.
func newQuerySet(values ...string) querySet {
	ordered := make([]string, len(values))
	copy(ordered, values)

	index := make(map[string]struct{}, len(values))
	for _, v := range values {
		index[v] = struct{}{}
	}

	return querySet{
		values:       ordered,
		mappedValues: index,
	}
}
|
||
|
||
// valueAsArray wraps a non-empty value into a single-element slice and
// returns nil for an empty string.
func valueAsArray(value string) []string {
	if value != "" {
		return []string{value}
	}

	return nil
}
|