Files
kurious/internal/infrastructure/interfaceadapters/courses/sravni/client.go
2023-11-23 19:13:54 +03:00

378 lines
8.6 KiB
Go

package sravni
import (
"context"
"encoding/json"
"fmt"
"io"
"log/slog"
"strconv"
"strings"
"git.loyso.art/frx/kurious/internal/domain"
"git.loyso.art/frx/kurious/pkg/utilities/slices"
"github.com/go-resty/resty/v2"
"golang.org/x/net/html"
"golang.org/x/net/html/atom"
)
// baseURL is the root address of the sravni.ru courses section.
const baseURL = "https://www.sravni.ru/kursy"
// Client describes the operations offered by the sravni courses client.
type Client interface {
	// GetMainPageState returns the main page state held by the client.
	GetMainPageState() *PageState

	// ListEducationalProducts requests the educational products that match
	// params.
	ListEducationalProducts(ctx context.Context, params ListEducationProductsParams) (result ListEducationProductsResponse, err error)
}
// NewClient builds a sravni client and eagerly loads the main page state.
//
// The main page is fetched once during construction; the dictionaries found
// in its state provide the learning types and course thematics that
// ListEducationalProducts accepts. When debug is true, debug mode of the
// underlying resty client is enabled. A nil log falls back to slog.Default().
//
// It returns an error when the main page cannot be fetched or parsed, and an
// error wrapping ErrClientNotInited when the page yields no state at all.
func NewClient(ctx context.Context, log *slog.Logger, debug bool) (c *client, err error) {
	if log == nil {
		log = slog.Default()
	}

	c = &client{
		log: log.With(slog.String("client", "sravni")),
		http: resty.New().
			SetBaseURL(baseURL).
			SetDebug(debug),
	}

	c.cachedMainPageInfo, err = c.getMainPageState(ctx)
	if err != nil {
		return nil, err
	}
	// Guard against a nil state reported without an error: the dictionaries
	// are dereferenced right below and would panic otherwise.
	if c.cachedMainPageInfo == nil {
		return nil, fmt.Errorf("main page state was not found: %w", ErrClientNotInited)
	}

	// getQuerySet collects the values of dictionary fields into a querySet.
	getQuerySet := func(fields []field) querySet {
		items := slices.Map(fields, func(f field) string {
			return f.Value
		})

		return newQuerySet(items...)
	}

	dicts := c.cachedMainPageInfo.Props.InitialReduxState.Dictionaries.Data
	c.validLearningTypes = getQuerySet(dicts.LearningType.Fields)
	c.validCourseThematics = getQuerySet(dicts.CourseThematics.Fields)

	return c, nil
}
// client is the resty-backed implementation used to talk to sravni.
type client struct {
	// log is the logger used for request and parsing diagnostics.
	log *slog.Logger
	// http is the underlying resty client.
	http *resty.Client

	// cachedMainPageInfo is the main page state loaded by NewClient.
	cachedMainPageInfo *PageState

	// validLearningTypes holds the learning type values accepted by
	// ListEducationalProducts.
	validLearningTypes querySet
	// validCourseThematics holds the course thematics values accepted by
	// ListEducationalProducts.
	validCourseThematics querySet
}
// GetMainPageState returns a clone of the cached main page state, so callers
// do not receive the client's own instance.
func (c *client) GetMainPageState() *PageState {
	cached := c.cachedMainPageInfo

	return cached.Clone()
}
// ListEducationProductsParams carries the caller-supplied filters and paging
// options for ListEducationalProducts.
type ListEducationProductsParams struct {
	// LearningType and CoursesThematics are validated against the values
	// known to the client before a request is made.
	LearningType, CoursesThematics string

	// Limit and Offset describe the requested page.
	Limit, Offset int
}
// ListEducationProductsRequest is the JSON body posted to the education
// products endpoint.
type ListEducationProductsRequest struct {
	// Optional identification and search fields; omitted from JSON when empty.
	Fingerprint string `json:"fingerPrint,omitempty"`
	ProductName string `json:"productName,omitempty"`

	// Selection flags and filters.
	AdvertisingOnly bool     `json:"advertisingOnly"`
	Location        string   `json:"location"`
	OfferTypes      []string `json:"offerTypes"`
	IsMix           bool     `json:"isMix"`
	MixRepeated     bool     `json:"mixRepeated"`

	// Fields lists the product fields requested in the response.
	Fields []string `json:"fields"`

	// Sorting.
	SortProperty  string `json:"sortProperty"`
	SortDirection string `json:"sortDirection"`

	// Dictionary-based filters.
	LearningType     []string `json:"learningtype"`
	CoursesThematics []string `json:"coursesThematics"`

	// Exclusion filters, sent as strings.
	NotSubIsWebinar string `json:"not-sub-isWebinar"`
	NotB2B          string `json:"not-b2b"`

	// Paging.
	Limit  int `json:"limit"`
	Offset int `json:"offset"`
}
// ListEducationProductsResponse is the decoded JSON reply of the education
// products endpoint.
type ListEducationProductsResponse struct {
	// Items holds the returned courses.
	Items []Course `json:"items"`
	// Organizations holds organizations keyed by a string identifier.
	Organizations map[string]Organization `json:"organizations"`

	// Counters reported by the API.
	TotalCount    int `json:"totalCount"`
	TotalCountAdv int `json:"totalCountAdv"`
}
// ListEducationalProducts fetches a page of educational products from the
// education API.
//
// params.LearningType and params.CoursesThematics must be values known from
// the main page dictionaries, otherwise a validation error is returned.
// params.Limit selects the page size (non-positive values fall back to the
// default of 1) and params.Offset the number of items to skip (negative
// values are treated as 0). ctx is attached to the HTTP request so that
// cancellation and deadlines are honored.
//
// It returns ErrClientNotInited when the main page state is missing, a
// wrapped transport error when the request fails, and an error wrapping
// domain.ErrUnexpectedStatus when the API answers with an error status code.
func (c *client) ListEducationalProducts(
	ctx context.Context,
	params ListEducationProductsParams,
) (result ListEducationProductsResponse, err error) {
	const (
		urlPath              = "/v1/education/products"
		defaultLimit         = 1
		defaultSortProp      = "advertising.position"
		defaultSortDirection = "asc"
	)

	if err = c.checkClientInited(); err != nil {
		return result, err
	}
	if !c.validLearningTypes.hasValue(params.LearningType) {
		return result, domain.NewValidationError("learning_type", "bad value")
	}
	if !c.validCourseThematics.hasValue(params.CoursesThematics) {
		return result, domain.NewValidationError("courses_thematics", "bad value")
	}

	// Honor the caller's paging options, falling back to safe defaults.
	limit := params.Limit
	if limit <= 0 {
		limit = defaultLimit
	}
	offset := params.Offset
	if offset < 0 {
		offset = 0
	}

	reqParams := ListEducationProductsRequest{
		LearningType: []string{
			params.LearningType,
		},
		CoursesThematics: []string{
			params.CoursesThematics,
		},

		Fields:          defaultProductFields,
		SortProperty:    defaultSortProp, // maybe sort by price?
		SortDirection:   defaultSortDirection,
		NotSubIsWebinar: strconv.FormatBool(true),
		NotB2B:          strconv.FormatBool(true),
		IsMix:           true,  // not sure why, but for better parsing
		MixRepeated:     true,  // looks like this option should force to exclude duplicates
		AdvertisingOnly: false, // if true, it will show only paid items
		Location:        "",    // TODO: get and fill location?
		Fingerprint:     "",    // not sure it should be set
		ProductName:     "",    // looks like it does not affect anything
		OfferTypes:      nil,   // for a more precise filter, but not needed

		Limit:  limit,
		Offset: offset,
	}

	req := c.http.R().
		SetContext(ctx).
		SetBody(reqParams).
		SetResult(&result).
		EnableTrace()

	resp, err := req.Post(c.makeEducationURL(urlPath))
	if err != nil {
		return result, fmt.Errorf("making request: %w", err)
	}
	if resp.IsError() {
		return result, fmt.Errorf("bad status code %d: %w", resp.StatusCode(), domain.ErrUnexpectedStatus)
	}

	return result, nil
}
// makeEducationURL joins the education API base URL taken from the cached
// main page runtime config with path. It returns an empty string when no main
// page state is cached.
func (c *client) makeEducationURL(path string) string {
	if state := c.cachedMainPageInfo; state != nil {
		return state.RuntimeConfig.EducationURL + path
	}

	return ""
}
// checkClientInited reports ErrClientNotInited when the main page state has
// not been cached, and nil otherwise.
func (c *client) checkClientInited() error {
	if c.cachedMainPageInfo != nil {
		return nil
	}

	return ErrClientNotInited
}
// getMainPageState downloads the main page and extracts the page state
// embedded in it.
//
// The response is requested unparsed so the HTML can be streamed into the
// parser; in that mode resty neither buffers nor closes the body, so it is
// closed here once parsing is finished.
//
// It returns a wrapped transport error when the request fails, an error
// wrapping domain.ErrUnexpectedStatus on an error status code or a missing
// body, and whatever parsePageState reports otherwise.
func (c *client) getMainPageState(ctx context.Context) (*PageState, error) {
	// maxLoggedBody bounds how much of an error response ends up in the log.
	const maxLoggedBody = 4 << 10

	ctxLogger := restyCtxLogger{
		ctx: ctx,
		log: c.log,
	}

	req := c.http.R().
		SetContext(ctx).
		SetLogger(ctxLogger).
		SetDoNotParseResponse(true).
		EnableTrace()

	resp, err := req.Get("/")
	if err != nil {
		return nil, fmt.Errorf("getting request: %w", err)
	}

	rawBody := resp.RawBody()
	if rawBody == nil {
		return nil, fmt.Errorf("got response without body: %w", domain.ErrUnexpectedStatus)
	}
	// The body is owned by the caller when response parsing is disabled.
	defer rawBody.Close()

	if resp.IsError() {
		// resp.Body() is empty for unparsed responses, so read a bounded
		// prefix of the raw body instead. The read error is deliberately
		// ignored: the body is only used to enrich the log record.
		snippet, _ := io.ReadAll(io.LimitReader(rawBody, maxLoggedBody))
		c.log.ErrorContext(ctx, "unable to proceed request", slog.String("body", string(snippet)))
		return nil, fmt.Errorf("got %d, but expected success: %w", resp.StatusCode(), domain.ErrUnexpectedStatus)
	}

	traceInfo := resp.Request.TraceInfo()
	c.log.InfoContext(ctx, "request proceeded", slog.Any("trace", traceInfo))

	return c.parsePageState(ctx, rawBody)
}
// parsePageState parses body as HTML, locates the script element whose id is
// "__NEXT_DATA__" and decodes its JSON content into a PageState.
//
// It returns an error when the HTML cannot be parsed, when the script element
// is missing or has no content, or when the content is not valid JSON for
// PageState. On success the returned state is never nil.
func (c *client) parsePageState(ctx context.Context, body io.Reader) (*PageState, error) {
	const nextDataID = "__NEXT_DATA__"

	page, err := html.Parse(body)
	if err != nil {
		return nil, fmt.Errorf("parsing body: %w", err)
	}

	c.log.DebugContext(ctx, "finding page state")

	var (
		foundHTML bool
		foundBody bool
	)
	// findFunc is the matcher handed to findNode. It reports whether node is
	// the wanted script element (found) and whether the node's children
	// should be visited (deeper) — presumably findNode stops on found and
	// descends on deeper; confirm against findNode.
	findFunc := func(node *html.Node) (found, deeper bool) {
		if node == nil {
			c.log.DebugContext(ctx, "node is null, skipping")
			return false, false
		}

		// The first element node encountered is treated as the html root.
		if !foundHTML && node.Type == html.ElementNode {
			c.log.DebugContext(ctx, "found html node")
			foundHTML = true
			return false, true
		}

		if foundHTML && !foundBody && node.DataAtom == atom.Body {
			c.log.DebugContext(ctx, "found body node")
			foundBody = true
			return false, true
		}

		if foundHTML && foundBody && node.DataAtom == atom.Script {
			for _, attr := range node.Attr {
				if attr.Key == "id" && attr.Val == nextDataID {
					c.log.DebugContext(ctx, "found script node with next_data")
					return true, false
				}
			}
		}

		return false, false
	}

	nextData := findNode(page, findFunc)
	if nextData == nil {
		// Report the absence explicitly instead of handing callers a nil
		// state together with a nil error.
		return nil, fmt.Errorf("script node with id %q was not found", nextDataID)
	}
	// An empty script element has no text child to decode.
	if nextData.FirstChild == nil {
		return nil, fmt.Errorf("script node with id %q is empty", nextDataID)
	}

	var out PageState
	dataReader := strings.NewReader(nextData.FirstChild.Data)
	if err = json.NewDecoder(dataReader).Decode(&out); err != nil {
		return nil, fmt.Errorf("decoding html data: %w", err)
	}

	return &out, nil
}
// educationProductFields is the set of product field names this client knows
// about; defaultProductFields must be drawn from it. Each name is listed
// exactly once.
var educationProductFields = newQuerySet(
	"id",
	"name",
	"organization",
	"advertising",
	"discount",
	"link",
	"learningtype",
	"dateStart",
	"timeStart",
	"timeAllHour",
	"timeAllDay",
	"timeAllMonth",
	"isTermApproximately",
	"dictionaryFormatFilterNew",
	"dictionaryLevelFilterNew",
	"price",
	"priceAll",
	"priceInstallment",
	"courseImage",
	"withoutDiscountPrice",
)
// defaultProductFields lists the product fields requested by
// ListEducationalProducts. Every name must be present in
// educationProductFields, otherwise package initialization panics. Each name
// is listed exactly once so the request does not carry repeated fields.
var defaultProductFields = must(educationProductFields.exactSubset(
	"id",
	"name",
	"organization",
	"advertising",
	"discount",
	"link",
	"learningtype",
	"dateStart",
	"timeStart",
	"timeAllHour",
	"timeAllDay",
	"timeAllMonth",
	"price",
	"priceAll",
	"priceInstallment",
	"courseImage",
	"withoutDiscountPrice",
))
// must unwraps a (value, error) pair: it returns t when err is nil and panics
// with the error's message otherwise.
func must[T any](t T, err error) T {
	if err == nil {
		return t
	}

	panic(err.Error())
}
// querySet is an ordered set of strings: values are kept in first-seen order
// and can be checked for membership through a map lookup.
type querySet struct {
	// values holds the unique values in insertion order.
	values []string
	// mappedValues indexes values for membership checks.
	mappedValues map[string]struct{}
}

// Values returns a copy of the stored values, so callers may modify the
// result without affecting the set.
func (qs querySet) Values() []string {
	out := make([]string, len(qs.values))
	copy(out, qs.values)

	return out
}

// hasValue reports whether value is a member of the set. It is safe to call
// on the zero querySet, which contains nothing.
func (qs querySet) hasValue(value string) bool {
	_, ok := qs.mappedValues[value]

	return ok
}

// exactSubset returns values unchanged, in the given order, provided that
// every one of them is a member of the set. It returns an error naming the
// first value that is not.
func (qs querySet) exactSubset(values ...string) ([]string, error) {
	out := make([]string, 0, len(values))
	for _, value := range values {
		if !qs.hasValue(value) {
			return nil, fmt.Errorf("value %s was not found in set", value)
		}

		out = append(out, value)
	}

	return out, nil
}

// Disabled lenient variant of exactSubset that drops unknown values instead
// of failing:
//
// func (qs querySet) subset(values ...string) []string {
// 	out := make([]string, 0, len(values))
// 	for _, value := range values {
// 		if qs.hasValue(value) {
// 			out = append(out, value)
// 		}
// 	}
//
// 	return out
// }

// newQuerySet builds a querySet from values. Repeated values are stored only
// once, at the position of their first occurrence, so Values and the
// membership index always agree.
func newQuerySet(values ...string) querySet {
	qs := querySet{
		values:       make([]string, 0, len(values)),
		mappedValues: make(map[string]struct{}, len(values)),
	}

	for _, v := range values {
		if qs.hasValue(v) {
			continue
		}

		qs.values = append(qs.values, v)
		qs.mappedValues[v] = struct{}{}
	}

	return qs
}