package sravni import ( "context" "encoding/json" "fmt" "io" "log/slog" "strings" "git.loyso.art/frx/kurious/internal/domain" "github.com/go-resty/resty/v2" "golang.org/x/net/html" "golang.org/x/net/html/atom" ) const ( baseURL = "https://www.sravni.ru/kursy" ) func NewClient(ctx context.Context, log *slog.Logger, debug bool) (c *client, err error) { c = &client{ log: log.With(slog.String("client", "sravni")), http: resty.New(). SetBaseURL(baseURL). SetDebug(debug), } c.cachedMainPageInfo, err = c.getMainPageState(ctx) if err != nil { return nil, err } return c, nil } type client struct { log *slog.Logger http *resty.Client cachedMainPageInfo *PageState } type PageStateRuntimeConfig struct { BrandingURL string `json:"brandingUrl"` Release string `json:"release"` Environment string `json:"environment"` Gateway string `json:"gatewayUrl"` APIGatewayURL string `json:"apiGatewayUrl"` EducationURL string `json:"educationUrl"` PhoneVerifierURL string `json:"phoneVerifierUrl"` WebPath string `json:"webPath"` ServiceName string `json:"serviceName"` OrgnazationURL string `json:"organizationsUrl"` } type Link struct { URL string `json:"url"` Title string `json:"title"` } type ReduxStatePrefooterItem struct { Title string `json:"title"` Links []Link `json:"links"` } type ReduxMetadata struct { Data struct { Prefooter []ReduxStatePrefooterItem `json:"prefooter"` } `json:"data"` } type InitialReduxState struct { Metadata ReduxMetadata `json:"metadata"` Categories struct { Data map[string]int `json:"data"` } `json:"categories"` } type PageStateProperties struct { InitialReduxState InitialReduxState `json:"initialReduxState"` } type PageState struct { Page string `json:"page"` Query map[string]string `json:"query"` BuildID string `json:"buildId"` RuntimeConfig PageStateRuntimeConfig `json:"runtimeConfig"` Props PageStateProperties `json:"props"` } func (p *PageState) Clone() *PageState { copiedState := *p copiedState.Query = make(map[string]string, len(p.Query)) for k, v := range p.Query { copiedState.Query[k] = v } data := p.Props.InitialReduxState.Categories.Data copiedData := make(map[string]int, len(data)) for k, v := range data { copiedData[k] = v } copiedState.Props.InitialReduxState.Categories.Data = copiedData return &copiedState } func (c *client) GetMainPageState() *PageState { return c.cachedMainPageInfo.Clone() } func (c *client) getMainPageState(ctx context.Context) (*PageState, error) { ctxLogger := restyCtxLogger{ ctx: ctx, log: c.log, } req := c.http.R(). SetContext(ctx). SetLogger(ctxLogger). SetDoNotParseResponse(true). EnableTrace() resp, err := req.Get("/") if err != nil { return nil, fmt.Errorf("getting request: %w", err) } if resp.IsError() { c.log.ErrorContext(ctx, "unable to proceed request", slog.String("body", string(resp.Body()))) return nil, fmt.Errorf("got %d, but expected success: %w", resp.StatusCode(), domain.UnexpectedStatusError) } traceInfo := resp.Request.TraceInfo() c.log.InfoContext(ctx, "request proceeded", slog.Any("trace", traceInfo)) return c.parsePageState(ctx, resp.RawBody()) } func (c *client) parsePageState(ctx context.Context, body io.Reader) (*PageState, error) { page, err := html.Parse(body) if err != nil { return nil, fmt.Errorf("parsing body: %w", err) } c.log.DebugContext(ctx, "finding page state") var ( foundHtml bool foundBody bool ) findFunc := func(node *html.Node) (found, deeper bool) { if node == nil { c.log.DebugContext(ctx, "node is null, skipping") return false, false } if !foundHtml && node.Type == html.ElementNode { c.log.DebugContext(ctx, "found html node") foundHtml = true return false, true } if foundHtml && !foundBody && node.DataAtom == atom.Body { c.log.DebugContext(ctx, "found body node") foundBody = true return false, true } if foundHtml && foundBody && node.DataAtom == atom.Script { for _, attr := range node.Attr { if attr.Key == "id" && attr.Val == "__NEXT_DATA__" { c.log.DebugContext(ctx, "found script node with next_data") return true, false } } } return false, false } nextData := findNode(page, findFunc) if nextData == nil { return nil, nil } var out PageState dataReader := strings.NewReader(nextData.FirstChild.Data) err = json.NewDecoder(dataReader).Decode(&out) if err != nil { return nil, fmt.Errorf("decoding html data: %w", err) } return &out, nil } func findNode(parent *html.Node, eq func(*html.Node) (found, deeper bool)) *html.Node { for child := parent.FirstChild; child != nil; child = child.NextSibling { found, deeper := eq(child) if found { return child } if deeper { deeperChild := findNode(child, eq) if deeperChild != nil { return deeperChild } } } return nil }