add metadata prefooter
This commit is contained in:
@ -1,10 +1,10 @@
|
||||
package sravni
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"log/slog"
|
||||
"strings"
|
||||
|
||||
@ -19,21 +19,30 @@ const (
|
||||
baseURL = "https://www.sravni.ru/kursy"
|
||||
)
|
||||
|
||||
func NewClient(log *slog.Logger, debug bool) *client {
|
||||
return &client{
|
||||
func NewClient(ctx context.Context, log *slog.Logger, debug bool) (c *client, err error) {
|
||||
c = &client{
|
||||
log: log.With(slog.String("client", "sravni")),
|
||||
http: resty.New().
|
||||
SetBaseURL(baseURL).
|
||||
SetDebug(debug),
|
||||
}
|
||||
|
||||
c.cachedMainPageInfo, err = c.getMainPageState(ctx)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return c, nil
|
||||
}
|
||||
|
||||
type client struct {
|
||||
log *slog.Logger
|
||||
http *resty.Client
|
||||
|
||||
cachedMainPageInfo *PageState
|
||||
}
|
||||
|
||||
type MetaInfoRuntimeConfig struct {
|
||||
type PageStateRuntimeConfig struct {
|
||||
BrandingURL string `json:"brandingUrl"`
|
||||
Release string `json:"release"`
|
||||
Environment string `json:"environment"`
|
||||
@ -46,25 +55,63 @@ type MetaInfoRuntimeConfig struct {
|
||||
OrgnazationURL string `json:"organizationsUrl"`
|
||||
}
|
||||
|
||||
type MetaInfoReduxState struct {
|
||||
// Link is a titled hyperlink as it appears in the page's JSON state.
type Link struct {
	// URL is decoded from the "url" key.
	URL string `json:"url"`
	// Title is decoded from the "title" key.
	Title string `json:"title"`
}
|
||||
|
||||
type ReduxStatePrefooterItem struct {
|
||||
Title string `json:"title"`
|
||||
Links []Link `json:"links"`
|
||||
}
|
||||
|
||||
type ReduxMetadata struct {
|
||||
Data struct {
|
||||
Prefooter []ReduxStatePrefooterItem `json:"prefooter"`
|
||||
} `json:"data"`
|
||||
}
|
||||
|
||||
type InitialReduxState struct {
|
||||
Metadata ReduxMetadata `json:"metadata"`
|
||||
Categories struct {
|
||||
Data map[string]int `json:"data"`
|
||||
} `json:"categories"`
|
||||
}
|
||||
|
||||
type MetaInfoProps struct {
|
||||
InitialReduxState MetaInfoReduxState `json:"initialReduxState"`
|
||||
type PageStateProperties struct {
|
||||
InitialReduxState InitialReduxState `json:"initialReduxState"`
|
||||
}
|
||||
|
||||
type MetaInfo struct {
|
||||
Page string `json:"page"`
|
||||
Query map[string]string `json:"query"`
|
||||
BuildID string `json:"buildId"`
|
||||
RuntimeConfig MetaInfoRuntimeConfig `json:"runtimeConfig"`
|
||||
Props MetaInfoProps `json:"props"`
|
||||
type PageState struct {
|
||||
Page string `json:"page"`
|
||||
Query map[string]string `json:"query"`
|
||||
BuildID string `json:"buildId"`
|
||||
RuntimeConfig PageStateRuntimeConfig `json:"runtimeConfig"`
|
||||
Props PageStateProperties `json:"props"`
|
||||
}
|
||||
|
||||
func (c *client) GetMetaInfo(ctx context.Context) (*MetaInfo, error) {
|
||||
func (p *PageState) Clone() *PageState {
|
||||
copiedState := *p
|
||||
copiedState.Query = make(map[string]string, len(p.Query))
|
||||
for k, v := range p.Query {
|
||||
copiedState.Query[k] = v
|
||||
}
|
||||
|
||||
data := p.Props.InitialReduxState.Categories.Data
|
||||
copiedData := make(map[string]int, len(data))
|
||||
for k, v := range data {
|
||||
copiedData[k] = v
|
||||
}
|
||||
copiedState.Props.InitialReduxState.Categories.Data = copiedData
|
||||
|
||||
return &copiedState
|
||||
}
|
||||
|
||||
func (c *client) GetMainPageState() *PageState {
|
||||
return c.cachedMainPageInfo.Clone()
|
||||
}
|
||||
|
||||
func (c *client) getMainPageState(ctx context.Context) (*PageState, error) {
|
||||
ctxLogger := restyCtxLogger{
|
||||
ctx: ctx,
|
||||
log: c.log,
|
||||
@ -73,6 +120,7 @@ func (c *client) GetMetaInfo(ctx context.Context) (*MetaInfo, error) {
|
||||
req := c.http.R().
|
||||
SetContext(ctx).
|
||||
SetLogger(ctxLogger).
|
||||
SetDoNotParseResponse(true).
|
||||
EnableTrace()
|
||||
|
||||
resp, err := req.Get("/")
|
||||
@ -88,85 +136,78 @@ func (c *client) GetMetaInfo(ctx context.Context) (*MetaInfo, error) {
|
||||
traceInfo := resp.Request.TraceInfo()
|
||||
c.log.InfoContext(ctx, "request proceeded", slog.Any("trace", traceInfo))
|
||||
|
||||
r := bytes.NewReader(resp.Body())
|
||||
nodes, err := html.Parse(r)
|
||||
return c.parsePageState(ctx, resp.RawBody())
|
||||
}
|
||||
|
||||
func (c *client) parsePageState(ctx context.Context, body io.Reader) (*PageState, error) {
|
||||
page, err := html.Parse(body)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("parsing html body: %w", err)
|
||||
return nil, fmt.Errorf("parsing body: %w", err)
|
||||
}
|
||||
|
||||
c.log.InfoContext(ctx, "inspecting node", slog.Any("node", nodes))
|
||||
c.log.DebugContext(ctx, "finding page state")
|
||||
|
||||
htmlNode := func() *html.Node {
|
||||
for child := nodes.FirstChild; child != nil; child = child.NextSibling {
|
||||
c.log.InfoContext(ctx, "inspecting node", slog.Any("node", child))
|
||||
if child.Type == html.ElementNode {
|
||||
return child
|
||||
}
|
||||
var (
|
||||
foundHtml bool
|
||||
foundBody bool
|
||||
)
|
||||
findFunc := func(node *html.Node) (found, deeper bool) {
|
||||
if node == nil {
|
||||
c.log.DebugContext(ctx, "node is null, skipping")
|
||||
return false, false
|
||||
}
|
||||
|
||||
return nil
|
||||
}()
|
||||
if htmlNode == nil {
|
||||
c.log.WarnContext(ctx, "no html node found")
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
var bodyNode *html.Node
|
||||
for child := htmlNode.FirstChild; child != nil; child = child.NextSibling {
|
||||
c.log.InfoContext(ctx, "inspecting html node", slog.Any("node", child))
|
||||
if child.DataAtom == atom.Body {
|
||||
c.log.InfoContext(ctx, "found body node")
|
||||
bodyNode = child
|
||||
break
|
||||
if !foundHtml && node.Type == html.ElementNode {
|
||||
c.log.DebugContext(ctx, "found html node")
|
||||
foundHtml = true
|
||||
return false, true
|
||||
}
|
||||
}
|
||||
|
||||
var nextData *html.Node
|
||||
for child := bodyNode.FirstChild; child != nil; child = child.NextSibling {
|
||||
c.log.InfoContext(ctx, "inspecting body node", slog.Any("node", child))
|
||||
if child.DataAtom == atom.Script {
|
||||
c.log.InfoContext(ctx, "found script node")
|
||||
for _, attr := range child.Attr {
|
||||
if foundHtml && !foundBody && node.DataAtom == atom.Body {
|
||||
c.log.DebugContext(ctx, "found body node")
|
||||
foundBody = true
|
||||
return false, true
|
||||
}
|
||||
|
||||
if foundHtml && foundBody && node.DataAtom == atom.Script {
|
||||
for _, attr := range node.Attr {
|
||||
if attr.Key == "id" && attr.Val == "__NEXT_DATA__" {
|
||||
c.log.InfoContext(ctx, "found metadata container")
|
||||
nextData = child.FirstChild
|
||||
break
|
||||
c.log.DebugContext(ctx, "found script node with next_data")
|
||||
return true, false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return false, false
|
||||
}
|
||||
|
||||
nextData := findNode(page, findFunc)
|
||||
if nextData == nil {
|
||||
c.log.WarnContext(ctx, "no metadata container found")
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
var out MetaInfo
|
||||
dataReader := strings.NewReader(nextData.Data)
|
||||
var out PageState
|
||||
dataReader := strings.NewReader(nextData.FirstChild.Data)
|
||||
err = json.NewDecoder(dataReader).Decode(&out)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unmarshalling data: %w", err)
|
||||
return nil, fmt.Errorf("decoding html data: %w", err)
|
||||
}
|
||||
|
||||
return &out, nil
|
||||
}
|
||||
|
||||
// restyCtxLogger pairs a logger with the context that its Debugf, Warnf and
// Errorf methods pass along with every message.
type restyCtxLogger struct {
	// ctx is the context attached to each log call.
	ctx context.Context
	// log is the logger that receives the messages.
	log *slog.Logger
}
|
||||
func findNode(parent *html.Node, eq func(*html.Node) (found, deeper bool)) *html.Node {
|
||||
for child := parent.FirstChild; child != nil; child = child.NextSibling {
|
||||
found, deeper := eq(child)
|
||||
if found {
|
||||
return child
|
||||
}
|
||||
if deeper {
|
||||
deeperChild := findNode(child, eq)
|
||||
if deeperChild != nil {
|
||||
return deeperChild
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (l restyCtxLogger) Debugf(format string, v ...any) {
|
||||
msg := fmt.Sprintf(format, v...)
|
||||
l.log.DebugContext(l.ctx, msg)
|
||||
}
|
||||
|
||||
func (l restyCtxLogger) Warnf(format string, v ...any) {
|
||||
msg := fmt.Sprintf(format, v...)
|
||||
l.log.WarnContext(l.ctx, msg)
|
||||
}
|
||||
|
||||
func (l restyCtxLogger) Errorf(format string, v ...any) {
|
||||
msg := fmt.Sprintf(format, v...)
|
||||
l.log.ErrorContext(l.ctx, msg)
|
||||
return nil
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user