From f382d9e73bc4a1420ef20df2ba0ec5aabe0bca11 Mon Sep 17 00:00:00 2001 From: Gitea Date: Wed, 22 Nov 2023 01:12:04 +0300 Subject: [PATCH] add metadata prefooter --- Taskfile.yml | 16 +- cmd/dev/sravnicli/main.go | 56 ++++-- .../courses/sravni/client.go | 181 +++++++++++------- .../courses/sravni/logger.go | 27 +++ kurious.go | 35 ++-- 5 files changed, 209 insertions(+), 106 deletions(-) create mode 100644 internal/infrastructure/interfaceadapters/courses/sravni/logger.go diff --git a/Taskfile.yml b/Taskfile.yml index 952171e..1e0f8aa 100644 --- a/Taskfile.yml +++ b/Taskfile.yml @@ -4,6 +4,15 @@ env: CGO_ENABLED: 0 GOBIN: "{{.USER_WORKING_DIR}}/bin" PROJECT: "git.loyso.art/frx/kurious" +vars: + GIT_COMMIT: + sh: git log -n 1 --format=%h + GIT_VERSION: + sh: git tag | sort -r --version-sort | head -n1 + BUILD_TIME: + sh: TZ=UTC date --iso-8601=seconds + LDFLAGS: + sh: echo '-X "{{.PROJECT}}.buildTime={{.BUILD_TIME}}" -X "{{.PROJECT}}.commit={{.GIT_COMMIT}}" -X "{{.PROJECT}}.version={{.GIT_VERSION}}"' tasks: install_tools: @@ -17,4 +26,9 @@ tasks: - go test --count=1 ./internal/... build: cmds: - - go build -o $GOBIN/sravnicli -v -ldflags "-X '$PROJECT.version=ohwell'" cmd/dev/sravnicli/main.go + - go build -o $GOBIN/sravnicli -v -ldflags '{{.LDFLAGS}}' cmd/dev/sravnicli/main.go + deps: [check, test] + run: + deps: [build] + cmds: + - $GOBIN/sravnicli diff --git a/cmd/dev/sravnicli/main.go b/cmd/dev/sravnicli/main.go index 268dcf1..949a6c7 100644 --- a/cmd/dev/sravnicli/main.go +++ b/cmd/dev/sravnicli/main.go @@ -3,6 +3,7 @@ package main import ( "context" "encoding/json" + "fmt" "log/slog" "os" "os/signal" @@ -15,26 +16,45 @@ func main() { ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt) defer cancel() - log := slog.New(slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{})) + log := slog.New(slog.NewJSONHandler(os.Stdout, &slog.HandlerOptions{ + Level: slog.LevelDebug, + ReplaceAttr: func(_ []string, a slog.Attr) slog.Attr { + if a.Key == slog.TimeKey { + a.Value = slog.Int64Value(a.Value.Time().Unix()) + } - version, commit, bt := kurious.Version(), kurious.Commit(), kurious.BuildTime() - pid := os.Getpid() + return a + }, + })) - log.InfoContext( - ctx, "running app", - slog.Int("pid", pid), - slog.String("version", version), - slog.String("commit", commit), - slog.Time("build_time", bt), - ) + version, commit, bt := kurious.Version(), kurious.Commit(), kurious.BuildTime() + pid := os.Getpid() - client := sravni.NewClient(log, true) - meta, err := client.GetMetaInfo(ctx) - if err != nil { - log.ErrorContext(ctx, "unable to get meta info", slog.Any("error", err)) - } + log.InfoContext( + ctx, "running app", + slog.Int("pid", pid), + slog.String("version", version), + slog.String("commit", commit), + slog.Time("build_time", bt), + ) - enc := json.NewEncoder(os.Stdout) - enc.SetIndent("", " ") - _ = enc.Encode(meta) + err := app(ctx, log) + if err != nil { + slog.ErrorContext(ctx, "unable to run app", slog.Any("error", err)) + os.Exit(1) + } +} + +func app(ctx context.Context, log *slog.Logger) error { + client, err := sravni.NewClient(ctx, log, true) + if err != nil { + return fmt.Errorf("making new client: %w", err) + } + + meta := client.GetMainPageState() + + enc := json.NewEncoder(os.Stdout) + enc.SetIndent("", " ") + + return enc.Encode(meta) } diff --git a/internal/infrastructure/interfaceadapters/courses/sravni/client.go b/internal/infrastructure/interfaceadapters/courses/sravni/client.go index 0039678..f9b7cb5 100644 --- a/internal/infrastructure/interfaceadapters/courses/sravni/client.go +++ b/internal/infrastructure/interfaceadapters/courses/sravni/client.go @@ -1,10 +1,10 @@ package sravni import ( - "bytes" "context" "encoding/json" "fmt" + "io" "log/slog" "strings" @@ -19,21 +19,30 @@ const ( baseURL = "https://www.sravni.ru/kursy" ) -func NewClient(log *slog.Logger, debug bool) *client { - return &client{ +func NewClient(ctx context.Context, log *slog.Logger, debug bool) (c *client, err error) { + c = &client{ log: log.With(slog.String("client", "sravni")), http: resty.New(). SetBaseURL(baseURL). SetDebug(debug), } + + c.cachedMainPageInfo, err = c.getMainPageState(ctx) + if err != nil { + return nil, err + } + + return c, nil } type client struct { log *slog.Logger http *resty.Client + + cachedMainPageInfo *PageState } -type MetaInfoRuntimeConfig struct { +type PageStateRuntimeConfig struct { BrandingURL string `json:"brandingUrl"` Release string `json:"release"` Environment string `json:"environment"` @@ -46,25 +55,63 @@ type MetaInfoRuntimeConfig struct { OrgnazationURL string `json:"organizationsUrl"` } -type MetaInfoReduxState struct { +type Link struct { + URL string `json:"url"` + Title string `json:"title"` +} + +type ReduxStatePrefooterItem struct { + Title string `json:"title"` + Links []Link `json:"links"` +} + +type ReduxMetadata struct { + Data struct { + Prefooter []ReduxStatePrefooterItem `json:"prefooter"` + } `json:"data"` +} + +type InitialReduxState struct { + Metadata ReduxMetadata `json:"metadata"` Categories struct { Data map[string]int `json:"data"` } `json:"categories"` } -type MetaInfoProps struct { - InitialReduxState MetaInfoReduxState `json:"initialReduxState"` +type PageStateProperties struct { + InitialReduxState InitialReduxState `json:"initialReduxState"` } -type MetaInfo struct { - Page string `json:"page"` - Query map[string]string `json:"query"` - BuildID string `json:"buildId"` - RuntimeConfig MetaInfoRuntimeConfig `json:"runtimeConfig"` - Props MetaInfoProps `json:"props"` +type PageState struct { + Page string `json:"page"` + Query map[string]string `json:"query"` + BuildID string `json:"buildId"` + RuntimeConfig PageStateRuntimeConfig `json:"runtimeConfig"` + Props PageStateProperties `json:"props"` } -func (c *client) GetMetaInfo(ctx context.Context) (*MetaInfo, error) { +func (p *PageState) Clone() *PageState { + copiedState := *p + copiedState.Query = make(map[string]string, len(p.Query)) + for k, v := range p.Query { + copiedState.Query[k] = v + } + + data := p.Props.InitialReduxState.Categories.Data + copiedData := make(map[string]int, len(data)) + for k, v := range data { + copiedData[k] = v + } + copiedState.Props.InitialReduxState.Categories.Data = copiedData + + return &copiedState +} + +func (c *client) GetMainPageState() *PageState { + return c.cachedMainPageInfo.Clone() +} + +func (c *client) getMainPageState(ctx context.Context) (*PageState, error) { ctxLogger := restyCtxLogger{ ctx: ctx, log: c.log, @@ -73,6 +120,7 @@ func (c *client) GetMetaInfo(ctx context.Context) (*MetaInfo, error) { req := c.http.R(). SetContext(ctx). SetLogger(ctxLogger). + SetDoNotParseResponse(true). EnableTrace() resp, err := req.Get("/") @@ -88,85 +136,78 @@ func (c *client) GetMetaInfo(ctx context.Context) (*MetaInfo, error) { traceInfo := resp.Request.TraceInfo() c.log.InfoContext(ctx, "request proceeded", slog.Any("trace", traceInfo)) - r := bytes.NewReader(resp.Body()) - nodes, err := html.Parse(r) + return c.parsePageState(ctx, resp.RawBody()) +} + +func (c *client) parsePageState(ctx context.Context, body io.Reader) (*PageState, error) { + page, err := html.Parse(body) if err != nil { - return nil, fmt.Errorf("parsing html body: %w", err) + return nil, fmt.Errorf("parsing body: %w", err) } - c.log.InfoContext(ctx, "inspecting node", slog.Any("node", nodes)) + c.log.DebugContext(ctx, "finding page state") - htmlNode := func() *html.Node { - for child := nodes.FirstChild; child != nil; child = child.NextSibling { - c.log.InfoContext(ctx, "inspecting node", slog.Any("node", child)) - if child.Type == html.ElementNode { - return child - } + var ( + foundHtml bool + foundBody bool + ) + findFunc := func(node *html.Node) (found, deeper bool) { + if node == nil { + c.log.DebugContext(ctx, "node is null, skipping") + return false, false } - return nil - }() - if htmlNode == nil { - c.log.WarnContext(ctx, "no html node found") - return nil, nil - } - - var bodyNode *html.Node - for child := htmlNode.FirstChild; child != nil; child = child.NextSibling { - c.log.InfoContext(ctx, "inspecting html node", slog.Any("node", child)) - if child.DataAtom == atom.Body { - c.log.InfoContext(ctx, "found body node") - bodyNode = child - break + if !foundHtml && node.Type == html.ElementNode { + c.log.DebugContext(ctx, "found html node") + foundHtml = true + return false, true } - } - var nextData *html.Node - for child := bodyNode.FirstChild; child != nil; child = child.NextSibling { - c.log.InfoContext(ctx, "inspecting body node", slog.Any("node", child)) - if child.DataAtom == atom.Script { - c.log.InfoContext(ctx, "found script node") - for _, attr := range child.Attr { + if foundHtml && !foundBody && node.DataAtom == atom.Body { + c.log.DebugContext(ctx, "found body node") + foundBody = true + return false, true + } + + if foundHtml && foundBody && node.DataAtom == atom.Script { + for _, attr := range node.Attr { if attr.Key == "id" && attr.Val == "__NEXT_DATA__" { - c.log.InfoContext(ctx, "found metadata container") - nextData = child.FirstChild - break + c.log.DebugContext(ctx, "found script node with next_data") + return true, false } } } + + return false, false } + nextData := findNode(page, findFunc) if nextData == nil { - c.log.WarnContext(ctx, "no metadata container found") return nil, nil } - var out MetaInfo - dataReader := strings.NewReader(nextData.Data) + var out PageState + dataReader := strings.NewReader(nextData.FirstChild.Data) err = json.NewDecoder(dataReader).Decode(&out) if err != nil { - return nil, fmt.Errorf("unmarshalling data: %w", err) + return nil, fmt.Errorf("decoding html data: %w", err) } - return &out, nil } -type restyCtxLogger struct { - ctx context.Context - log *slog.Logger -} +func findNode(parent *html.Node, eq func(*html.Node) (found, deeper bool)) *html.Node { + for child := parent.FirstChild; child != nil; child = child.NextSibling { + found, deeper := eq(child) + if found { + return child + } + if deeper { + deeperChild := findNode(child, eq) + if deeperChild != nil { + return deeperChild + } + } + } -func (l restyCtxLogger) Debugf(format string, v ...any) { - msg := fmt.Sprintf(format, v...) - l.log.DebugContext(l.ctx, msg) -} - -func (l restyCtxLogger) Warnf(format string, v ...any) { - msg := fmt.Sprintf(format, v...) - l.log.WarnContext(l.ctx, msg) -} - -func (l restyCtxLogger) Errorf(format string, v ...any) { - msg := fmt.Sprintf(format, v...) - l.log.ErrorContext(l.ctx, msg) + return nil } diff --git a/internal/infrastructure/interfaceadapters/courses/sravni/logger.go b/internal/infrastructure/interfaceadapters/courses/sravni/logger.go new file mode 100644 index 0000000..f2b7061 --- /dev/null +++ b/internal/infrastructure/interfaceadapters/courses/sravni/logger.go @@ -0,0 +1,27 @@ +package sravni + +import ( + "context" + "fmt" + "log/slog" +) + +type restyCtxLogger struct { + ctx context.Context + log *slog.Logger +} + +func (l restyCtxLogger) Debugf(format string, v ...any) { + msg := fmt.Sprintf(format, v...) + l.log.DebugContext(l.ctx, msg) +} + +func (l restyCtxLogger) Warnf(format string, v ...any) { + msg := fmt.Sprintf(format, v...) + l.log.WarnContext(l.ctx, msg) +} + +func (l restyCtxLogger) Errorf(format string, v ...any) { + msg := fmt.Sprintf(format, v...) + l.log.ErrorContext(l.ctx, msg) +} diff --git a/kurious.go b/kurious.go index ddd416f..3cf97a0 100644 --- a/kurious.go +++ b/kurious.go @@ -6,33 +6,34 @@ import ( ) var ( - version = "unknown" - commit = "unknown" - buildTime = "" - buildTimeParsed = time.Time{} + version = "unknown" + commit = "unknown" + buildTime = "" + buildTimeParsed = time.Time{} ) func Version() string { - return version + return version } func Commit() string { - return commit + return commit } var buildTimeParseOnce sync.Once + func BuildTime() time.Time { - if buildTime == "" { - return time.Time{} - } + if buildTime == "" { + return time.Time{} + } - buildTimeParseOnce.Do(func() { - var err error - buildTimeParsed, err = time.Parse(buildTime, time.RFC3339) - if err != nil { - panic(err.Error()) - } - }) + buildTimeParseOnce.Do(func() { + var err error + buildTimeParsed, err = time.Parse(time.RFC3339, buildTime) + if err != nil { + panic(err.Error()) + } + }) - return buildTimeParsed + return buildTimeParsed }