add metadata prefooter

This commit is contained in:
Gitea
2023-11-22 01:12:04 +03:00
parent 9bc56666a0
commit f382d9e73b
5 changed files with 209 additions and 106 deletions

View File

@ -4,6 +4,15 @@ env:
CGO_ENABLED: 0 CGO_ENABLED: 0
GOBIN: "{{.USER_WORKING_DIR}}/bin" GOBIN: "{{.USER_WORKING_DIR}}/bin"
PROJECT: "git.loyso.art/frx/kurious" PROJECT: "git.loyso.art/frx/kurious"
vars:
GIT_COMMIT:
sh: git log -n 1 --format=%h
GIT_VERSION:
sh: git tag | sort -r --version-sort | head -n1
BUILD_TIME:
sh: TZ=UTC date --iso-8601=seconds
LDFLAGS:
sh: echo '-X "{{.PROJECT}}.buildTime={{.BUILD_TIME}}" -X "{{.PROJECT}}.commit={{.GIT_COMMIT}}" -X "{{.PROJECT}}.version={{.GIT_VERSION}}"'
tasks: tasks:
install_tools: install_tools:
@ -17,4 +26,9 @@ tasks:
- go test --count=1 ./internal/... - go test --count=1 ./internal/...
build: build:
cmds: cmds:
- go build -o $GOBIN/sravnicli -v -ldflags "-X '$PROJECT.version=ohwell'" cmd/dev/sravnicli/main.go - go build -o $GOBIN/sravnicli -v -ldflags '{{.LDFLAGS}}' cmd/dev/sravnicli/main.go
deps: [check, test]
run:
deps: [build]
cmds:
- $GOBIN/sravnicli

View File

@ -3,6 +3,7 @@ package main
import ( import (
"context" "context"
"encoding/json" "encoding/json"
"fmt"
"log/slog" "log/slog"
"os" "os"
"os/signal" "os/signal"
@ -15,26 +16,45 @@ func main() {
ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt) ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt)
defer cancel() defer cancel()
log := slog.New(slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{})) log := slog.New(slog.NewJSONHandler(os.Stdout, &slog.HandlerOptions{
Level: slog.LevelDebug,
ReplaceAttr: func(_ []string, a slog.Attr) slog.Attr {
if a.Key == slog.TimeKey {
a.Value = slog.Int64Value(a.Value.Time().Unix())
}
version, commit, bt := kurious.Version(), kurious.Commit(), kurious.BuildTime() return a
pid := os.Getpid() },
}))
log.InfoContext( version, commit, bt := kurious.Version(), kurious.Commit(), kurious.BuildTime()
ctx, "running app", pid := os.Getpid()
slog.Int("pid", pid),
slog.String("version", version),
slog.String("commit", commit),
slog.Time("build_time", bt),
)
client := sravni.NewClient(log, true) log.InfoContext(
meta, err := client.GetMetaInfo(ctx) ctx, "running app",
if err != nil { slog.Int("pid", pid),
log.ErrorContext(ctx, "unable to get meta info", slog.Any("error", err)) slog.String("version", version),
} slog.String("commit", commit),
slog.Time("build_time", bt),
)
enc := json.NewEncoder(os.Stdout) err := app(ctx, log)
enc.SetIndent("", " ") if err != nil {
_ = enc.Encode(meta) slog.ErrorContext(ctx, "unable to run app", slog.Any("error", err))
os.Exit(1)
}
}
func app(ctx context.Context, log *slog.Logger) error {
client, err := sravni.NewClient(ctx, log, true)
if err != nil {
return fmt.Errorf("making new client: %w", err)
}
meta := client.GetMainPageState()
enc := json.NewEncoder(os.Stdout)
enc.SetIndent("", " ")
return enc.Encode(meta)
} }

View File

@ -1,10 +1,10 @@
package sravni package sravni
import ( import (
"bytes"
"context" "context"
"encoding/json" "encoding/json"
"fmt" "fmt"
"io"
"log/slog" "log/slog"
"strings" "strings"
@ -19,21 +19,30 @@ const (
baseURL = "https://www.sravni.ru/kursy" baseURL = "https://www.sravni.ru/kursy"
) )
func NewClient(log *slog.Logger, debug bool) *client { func NewClient(ctx context.Context, log *slog.Logger, debug bool) (c *client, err error) {
return &client{ c = &client{
log: log.With(slog.String("client", "sravni")), log: log.With(slog.String("client", "sravni")),
http: resty.New(). http: resty.New().
SetBaseURL(baseURL). SetBaseURL(baseURL).
SetDebug(debug), SetDebug(debug),
} }
c.cachedMainPageInfo, err = c.getMainPageState(ctx)
if err != nil {
return nil, err
}
return c, nil
} }
type client struct { type client struct {
log *slog.Logger log *slog.Logger
http *resty.Client http *resty.Client
cachedMainPageInfo *PageState
} }
type MetaInfoRuntimeConfig struct { type PageStateRuntimeConfig struct {
BrandingURL string `json:"brandingUrl"` BrandingURL string `json:"brandingUrl"`
Release string `json:"release"` Release string `json:"release"`
Environment string `json:"environment"` Environment string `json:"environment"`
@ -46,25 +55,63 @@ type MetaInfoRuntimeConfig struct {
OrgnazationURL string `json:"organizationsUrl"` OrgnazationURL string `json:"organizationsUrl"`
} }
// Link is a single titled hyperlink as found in the prefooter metadata.
type Link struct {
	URL   string `json:"url"`
	Title string `json:"title"`
}

// ReduxStatePrefooterItem is a titled group of links in the prefooter.
type ReduxStatePrefooterItem struct {
	Title string `json:"title"`
	Links []Link `json:"links"`
}

// ReduxMetadata is the "metadata" section of the initial redux state.
type ReduxMetadata struct {
	Data struct {
		Prefooter []ReduxStatePrefooterItem `json:"prefooter"`
	} `json:"data"`
}

// InitialReduxState is the "initialReduxState" object carried in the page
// properties: page metadata plus a name-to-number map of categories.
type InitialReduxState struct {
	Metadata   ReduxMetadata `json:"metadata"`
	Categories struct {
		Data map[string]int `json:"data"`
	} `json:"categories"`
}

// PageStateProperties is the "props" object of a PageState.
type PageStateProperties struct {
	InitialReduxState InitialReduxState `json:"initialReduxState"`
}
type MetaInfo struct { type PageState struct {
Page string `json:"page"` Page string `json:"page"`
Query map[string]string `json:"query"` Query map[string]string `json:"query"`
BuildID string `json:"buildId"` BuildID string `json:"buildId"`
RuntimeConfig MetaInfoRuntimeConfig `json:"runtimeConfig"` RuntimeConfig PageStateRuntimeConfig `json:"runtimeConfig"`
Props MetaInfoProps `json:"props"` Props PageStateProperties `json:"props"`
} }
func (c *client) GetMetaInfo(ctx context.Context) (*MetaInfo, error) { func (p *PageState) Clone() *PageState {
copiedState := *p
copiedState.Query = make(map[string]string, len(p.Query))
for k, v := range p.Query {
copiedState.Query[k] = v
}
data := p.Props.InitialReduxState.Categories.Data
copiedData := make(map[string]int, len(data))
for k, v := range data {
copiedData[k] = v
}
copiedState.Props.InitialReduxState.Categories.Data = copiedData
return &copiedState
}
// GetMainPageState returns a clone of the main page state cached on the
// client, so the caller never receives the cached value itself.
func (c *client) GetMainPageState() *PageState {
	cached := c.cachedMainPageInfo
	return cached.Clone()
}
func (c *client) getMainPageState(ctx context.Context) (*PageState, error) {
ctxLogger := restyCtxLogger{ ctxLogger := restyCtxLogger{
ctx: ctx, ctx: ctx,
log: c.log, log: c.log,
@ -73,6 +120,7 @@ func (c *client) GetMetaInfo(ctx context.Context) (*MetaInfo, error) {
req := c.http.R(). req := c.http.R().
SetContext(ctx). SetContext(ctx).
SetLogger(ctxLogger). SetLogger(ctxLogger).
SetDoNotParseResponse(true).
EnableTrace() EnableTrace()
resp, err := req.Get("/") resp, err := req.Get("/")
@ -88,85 +136,78 @@ func (c *client) GetMetaInfo(ctx context.Context) (*MetaInfo, error) {
traceInfo := resp.Request.TraceInfo() traceInfo := resp.Request.TraceInfo()
c.log.InfoContext(ctx, "request proceeded", slog.Any("trace", traceInfo)) c.log.InfoContext(ctx, "request proceeded", slog.Any("trace", traceInfo))
r := bytes.NewReader(resp.Body()) return c.parsePageState(ctx, resp.RawBody())
nodes, err := html.Parse(r) }
func (c *client) parsePageState(ctx context.Context, body io.Reader) (*PageState, error) {
page, err := html.Parse(body)
if err != nil { if err != nil {
return nil, fmt.Errorf("parsing html body: %w", err) return nil, fmt.Errorf("parsing body: %w", err)
} }
c.log.InfoContext(ctx, "inspecting node", slog.Any("node", nodes)) c.log.DebugContext(ctx, "finding page state")
htmlNode := func() *html.Node { var (
for child := nodes.FirstChild; child != nil; child = child.NextSibling { foundHtml bool
c.log.InfoContext(ctx, "inspecting node", slog.Any("node", child)) foundBody bool
if child.Type == html.ElementNode { )
return child findFunc := func(node *html.Node) (found, deeper bool) {
} if node == nil {
c.log.DebugContext(ctx, "node is null, skipping")
return false, false
} }
return nil if !foundHtml && node.Type == html.ElementNode {
}() c.log.DebugContext(ctx, "found html node")
if htmlNode == nil { foundHtml = true
c.log.WarnContext(ctx, "no html node found") return false, true
return nil, nil
}
var bodyNode *html.Node
for child := htmlNode.FirstChild; child != nil; child = child.NextSibling {
c.log.InfoContext(ctx, "inspecting html node", slog.Any("node", child))
if child.DataAtom == atom.Body {
c.log.InfoContext(ctx, "found body node")
bodyNode = child
break
} }
}
var nextData *html.Node if foundHtml && !foundBody && node.DataAtom == atom.Body {
for child := bodyNode.FirstChild; child != nil; child = child.NextSibling { c.log.DebugContext(ctx, "found body node")
c.log.InfoContext(ctx, "inspecting body node", slog.Any("node", child)) foundBody = true
if child.DataAtom == atom.Script { return false, true
c.log.InfoContext(ctx, "found script node") }
for _, attr := range child.Attr {
if foundHtml && foundBody && node.DataAtom == atom.Script {
for _, attr := range node.Attr {
if attr.Key == "id" && attr.Val == "__NEXT_DATA__" { if attr.Key == "id" && attr.Val == "__NEXT_DATA__" {
c.log.InfoContext(ctx, "found metadata container") c.log.DebugContext(ctx, "found script node with next_data")
nextData = child.FirstChild return true, false
break
} }
} }
} }
return false, false
} }
nextData := findNode(page, findFunc)
if nextData == nil { if nextData == nil {
c.log.WarnContext(ctx, "no metadata container found")
return nil, nil return nil, nil
} }
var out MetaInfo var out PageState
dataReader := strings.NewReader(nextData.Data) dataReader := strings.NewReader(nextData.FirstChild.Data)
err = json.NewDecoder(dataReader).Decode(&out) err = json.NewDecoder(dataReader).Decode(&out)
if err != nil { if err != nil {
return nil, fmt.Errorf("unmarshalling data: %w", err) return nil, fmt.Errorf("decoding html data: %w", err)
} }
return &out, nil return &out, nil
} }
type restyCtxLogger struct { func findNode(parent *html.Node, eq func(*html.Node) (found, deeper bool)) *html.Node {
ctx context.Context for child := parent.FirstChild; child != nil; child = child.NextSibling {
log *slog.Logger found, deeper := eq(child)
} if found {
return child
}
if deeper {
deeperChild := findNode(child, eq)
if deeperChild != nil {
return deeperChild
}
}
}
func (l restyCtxLogger) Debugf(format string, v ...any) { return nil
msg := fmt.Sprintf(format, v...)
l.log.DebugContext(l.ctx, msg)
}
func (l restyCtxLogger) Warnf(format string, v ...any) {
msg := fmt.Sprintf(format, v...)
l.log.WarnContext(l.ctx, msg)
}
func (l restyCtxLogger) Errorf(format string, v ...any) {
msg := fmt.Sprintf(format, v...)
l.log.ErrorContext(l.ctx, msg)
} }

View File

@ -0,0 +1,27 @@
package sravni
import (
"context"
"fmt"
"log/slog"
)
// restyCtxLogger forwards printf-style log calls to a slog.Logger, attaching
// the stored context to every record.
type restyCtxLogger struct {
	ctx context.Context
	log *slog.Logger
}

// Debugf formats the message and logs it at debug level.
func (r restyCtxLogger) Debugf(format string, v ...any) {
	r.log.DebugContext(r.ctx, fmt.Sprintf(format, v...))
}

// Warnf formats the message and logs it at warn level.
func (r restyCtxLogger) Warnf(format string, v ...any) {
	r.log.WarnContext(r.ctx, fmt.Sprintf(format, v...))
}

// Errorf formats the message and logs it at error level.
func (r restyCtxLogger) Errorf(format string, v ...any) {
	r.log.ErrorContext(r.ctx, fmt.Sprintf(format, v...))
}

View File

@ -6,33 +6,34 @@ import (
) )
// Build metadata. version, commit and buildTime keep these defaults unless
// they are overridden at link time (the build passes -X flags for them).
var (
	version         = "unknown"
	commit          = "unknown"
	buildTime       = ""
	buildTimeParsed = time.Time{}
)

// Version returns the version string of the build.
func Version() string {
	return version
}

// Commit returns the commit identifier of the build.
func Commit() string {
	return commit
}

// buildTimeParseOnce guards the one-time parsing of buildTime.
var buildTimeParseOnce sync.Once

// BuildTime returns the build timestamp. It returns the zero time when no
// build time was set; otherwise buildTime is parsed once as RFC 3339 and the
// result is reused. It panics if buildTime is set but malformed.
func BuildTime() time.Time {
	if buildTime == "" {
		return time.Time{}
	}

	buildTimeParseOnce.Do(func() {
		var err error
		// time.Parse takes the layout first and the value second.
		buildTimeParsed, err = time.Parse(time.RFC3339, buildTime)
		if err != nil {
			panic(err.Error())
		}
	})

	return buildTimeParsed
}