add metadata prefooter
This commit is contained in:
16
Taskfile.yml
16
Taskfile.yml
@ -4,6 +4,15 @@ env:
|
||||
CGO_ENABLED: 0
|
||||
GOBIN: "{{.USER_WORKING_DIR}}/bin"
|
||||
PROJECT: "git.loyso.art/frx/kurious"
|
||||
vars:
|
||||
GIT_COMMIT:
|
||||
sh: git log -n 1 --format=%h
|
||||
GIT_VERSION:
|
||||
sh: git tag | sort -r --version-sort | head -n1
|
||||
BUILD_TIME:
|
||||
sh: TZ=UTC date --iso-8601=seconds
|
||||
LDFLAGS:
|
||||
sh: echo '-X "{{.PROJECT}}.buildTime={{.BUILD_TIME}}" -X "{{.PROJECT}}.commit={{.GIT_COMMIT}}" -X "{{.PROJECT}}.version={{.GIT_VERSION}}"'
|
||||
|
||||
tasks:
|
||||
install_tools:
|
||||
@ -17,4 +26,9 @@ tasks:
|
||||
- go test --count=1 ./internal/...
|
||||
build:
|
||||
cmds:
|
||||
- go build -o $GOBIN/sravnicli -v -ldflags "-X '$PROJECT.version=ohwell'" cmd/dev/sravnicli/main.go
|
||||
- go build -o $GOBIN/sravnicli -v -ldflags '{{.LDFLAGS}}' cmd/dev/sravnicli/main.go
|
||||
deps: [check, test]
|
||||
run:
|
||||
deps: [build]
|
||||
cmds:
|
||||
- $GOBIN/sravnicli
|
||||
|
||||
@ -3,6 +3,7 @@ package main
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"os"
|
||||
"os/signal"
|
||||
@ -15,26 +16,45 @@ func main() {
|
||||
ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt)
|
||||
defer cancel()
|
||||
|
||||
log := slog.New(slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{}))
|
||||
log := slog.New(slog.NewJSONHandler(os.Stdout, &slog.HandlerOptions{
|
||||
Level: slog.LevelDebug,
|
||||
ReplaceAttr: func(_ []string, a slog.Attr) slog.Attr {
|
||||
if a.Key == slog.TimeKey {
|
||||
a.Value = slog.Int64Value(a.Value.Time().Unix())
|
||||
}
|
||||
|
||||
version, commit, bt := kurious.Version(), kurious.Commit(), kurious.BuildTime()
|
||||
pid := os.Getpid()
|
||||
return a
|
||||
},
|
||||
}))
|
||||
|
||||
log.InfoContext(
|
||||
ctx, "running app",
|
||||
slog.Int("pid", pid),
|
||||
slog.String("version", version),
|
||||
slog.String("commit", commit),
|
||||
slog.Time("build_time", bt),
|
||||
)
|
||||
version, commit, bt := kurious.Version(), kurious.Commit(), kurious.BuildTime()
|
||||
pid := os.Getpid()
|
||||
|
||||
client := sravni.NewClient(log, true)
|
||||
meta, err := client.GetMetaInfo(ctx)
|
||||
if err != nil {
|
||||
log.ErrorContext(ctx, "unable to get meta info", slog.Any("error", err))
|
||||
}
|
||||
log.InfoContext(
|
||||
ctx, "running app",
|
||||
slog.Int("pid", pid),
|
||||
slog.String("version", version),
|
||||
slog.String("commit", commit),
|
||||
slog.Time("build_time", bt),
|
||||
)
|
||||
|
||||
enc := json.NewEncoder(os.Stdout)
|
||||
enc.SetIndent("", " ")
|
||||
_ = enc.Encode(meta)
|
||||
err := app(ctx, log)
|
||||
if err != nil {
|
||||
slog.ErrorContext(ctx, "unable to run app", slog.Any("error", err))
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
func app(ctx context.Context, log *slog.Logger) error {
|
||||
client, err := sravni.NewClient(ctx, log, true)
|
||||
if err != nil {
|
||||
return fmt.Errorf("making new client: %w", err)
|
||||
}
|
||||
|
||||
meta := client.GetMainPageState()
|
||||
|
||||
enc := json.NewEncoder(os.Stdout)
|
||||
enc.SetIndent("", " ")
|
||||
|
||||
return enc.Encode(meta)
|
||||
}
|
||||
|
||||
@ -1,10 +1,10 @@
|
||||
package sravni
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"log/slog"
|
||||
"strings"
|
||||
|
||||
@ -19,21 +19,30 @@ const (
|
||||
baseURL = "https://www.sravni.ru/kursy"
|
||||
)
|
||||
|
||||
func NewClient(log *slog.Logger, debug bool) *client {
|
||||
return &client{
|
||||
func NewClient(ctx context.Context, log *slog.Logger, debug bool) (c *client, err error) {
|
||||
c = &client{
|
||||
log: log.With(slog.String("client", "sravni")),
|
||||
http: resty.New().
|
||||
SetBaseURL(baseURL).
|
||||
SetDebug(debug),
|
||||
}
|
||||
|
||||
c.cachedMainPageInfo, err = c.getMainPageState(ctx)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return c, nil
|
||||
}
|
||||
|
||||
type client struct {
|
||||
log *slog.Logger
|
||||
http *resty.Client
|
||||
|
||||
cachedMainPageInfo *PageState
|
||||
}
|
||||
|
||||
type MetaInfoRuntimeConfig struct {
|
||||
type PageStateRuntimeConfig struct {
|
||||
BrandingURL string `json:"brandingUrl"`
|
||||
Release string `json:"release"`
|
||||
Environment string `json:"environment"`
|
||||
@ -46,25 +55,63 @@ type MetaInfoRuntimeConfig struct {
|
||||
OrgnazationURL string `json:"organizationsUrl"`
|
||||
}
|
||||
|
||||
type MetaInfoReduxState struct {
|
||||
// Link is a titled URL as it appears in the page's prefooter data.
type Link struct {
	URL   string `json:"url"`
	Title string `json:"title"`
}

// ReduxStatePrefooterItem is one titled group of links from the prefooter.
type ReduxStatePrefooterItem struct {
	Title string `json:"title"`
	Links []Link `json:"links"`
}

// ReduxMetadata is the "metadata" section of the initial redux state.
type ReduxMetadata struct {
	Data struct {
		Prefooter []ReduxStatePrefooterItem `json:"prefooter"`
	} `json:"data"`
}

// InitialReduxState is the "initialReduxState" object embedded in the page
// properties.
type InitialReduxState struct {
	Metadata   ReduxMetadata `json:"metadata"`
	Categories struct {
		// Data maps a category key to an integer.
		// NOTE(review): the meaning of the integer is not visible here — confirm.
		Data map[string]int `json:"data"`
	} `json:"categories"`
}
|
||||
|
||||
type MetaInfoProps struct {
|
||||
InitialReduxState MetaInfoReduxState `json:"initialReduxState"`
|
||||
type PageStateProperties struct {
|
||||
InitialReduxState InitialReduxState `json:"initialReduxState"`
|
||||
}
|
||||
|
||||
type MetaInfo struct {
|
||||
Page string `json:"page"`
|
||||
Query map[string]string `json:"query"`
|
||||
BuildID string `json:"buildId"`
|
||||
RuntimeConfig MetaInfoRuntimeConfig `json:"runtimeConfig"`
|
||||
Props MetaInfoProps `json:"props"`
|
||||
type PageState struct {
|
||||
Page string `json:"page"`
|
||||
Query map[string]string `json:"query"`
|
||||
BuildID string `json:"buildId"`
|
||||
RuntimeConfig PageStateRuntimeConfig `json:"runtimeConfig"`
|
||||
Props PageStateProperties `json:"props"`
|
||||
}
|
||||
|
||||
func (c *client) GetMetaInfo(ctx context.Context) (*MetaInfo, error) {
|
||||
func (p *PageState) Clone() *PageState {
|
||||
copiedState := *p
|
||||
copiedState.Query = make(map[string]string, len(p.Query))
|
||||
for k, v := range p.Query {
|
||||
copiedState.Query[k] = v
|
||||
}
|
||||
|
||||
data := p.Props.InitialReduxState.Categories.Data
|
||||
copiedData := make(map[string]int, len(data))
|
||||
for k, v := range data {
|
||||
copiedData[k] = v
|
||||
}
|
||||
copiedState.Props.InitialReduxState.Categories.Data = copiedData
|
||||
|
||||
return &copiedState
|
||||
}
|
||||
|
||||
func (c *client) GetMainPageState() *PageState {
|
||||
return c.cachedMainPageInfo.Clone()
|
||||
}
|
||||
|
||||
func (c *client) getMainPageState(ctx context.Context) (*PageState, error) {
|
||||
ctxLogger := restyCtxLogger{
|
||||
ctx: ctx,
|
||||
log: c.log,
|
||||
@ -73,6 +120,7 @@ func (c *client) GetMetaInfo(ctx context.Context) (*MetaInfo, error) {
|
||||
req := c.http.R().
|
||||
SetContext(ctx).
|
||||
SetLogger(ctxLogger).
|
||||
SetDoNotParseResponse(true).
|
||||
EnableTrace()
|
||||
|
||||
resp, err := req.Get("/")
|
||||
@ -88,85 +136,78 @@ func (c *client) GetMetaInfo(ctx context.Context) (*MetaInfo, error) {
|
||||
traceInfo := resp.Request.TraceInfo()
|
||||
c.log.InfoContext(ctx, "request proceeded", slog.Any("trace", traceInfo))
|
||||
|
||||
r := bytes.NewReader(resp.Body())
|
||||
nodes, err := html.Parse(r)
|
||||
return c.parsePageState(ctx, resp.RawBody())
|
||||
}
|
||||
|
||||
func (c *client) parsePageState(ctx context.Context, body io.Reader) (*PageState, error) {
|
||||
page, err := html.Parse(body)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("parsing html body: %w", err)
|
||||
return nil, fmt.Errorf("parsing body: %w", err)
|
||||
}
|
||||
|
||||
c.log.InfoContext(ctx, "inspecting node", slog.Any("node", nodes))
|
||||
c.log.DebugContext(ctx, "finding page state")
|
||||
|
||||
htmlNode := func() *html.Node {
|
||||
for child := nodes.FirstChild; child != nil; child = child.NextSibling {
|
||||
c.log.InfoContext(ctx, "inspecting node", slog.Any("node", child))
|
||||
if child.Type == html.ElementNode {
|
||||
return child
|
||||
}
|
||||
var (
|
||||
foundHtml bool
|
||||
foundBody bool
|
||||
)
|
||||
findFunc := func(node *html.Node) (found, deeper bool) {
|
||||
if node == nil {
|
||||
c.log.DebugContext(ctx, "node is null, skipping")
|
||||
return false, false
|
||||
}
|
||||
|
||||
return nil
|
||||
}()
|
||||
if htmlNode == nil {
|
||||
c.log.WarnContext(ctx, "no html node found")
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
var bodyNode *html.Node
|
||||
for child := htmlNode.FirstChild; child != nil; child = child.NextSibling {
|
||||
c.log.InfoContext(ctx, "inspecting html node", slog.Any("node", child))
|
||||
if child.DataAtom == atom.Body {
|
||||
c.log.InfoContext(ctx, "found body node")
|
||||
bodyNode = child
|
||||
break
|
||||
if !foundHtml && node.Type == html.ElementNode {
|
||||
c.log.DebugContext(ctx, "found html node")
|
||||
foundHtml = true
|
||||
return false, true
|
||||
}
|
||||
}
|
||||
|
||||
var nextData *html.Node
|
||||
for child := bodyNode.FirstChild; child != nil; child = child.NextSibling {
|
||||
c.log.InfoContext(ctx, "inspecting body node", slog.Any("node", child))
|
||||
if child.DataAtom == atom.Script {
|
||||
c.log.InfoContext(ctx, "found script node")
|
||||
for _, attr := range child.Attr {
|
||||
if foundHtml && !foundBody && node.DataAtom == atom.Body {
|
||||
c.log.DebugContext(ctx, "found body node")
|
||||
foundBody = true
|
||||
return false, true
|
||||
}
|
||||
|
||||
if foundHtml && foundBody && node.DataAtom == atom.Script {
|
||||
for _, attr := range node.Attr {
|
||||
if attr.Key == "id" && attr.Val == "__NEXT_DATA__" {
|
||||
c.log.InfoContext(ctx, "found metadata container")
|
||||
nextData = child.FirstChild
|
||||
break
|
||||
c.log.DebugContext(ctx, "found script node with next_data")
|
||||
return true, false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return false, false
|
||||
}
|
||||
|
||||
nextData := findNode(page, findFunc)
|
||||
if nextData == nil {
|
||||
c.log.WarnContext(ctx, "no metadata container found")
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
var out MetaInfo
|
||||
dataReader := strings.NewReader(nextData.Data)
|
||||
var out PageState
|
||||
dataReader := strings.NewReader(nextData.FirstChild.Data)
|
||||
err = json.NewDecoder(dataReader).Decode(&out)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unmarshalling data: %w", err)
|
||||
return nil, fmt.Errorf("decoding html data: %w", err)
|
||||
}
|
||||
|
||||
return &out, nil
|
||||
}
|
||||
|
||||
type restyCtxLogger struct {
|
||||
ctx context.Context
|
||||
log *slog.Logger
|
||||
}
|
||||
func findNode(parent *html.Node, eq func(*html.Node) (found, deeper bool)) *html.Node {
|
||||
for child := parent.FirstChild; child != nil; child = child.NextSibling {
|
||||
found, deeper := eq(child)
|
||||
if found {
|
||||
return child
|
||||
}
|
||||
if deeper {
|
||||
deeperChild := findNode(child, eq)
|
||||
if deeperChild != nil {
|
||||
return deeperChild
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (l restyCtxLogger) Debugf(format string, v ...any) {
|
||||
msg := fmt.Sprintf(format, v...)
|
||||
l.log.DebugContext(l.ctx, msg)
|
||||
}
|
||||
|
||||
func (l restyCtxLogger) Warnf(format string, v ...any) {
|
||||
msg := fmt.Sprintf(format, v...)
|
||||
l.log.WarnContext(l.ctx, msg)
|
||||
}
|
||||
|
||||
func (l restyCtxLogger) Errorf(format string, v ...any) {
|
||||
msg := fmt.Sprintf(format, v...)
|
||||
l.log.ErrorContext(l.ctx, msg)
|
||||
return nil
|
||||
}
|
||||
|
||||
@ -0,0 +1,27 @@
|
||||
package sravni
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
)
|
||||
|
||||
// restyCtxLogger forwards printf-style log calls (Debugf, Warnf, Errorf) to a
// slog.Logger, attaching a fixed context to every record.
//
// The context is stored in the struct because the printf-style methods have no
// context parameter of their own.
type restyCtxLogger struct {
	ctx context.Context
	log *slog.Logger
}

// logf formats the message and emits it at the given level. Formatting is
// skipped entirely when the logger has that level disabled.
func (l restyCtxLogger) logf(level slog.Level, format string, v ...any) {
	if !l.log.Enabled(l.ctx, level) {
		return
	}
	l.log.Log(l.ctx, level, fmt.Sprintf(format, v...))
}

// Debugf logs the formatted message at debug level.
func (l restyCtxLogger) Debugf(format string, v ...any) {
	l.logf(slog.LevelDebug, format, v...)
}

// Warnf logs the formatted message at warn level.
func (l restyCtxLogger) Warnf(format string, v ...any) {
	l.logf(slog.LevelWarn, format, v...)
}

// Errorf logs the formatted message at error level.
func (l restyCtxLogger) Errorf(format string, v ...any) {
	l.logf(slog.LevelError, format, v...)
}
|
||||
35
kurious.go
35
kurious.go
@ -6,33 +6,34 @@ import (
|
||||
)
|
||||
|
||||
// Build metadata. version, commit and buildTime are plain strings so they can
// be overridden at link time with -ldflags "-X <package>.<name>=<value>".
var (
	version         = "unknown"
	commit          = "unknown"
	buildTime       = ""
	buildTimeParsed = time.Time{}
)

// Version returns the version string, or "unknown" when none was set.
func Version() string {
	return version
}

// Commit returns the commit identifier, or "unknown" when none was set.
func Commit() string {
	return commit
}

// buildTimeParseOnce makes sure buildTime is parsed at most once.
var buildTimeParseOnce sync.Once

// BuildTime returns the build timestamp.
//
// It returns the zero time when no build time was set. Otherwise buildTime is
// parsed as RFC 3339 on the first call and the parsed value is reused by later
// calls. A value that is not valid RFC 3339 makes the first call panic.
func BuildTime() time.Time {
	if buildTime == "" {
		return time.Time{}
	}

	buildTimeParseOnce.Do(func() {
		var err error
		// time.Parse takes the layout first and the value second.
		buildTimeParsed, err = time.Parse(time.RFC3339, buildTime)
		if err != nil {
			panic(err.Error())
		}
	})

	return buildTimeParsed
}
|
||||
|
||||
Reference in New Issue
Block a user