able to parse xml

This commit is contained in:
2024-01-28 16:49:48 +03:00
parent a0b36ba83d
commit dd639995bd
11 changed files with 295 additions and 87 deletions

View File

@ -20,6 +20,10 @@ import (
// Yeah, singleton is not good UNLESS you're really lazy
var diInjector *do.Injector
// GetEwayClient resolves the eway.Client registered in the package-level
// DI injector and returns it together with any resolution error.
func GetEwayClient() (eway.Client, error) {
	client, err := do.Invoke[eway.Client](diInjector)
	return client, err
}
func GetRepository() (storage.Repository, error) {
adapter, err := do.Invoke[*storageRepositoryAdapter](diInjector)
if err != nil {
@ -57,7 +61,11 @@ func SetupDI(ctx context.Context, cfgpath string) error {
return nil, fmt.Errorf("getting logger: %w", err)
}
client := eway.New(eway.Config(cfg.Eway), log)
client, err := eway.New(eway.Config(cfg.Eway), log)
if err != nil {
return nil, fmt.Errorf("making new eway client: %w", err)
}
return client, nil
})
@ -103,9 +111,9 @@ func getDB() (*badger.DB, error) {
}
type settings struct {
Badger config.Badger
Log config.Log
Eway config.Eway
Badger config.Badger `toml:"badger"`
Log config.Log `toml:"log"`
Eway config.Eway `toml:"eway"`
}
func parseSettings(cfgpath string) (cfg settings, err error) {

View File

@ -10,6 +10,7 @@ import (
"errors"
"fmt"
"io"
"math/big"
"os"
"os/signal"
"strconv"
@ -19,6 +20,7 @@ import (
"git.loyso.art/frx/eway/internal/encoding/fbs"
"git.loyso.art/frx/eway/internal/entity"
"git.loyso.art/frx/eway/internal/export"
"git.loyso.art/frx/eway/internal/interconnect/eway"
"github.com/brianvoe/gofakeit/v6"
"github.com/rodaine/table"
@ -66,10 +68,13 @@ func releaseDI(c *cli.Context) error {
return fmt.Errorf("getting logger: %w", err)
}
log.Info().Msg("shutting down env")
start := time.Now()
defer func() {
since := time.Since(start)
if err == nil {
return
}
log.Err(err).Dur("elapsed", since).Msg("shutdown finished")
}()
@ -91,14 +96,35 @@ func setupCLI(ctx context.Context) *cli.App {
app.Before = setupDI(ctx)
app.After = releaseDI
app.Commands = cli.Commands{
newParseCmd(ctx),
newImportCmd(ctx),
newExportCmd(ctx),
newViewCmd(ctx),
}
app.EnableBashCompletion = true
app.BashComplete = cli.DefaultAppComplete
return app
}
// newParseCmd builds the "parse" command, which only groups the
// source-specific parsing subcommands.
func newParseCmd(ctx context.Context) cli.Command {
	subcommands := cli.Commands{
		newParseEwayCmd(ctx),
	}

	cmd := cli.Command{
		Name:        "parse",
		Usage:       "category for parsing items from various sources",
		Subcommands: subcommands,
	}

	return cmd
}
// newParseEwayCmd builds the "eway" subcommand whose action is
// parseEwayAction wrapped by decorateAction.
func newParseEwayCmd(ctx context.Context) cli.Command {
	action := decorateAction(ctx, parseEwayAction)

	cmd := cli.Command{
		Name:   "eway",
		Usage:  "parse all available eway goods",
		Action: action,
	}

	return cmd
}
func newImportCmd(ctx context.Context) cli.Command {
return cli.Command{
Name: "import",
@ -111,7 +137,7 @@ func newImportCmd(ctx context.Context) cli.Command {
func newImportFromFileCmd(ctx context.Context) cli.Command {
return cli.Command{
Name: "fromfile",
Name: "file",
Usage: "imports from file into db",
Flags: []cli.Flag{
&cli.StringFlag{
@ -409,6 +435,20 @@ func importFromFileAction(ctx context.Context, c *cli.Context) error {
}
}()
failedItems, err := os.Create("failed.json")
if err != nil {
log.Warn().Err(err).Msg("unable to open file for failed results")
failedItems = os.Stdout
}
defer func() {
if failedItems == os.Stdout {
return
}
errClose := failedItems.Close()
log.Err(errClose).Msg("closing file")
}()
var (
goodsItem entity.GoodsItem
goodsItems []entity.GoodsItem
@ -439,6 +479,8 @@ func importFromFileAction(ctx context.Context, c *cli.Context) error {
err = json.Unmarshal(line, &goodsItem)
if err != nil {
log.Warn().Err(err).Str("line", string(line)).Msg("unable to unmarshal line into item")
_, _ = failedItems.Write(line)
_, _ = failedItems.Write([]byte{'\n'})
failedToInsert++
continue
}
@ -449,6 +491,12 @@ func importFromFileAction(ctx context.Context, c *cli.Context) error {
continue
}
if goodsItem.Type == "" {
log.Warn().Msg("bad item without proper type")
_ = json.NewEncoder(failedItems).Encode(goodsItem)
continue
}
_, err = r.Category().Create(ctx, goodsItem.Type)
if err != nil {
return fmt.Errorf("unable to create new category: %w", err)
@ -591,20 +639,151 @@ func exportYMLCatalogAction(ctx context.Context, c *cli.Context) error {
return enc.Encode(container)
}
// parseEwayAction pulls goods from the eway client page by page and upserts
// them into the repository.
//
// For every page it requests the goods, fetches remnants for the returned
// items, converts each raw item into an entity.GoodsItem (items that fail
// conversion are logged and skipped), creates categories that have not been
// seen yet and finally upserts the batch. The loop ends when the context is
// done, when any call fails, or when a page holds fewer than batchSize items.
func parseEwayAction(ctx context.Context, c *cli.Context) error {
	client, err := components.GetEwayClient()
	if err != nil {
		return fmt.Errorf("getting eway client: %w", err)
	}

	repository, err := components.GetRepository()
	if err != nil {
		return fmt.Errorf("getting repository: %w", err)
	}

	logger, err := components.GetLogger()
	if err != nil {
		return fmt.Errorf("getting logger: %w", err)
	}

	const batchSize = 100

	var (
		draw  int
		start int
	)

	// Both slices are reused between iterations to avoid reallocating.
	goodsItems := make([]entity.GoodsItem, 0, batchSize)
	productIDs := make([]int, 0, batchSize)

	knownCategories := make(map[string]struct{})
	err = entity.IterWithErr(repository.Category().List(ctx)).Do(func(category entity.Category) error {
		knownCategories[category.Name] = struct{}{}
		return nil
	})
	if err != nil {
		return fmt.Errorf("filling known categories: %w", err)
	}

	startFrom := time.Now()
	for {
		select {
		case <-ctx.Done():
			return ctx.Err()
		default:
		}

		items, total, err := client.GetGoodsNew(ctx, eway.GetGoodsNewParams{
			Draw:            draw,
			Start:           start,
			Length:          batchSize,
			SearchInStocks:  true,
			RemmantsAtleast: 5,
		})
		if err != nil {
			return fmt.Errorf("getting next goods batch: %w", err)
		}

		// An empty page means there is nothing left to handle; do not issue
		// a remnants request or an upsert for zero items.
		if len(items) == 0 {
			break
		}

		productIDs = productIDs[:0]
		for _, item := range items {
			productIDs = append(productIDs, int(item.Cart))
		}

		remnants, err := client.GetGoodsRemnants(ctx, productIDs)
		if err != nil {
			return fmt.Errorf("getting goods remnants: %w", err)
		}

		goodsItems = goodsItems[:0]
		for _, item := range items {
			goodsItem, err := entity.MakeGoodsItem(item, remnants)
			if err != nil {
				logger.Warn().Err(err).Any("item", item).Msg("unable to make goods item")
				continue
			}

			goodsItems = append(goodsItems, goodsItem)
			if goodsItem.Type == "" {
				continue
			}

			if _, ok := knownCategories[goodsItem.Type]; ok {
				continue
			}

			category, err := repository.Category().Create(ctx, goodsItem.Type)
			if err != nil {
				return fmt.Errorf("creating category: %w", err)
			}

			logger.Debug().
				Str("name", category.Name).
				Int64("id", category.ID).
				Msg("created new category")

			knownCategories[goodsItem.Type] = struct{}{}
		}

		_, err = repository.GoodsItem().UpsertMany(ctx, goodsItems...)
		if err != nil {
			return fmt.Errorf("upserting items: %w", err)
		}

		// Progress is measured after the current page has been stored.
		handled := start + len(items)
		fraction, left := parseEwayProgress(handled, total, time.Since(startFrom))
		progress := big.NewFloat(fraction).Text('f', 3)

		logger.Debug().
			Int("from", start).
			Int("to", handled).
			Int("total", total).
			Str("progress", progress).
			Int("seconds_left", left).
			Msg("handled next batch items")

		if len(items) < batchSize {
			break
		}

		start += batchSize
		draw++
	}

	return nil
}

// parseEwayProgress reports which fraction of total has been handled and a
// rough estimate, in seconds, of the time left given the elapsed time.
//
// It returns zeroes while nothing has been handled or total is not positive,
// and caps the fraction at 1, so callers never receive NaN or Inf (big.NewFloat
// panics on NaN).
func parseEwayProgress(handled, total int, elapsed time.Duration) (fraction float64, secondsLeft int) {
	if handled <= 0 || total <= 0 {
		return 0, 0
	}

	fraction = float64(handled) / float64(total)
	if fraction >= 1 {
		return 1, 0
	}

	secondsLeft = int((1 - fraction) / fraction * elapsed.Seconds())

	return fraction, secondsLeft
}
func goodsItemAsOffer(in entity.GoodsItem, categoryIDByName map[string]int64) (out export.Offer) {
const defaultType = "vendor.model"
const defaultCurrency = "RUR"
const defaultAvailable = true
const quantityParamName = "Количество на складе «Москва»"
const basePictureURL = "https://eway.elevel.ru"
imgurl := func(path string) string {
return basePictureURL + path
}
categoryID := categoryIDByName[in.Type]
out = export.Offer{
ID: in.Cart,
VendorCode: in.Articul,
Price: int(in.TariffPrice),
CategoryID: categoryID,
PictureURLs: []string{
in.Photo,
imgurl(in.Photo),
},
Model: in.Name,

View File

@ -1,7 +1,7 @@
package config
type Badger struct {
Debug bool
Dir string
ValueDir *string
Debug bool `toml:"debug"`
Dir string `toml:"dir"`
ValueDir *string `toml:"value_dir"`
}

View File

@ -1,8 +1,8 @@
package config
type Eway struct {
SessionID string
SessionUser string
Contract string
Debug bool
SessionID string `toml:"session_id"`
SessionUser string `toml:"session_user"`
OwnerID string `toml:"owner_id"`
Debug bool `toml:"debug"`
}

View File

@ -50,6 +50,6 @@ func (l *LogFormat) UnmarshalText(data []byte) (err error) {
}
type Log struct {
Level string `json:"level"`
Format string `json:"format"`
Level string `json:"level" toml:"level"`
Format string `json:"format" toml:"format"`
}

View File

@ -45,15 +45,6 @@ type GoodsItemRaw struct {
type MappedGoodsRemnants map[int]GoodsRemnant
type GoodsRemnant [4]int32
// ExtractProductIDs returns the Cart value of every item, converted to int,
// in the same order as the input. The result is always a non-nil slice.
func ExtractProductIDs(items []GoodsItem) (out []int) {
	ids := make([]int, len(items))
	for i := range items {
		ids[i] = int(items[i].Cart)
	}

	return ids
}
func MakeGoodsItem(
gi GoodsItemRaw,
remnants MappedGoodsRemnants,

28
internal/entity/iter.go Normal file
View File

@ -0,0 +1,28 @@
package entity
// IterWithErr bundles a slice with the error returned alongside it, so a
// call returning ([]T, error) can be passed directly and iterated with Do.
func IterWithErr[T any](t []T, err error) iterWithErr[T] {
	var iter iterWithErr[T]
	iter.items = t
	iter.err = err

	return iter
}

// iterWithErr holds the items to visit and the error that came with them.
type iterWithErr[T any] struct {
	items []T
	err   error
}

// Do calls f for every item in order.
//
// If the iterator was built with a non-nil error, that error is returned and
// f is never called. Otherwise iteration stops at the first non-nil error
// returned by f, and that error is returned; nil is returned when every call
// succeeds.
func (iter iterWithErr[T]) Do(f func(T) error) error {
	if iter.err != nil {
		return iter.err
	}

	for i := range iter.items {
		if err := f(iter.items[i]); err != nil {
			return err
		}
	}

	return nil
}

View File

@ -19,7 +19,7 @@ type Offer struct {
PictureURLs []string `xml:"picture"`
Vendor string `xml:"vendor"`
Model string `xml:"model"`
VendorCode int `xml:"vendorCode"`
VendorCode string `xml:"vendorCode"`
TypePrefix string `xml:"typePrefix"`
Description string `xml:"description"`
ManufacturerWarrany bool `xml:"manufacturer_warranty"`
@ -33,7 +33,7 @@ type Currency struct {
type Category struct {
ID int64 `xml:"id,attr"`
ParentID int64 `xml:"parent_id,attr,omiempty"`
ParentID int64 `xml:"parent_id,attr,omitempty"`
Name string `xml:",chardata"`
}

View File

@ -4,6 +4,7 @@ import (
"encoding/xml"
"os"
"testing"
"time"
"github.com/brianvoe/gofakeit/v6"
)
@ -12,12 +13,12 @@ func TestYMLSerialize(t *testing.T) {
faker := gofakeit.New(0)
categories := make([]Category, faker.Rand.Intn(4))
knownCategory := map[int]struct{}{}
categoryIDs := make([]int, 0, 10)
knownCategory := map[int64]struct{}{}
categoryIDs := make([]int64, 0, 10)
for i := range categories {
categories[i].ID = faker.Rand.Int()
categories[i].ID = faker.Int64()
categories[i].Name = faker.HipsterWord()
categories[i].ParentID = faker.Rand.Int()
categories[i].ParentID = faker.Int64()
if _, ok := knownCategory[categories[i].ID]; ok {
continue
@ -42,7 +43,7 @@ func TestYMLSerialize(t *testing.T) {
}
offer.Vendor = faker.Company()
offer.Model = faker.CarModel()
offer.VendorCode = faker.Rand.Int()
offer.VendorCode = faker.DigitN(8)
offer.TypePrefix = faker.ProductName()
offer.Description = faker.Sentence(12)
offer.ManufacturerWarrany = true
@ -68,7 +69,7 @@ func TestYMLSerialize(t *testing.T) {
Categories: categories,
Offers: offers,
}
catalog.Date = faker.Date()
catalog.Date = faker.Date().Truncate(time.Second)
container := YmlContainer{
YmlCatalog: catalog,

View File

@ -20,18 +20,28 @@ import (
)
// Client is the set of eway operations exposed to the rest of the program.
type Client interface {
// GetGoodsRemnants takes a list of integer ids and returns the remnants
// as an entity.MappedGoodsRemnants.
// NOTE(review): callers pass goods Cart values as the ids — confirm
// that this is the intended key.
GetGoodsRemnants(context.Context, []int) (entity.MappedGoodsRemnants, error)
// GetGoodsNew returns one page of raw goods items selected by
// GetGoodsNewParams together with a total count.
// NOTE(review): total is presumably the overall number of matching
// goods rather than the page size — confirm against the API.
GetGoodsNew(
context.Context,
GetGoodsNewParams,
) (items []entity.GoodsItemRaw, total int, err error)
}
// client is the resty-backed eway client state.
type client struct {
// http is the resty client used to send requests.
http *resty.Client
// log is the logger used by the client.
log zerolog.Logger
// ownerID is sent as the "own" query parameter by GetGoodsNew.
// NOTE(review): the constructor's visible return literal sets only http
// and log — confirm ownerID is populated from the config.
ownerID string
}
type Config config.Eway
func New(cfg Config, log zerolog.Logger) client {
if cfg.Contract == "" {
cfg.Contract = "6101"
func New(cfg Config, log zerolog.Logger) (client, error) {
if cfg.SessionID == "" {
return client{}, entity.SimpleError("no session id provided")
}
if cfg.SessionUser == "" {
return client{}, entity.SimpleError("no session user provided")
}
cookies := []*http.Cookie{
@ -47,12 +57,6 @@ func New(cfg Config, log zerolog.Logger) client {
Domain: "eway.elevel.ru",
HttpOnly: true,
},
{
Name: "contract",
Value: cfg.Contract,
Domain: "eway.elevel.ru",
HttpOnly: true,
},
}
httpclient := resty.New().
@ -63,17 +67,11 @@ func New(cfg Config, log zerolog.Logger) client {
return client{
http: httpclient,
log: log.With().Str("client", "eway").Logger(),
}
}
type getGoodsNewOrder struct {
Column int
Dir string
}, nil
}
type GetGoodsNewParams struct {
Draw int
Order getGoodsNewOrder
Start int
// 100 is max
Length int
@ -212,7 +210,7 @@ func (c client) GetGoodsNew(
"remnants_atleast": "5",
}).
SetQueryParam("category_id", "0").
SetQueryParam("own", "26476"). // user id?
SetQueryParam("own", c.ownerID). // user id?
SetDoNotParseResponse(true).
Post("/goods_new")
if err != nil {

View File

@ -42,7 +42,7 @@ func (c *goodsItemClient) prefixedStr(key string) []byte {
return c.prefixed(keyBytes)
}
func (c *goodsItemClient) prefixedIDByCartStr(key int64) []byte {
func (c *goodsItemClient) prefixedIDByCartInt64(key int64) []byte {
var keyBytes [8]byte
binary.BigEndian.PutUint64(keyBytes[:], uint64(key))
return c.prefixedIDByCart(keyBytes[:])
@ -168,22 +168,21 @@ func (c *goodsItemClient) Get(
func (c *goodsItemClient) GetByCart(ctx context.Context, id int64) (out entity.GoodsItem, err error) {
err = c.db.View(func(txn *badger.Txn) error {
var idByte [8]byte
binary.BigEndian.PutUint64(idByte[:], uint64(id))
item, err := txn.Get(c.prefixedIDByCart(idByte[:]))
idxKey := c.prefixedIDByCartInt64(id)
skuByCartIDItem, err := txn.Get(idxKey)
if err != nil {
return fmt.Errorf("getting key: %w", err)
}
sku := make([]byte, item.ValueSize())
sku, err = item.ValueCopy(sku)
sku := make([]byte, skuByCartIDItem.ValueSize())
sku, err = skuByCartIDItem.ValueCopy(sku)
if err != nil {
return fmt.Errorf("getting value of idx: %w", err)
}
// well, yeah, it's kind of dumb to trim the prefix here and
// add it back later, but who cares.
sku = bytes.TrimPrefix(sku, c.prefix())
out, err = c.getBySKU(sku, txn)
return err
})
@ -206,40 +205,44 @@ func (c *goodsItemClient) upsertByBatch(ctx context.Context, items []entity.Good
batch := c.db.NewWriteBatch()
defer batch.Cancel()
log := zerolog.Ctx(ctx)
err := func() error {
for _, item := range items {
select {
case <-ctx.Done():
return ctx.Err()
default:
}
for _, item := range items {
select {
case <-ctx.Done():
break
default:
}
key := c.prefixedStr(item.Articul)
var value []byte
if useJSON {
value, _ = json.Marshal(item)
} else {
value = fbs.MakeDomainGoodItemFinished(item)
key := c.prefixedStr(item.Articul)
var value []byte
if useJSON {
value, _ = json.Marshal(item)
} else {
value = fbs.MakeDomainGoodItemFinished(item)
}
idxValue := make([]byte, len(key))
copy(idxValue, key)
coreEntry := badger.NewEntry(key, value)
if err := batch.SetEntry(coreEntry); err != nil {
return fmt.Errorf("setting core entry: %w", err)
}
idxKey := c.prefixedIDByCartInt64(item.Cart)
idxEntry := badger.NewEntry(idxKey, idxValue)
if err := batch.SetEntry(idxEntry); err != nil {
return fmt.Errorf("setting index entry: %w", err)
}
}
idxValue := make([]byte, len(key))
copy(idxValue, key)
coreEntry := badger.NewEntry(key, value)
if err := batch.SetEntry(coreEntry); err != nil {
log.Warn().Err(err).Msg("unable to set item, breaking")
break
}
idxKey := c.prefixedIDByCartStr(item.Cart)
idxEntry := badger.NewEntry(idxKey, idxValue)
if err := batch.SetEntry(idxEntry); err != nil {
log.Warn().Err(err).Msg("unable to set idx, breaking")
break
}
return nil
}()
if err != nil && !errors.Is(err, context.Canceled) {
return err
}
err := batch.Flush()
err = batch.Flush()
if err != nil {
return fmt.Errorf("flushing changes: %w", err)
}