Skip to content
Snippets Groups Projects
prepare.go 9.32 KiB
Newer Older
package template

import (
	"encoding/json"
	"fmt"
	"github.com/andybalholm/cascadia"
	"gitlab.schukai.com/oss/bob/types"
	"gitlab.schukai.com/oss/bob/util"
	"golang.org/x/net/html"
	"golang.org/x/net/html/atom"
	"path"
	"strings"
)

// Names of the data attributes written into prepared templates.
const (
	// attributePrefix is the prefix shared by the data attributes below.
	attributePrefix = "data-"

	// attributeAttributes names the attribute that holds a comma-separated
	// list of "<attribute> <instruction>" pairs.
	attributeAttributes = "data-attributes"

	// attributeReplace names the attribute that holds a path instruction
	// such as "path:title".
	attributeReplace = "data-replace"
)

// removeAttribute returns a new slice containing every attribute of attrs
// whose key differs from key, in the original order. The input slice is not
// modified. The result is nil when no attribute is kept.
func removeAttribute(attrs []html.Attribute, key string) []html.Attribute {
	var kept []html.Attribute
	for i := range attrs {
		if attrs[i].Key != key {
			kept = append(kept, attrs[i])
		}
	}
	return kept
}

//func setDataAttribute(node *html.Node, name, attribute, instruction string) {
//	node.Attr = removeAttribute(node.Attr, name)
//	node.Attr = append(node.Attr, html.Attribute{Key: name, Val: attribute + " " + instruction})
//}

// setDataAttributesAttribute registers instruction for attribute in the node's
// data-attributes list.
//
// The current data-attributes value is read as a comma-separated list of
// "<attribute> <instruction>" pairs. A pair already registered for attribute
// is dropped, entries without a space separator are discarded, and the new
// pair is appended at the end. The existing data-attributes attribute is
// removed and the rebuilt list is stored under the key name.
//
// The remaining pairs keep the order of their first appearance. Rebuilding the
// list by ranging over a map would emit them in a different order on every
// run, because Go does not define map iteration order.
func setDataAttributesAttribute(node *html.Node, name, attribute, instruction string) {
	existing := util.GetAttribute(node.Attr, attributeAttributes)

	// order remembers first-seen keys; values holds the last instruction per key.
	var order []string
	values := make(map[string]string)

	for _, entry := range strings.Split(existing, ",") {
		key, val, found := strings.Cut(strings.TrimSpace(entry), " ")
		if !found || key == attribute {
			continue
		}
		if _, seen := values[key]; !seen {
			order = append(order, key)
		}
		values[key] = val
	}

	parts := make([]string, 0, len(order)+1)
	for _, key := range order {
		parts = append(parts, key+" "+values[key])
	}
	parts = append(parts, attribute+" "+instruction)

	node.Attr = removeAttribute(node.Attr, attributeAttributes)
	node.Attr = append(node.Attr, html.Attribute{Key: name, Val: strings.Join(parts, ",")})
}

// prepareMeta looks up the <meta> element selected by
// "meta[<attrKey>=<attrValue>]", registers a "content" instruction pointing at
// "path:meta.<attrValue>" on it, and stores the element's current content in
// storage.Meta under attrValue.
//
// storage.Meta is created when nil. Nothing else happens when attrValue is
// already present in storage.Meta, when the selector cannot be parsed, or when
// no matching element exists.
func prepareMeta(node *html.Node, attrKey, attrValue string, storage *types.PageData) {
	if storage.Meta == nil {
		storage.Meta = make(map[string]string)
	}

	if _, known := storage.Meta[attrValue]; known {
		return
	}

	query := "meta[" + attrKey + "=" + attrValue + "]"
	matcher, err := cascadia.Parse(query)
	if err != nil {
		return
	}

	element := cascadia.Query(node, matcher)
	if element == nil {
		return
	}

	setDataAttributesAttribute(element, attributeAttributes, "content", "path:meta."+attrValue)
	storage.Meta[attrValue] = util.GetAttribute(element.Attr, "content")
}

// prepareLanguage registers a "lang" instruction ("path:lang") on the <html>
// element and stores the element's lang attribute in storage.Lang, falling
// back to "en" when the attribute is empty.
//
// Nothing happens when the selector cannot be parsed or the document has no
// <html> element.
func prepareLanguage(node *html.Node, storage *types.PageData) {
	rootSelector, err := cascadia.Parse("html")
	if err != nil {
		return
	}

	root := cascadia.Query(node, rootSelector)
	if root == nil {
		return
	}

	setDataAttributesAttribute(root, attributeAttributes, "lang", "path:lang")

	// Start from the default and override it with the declared language, if any.
	storage.Lang = "en"
	if declared := util.GetAttribute(root.Attr, "lang"); declared != "" {
		storage.Lang = declared
	}
}

// prepareTitle marks the document's <title> element with
// data-replace="path:title" and stores the concatenated text of its direct
// text children in storage.Title.
//
// Nothing happens when storage.Title is already set, when the selector cannot
// be parsed, or when the document has no <title> element.
func prepareTitle(node *html.Node, storage *types.PageData) {
	if storage.Title != "" {
		return
	}

	selector, err := cascadia.Parse("title")
	if err != nil {
		return
	}

	n := cascadia.Query(node, selector)
	if n == nil {
		return
	}

	// Work on the <title> element's own attributes. Using the attributes of
	// the node the search started from would overwrite the element's
	// attributes with unrelated ones.
	n.Attr = removeAttribute(n.Attr, attributeReplace)
	n.Attr = append(n.Attr, html.Attribute{Key: attributeReplace, Val: "path:title"})

	var title strings.Builder
	for c := n.FirstChild; c != nil; c = c.NextSibling {
		if c.Type == html.TextNode {
			title.WriteString(c.Data)
		}
	}

	storage.Title = title.String()
}

// prepareAnchors registers href/title/hreflang instructions on every <a>
// element below node and rebuilds storage.Anchors from the document.
//
// Each anchor gets an id from util.GetOrCreateId; when that yields an empty
// string a random id is generated and written to the element. After the list
// has been rebuilt, anchors that were already in storage replace the freshly
// extracted entry with the same id, so stored values take precedence over the
// values found in the document.
//
// Nothing happens when the selector cannot be parsed or no <a> element exists.
func prepareAnchors(node *html.Node, storage *types.PageData) {
	selector, err := cascadia.Parse("a")
	if err != nil {
		return
	}

	list := cascadia.QueryAll(node, selector)
	if list == nil {
		return
	}

	copyOfAnchors := make([]types.Anchor, len(storage.Anchors))
	copy(copyOfAnchors, storage.Anchors)
	storage.Anchors = []types.Anchor{}

	for _, n := range list {
		title := util.GetAttribute(n.Attr, "title")
		hreflang := util.GetAttribute(n.Attr, "hreflang")
		href := util.GetAttribute(n.Attr, "href")

		id := util.GetOrCreateId(n, title+hreflang+href)
		if id == "" {
			// NOTE(review): the second result of RandomString is ignored here;
			// confirm whether a failure can leave id empty.
			id, _ = util.RandomString(8)
			n.Attr = removeAttribute(n.Attr, "id")
			n.Attr = append(n.Attr, html.Attribute{Key: "id", Val: id})
		}

		setDataAttributesAttribute(n, attributeAttributes, "href", "path:anchors."+id+".href")
		setDataAttributesAttribute(n, attributeAttributes, "title", "path:anchors."+id+".title")
		setDataAttributesAttribute(n, attributeAttributes, "hreflang", "path:anchors."+id+".hreflang")

		storage.Anchors = append(storage.Anchors, types.Anchor{
			Id:       id,
			Title:    title,
			HrefLang: hreflang,
			Href:     href,
		})
	}

	// Restore previously stored anchors. The lookup must run over
	// storage.Anchors: an index found in another slice would address the
	// wrong entry or lie outside the slice.
	for _, anchor := range copyOfAnchors {
		for index := range storage.Anchors {
			if storage.Anchors[index].Id == anchor.Id {
				storage.Anchors[index] = anchor
				break
			}
		}
	}
}

func prepareImages(node *html.Node, storage *types.PageData) {

	selector, err := cascadia.Parse("img")
	if err != nil {
		return
	}

	list := cascadia.QueryAll(node, selector)
	if list == nil {
		return
	}

	copyOfImages := make([]types.Image, len(storage.Images))
	copy(copyOfImages, storage.Images)
	storage.Images = []types.Image{}

	for _, n := range list {

		alt := util.GetAttribute(n.Attr, "alt")
		title := util.GetAttribute(n.Attr, "title")
		source := util.GetAttribute(n.Attr, "src")

		id := util.GetOrCreateId(n, alt+title+source)
		if id == "" {
			id, _ = util.RandomString(8)
			n.Attr = removeAttribute(n.Attr, "id")
			n.Attr = append(n.Attr, html.Attribute{Key: "id", Val: id})
		}

		setDataAttributesAttribute(n, attributeAttributes, "src", "path:content."+id+".src")
		setDataAttributesAttribute(n, attributeAttributes, "alt", "path:content."+id+".alt")
		setDataAttributesAttribute(n, attributeAttributes, "title", "path:content."+id+".title")

		storage.Images = append(storage.Images, types.Image{
			Id:     id,
			Alt:    alt,
			Title:  title,
			Source: source,
		})

	}

	for _, image := range copyOfImages {
		foundIndex := -1
		for index, i := range storage.Images {
			if i.Id == image.Id {
				foundIndex = index
				break
			}
		}

		if foundIndex > -1 {
			storage.Images[foundIndex] = image
		}
	}

func prepareTranslationJson(node *html.Node, storage *types.PageData) {
	selector, err := cascadia.Parse("script[data-monster-role=translation]")
	if err != nil {
		return
	}

	list := cascadia.QueryAll(node, selector)
	if list == nil {
		return
	}

	copyOfTranslations := make([]types.Translations, len(storage.Translations))
	copy(copyOfTranslations, storage.Translations)
	storage.Translations = []types.Translations{}

	for _, n := range list {

		id := util.GetAttribute(n.Attr, "id")
		typ := util.GetAttribute(n.Attr, "type")

		n.Attr = removeAttribute(n.Attr, attributeReplace)
		n.Attr = append(n.Attr, html.Attribute{Key: attributeReplace, Val: "path:translations." + id + ".content"})

		content := ""
		for c := n.FirstChild; c != nil; c = c.NextSibling {
			if c.Type == html.TextNode {
				content += c.Data
			}
		}

		content = strings.TrimSpace(content)

		t := make(map[string]any)
		err := json.Unmarshal([]byte(content), &t)
		if err != nil {
			fmt.Println(err)
		}

		storage.Translations = append(storage.Translations, types.Translations{
			Id:        id,
			Type:      typ,
			KeyValues: t,
		})

	}

	for _, translation := range copyOfTranslations {
		foundIndex := -1
		for index, t := range storage.Translations {
			if t.Id == translation.Id {
				foundIndex = index
				break
			}
		}

		if foundIndex > -1 {
			storage.Translations[foundIndex] = translation
		}
	}

// prepareTextNodes rebuilds storage.Text from the text nodes below the
// document's <body> by running runNodes over it.
//
// Entries that were in storage.Text before the call replace the freshly
// collected entry with the same id, so stored texts take precedence over the
// texts found in the document. Nothing happens when the selector cannot be
// parsed or the document has no <body>.
func prepareTextNodes(node *html.Node, storage *types.PageData) {
	bodySelector, err := cascadia.Parse("body")
	if err != nil {
		return
	}

	body := cascadia.Query(node, bodySelector)
	if body == nil {
		return
	}

	// Snapshot the stored texts before the list is reset and refilled.
	previous := make([]types.Text, len(storage.Text))
	copy(previous, storage.Text)
	storage.Text = []types.Text{}

	runNodes(body, storage)

	for _, old := range previous {
		for i := range storage.Text {
			if storage.Text[i].Id == old.Id {
				storage.Text[i] = old
				break
			}
		}
	}
}

func runNodes(n *html.Node, storage *types.PageData) {
Volker Schukai's avatar
Volker Schukai committed
	nodeList := []*html.Node{}
	for c := n.FirstChild; c != nil; c = c.NextSibling {
		nodeList = append(nodeList, c)
	}

	for _, n := range nodeList {
		checkNodes(n, storage)
		runNodes(n, storage)
	}

}

func checkNodes(n *html.Node, storage *types.PageData) {
	if n.Parent != nil {
		if n.Parent.Type == html.ElementNode {
			if n.Parent.Data == "script" || n.Parent.Data == "style" {
				return
			}
		}
	}

Volker Schukai's avatar
Volker Schukai committed
	if n.Type != html.TextNode {
		return
	}
Volker Schukai's avatar
Volker Schukai committed
	content := strings.TrimSpace(n.Data)
	if content == "" {
		return
	}
Volker Schukai's avatar
Volker Schukai committed
	id, err := util.BuildTextKey(content)
	if err != nil || id == "" {
		id = util.GetNextId()
	}
Volker Schukai's avatar
Volker Schukai committed
	parent := n.Parent
	span := &html.Node{
		Type: html.ElementNode,
		Data: atom.Span.String(),
		Attr: []html.Attribute{
			{
				Key: "id",
				Val: id,
Volker Schukai's avatar
Volker Schukai committed
			{
				Key: "data-replace-self",
				Val: "path:text." + id + ".text",
			},
		},
Volker Schukai's avatar
Volker Schukai committed
	parent.InsertBefore(span, n)
	parent.RemoveChild(n)
Volker Schukai's avatar
Volker Schukai committed
	span.AppendChild(n)

	storage.Text = append(storage.Text, types.Text{
		Id:   id,
		Text: content,
	})
func PrepareHtmlFile(from, to string, storage types.PageDataStorage) (string, error) {
	node, err := util.LoadHtml(from)
	if err != nil {
		return "", err
	var pd *types.PageData
	var ok bool

	p := path.Base(from)

	if pd, ok = storage[p]; !ok {
		pd = types.NewPageData()
		storage[p] = pd
	}

	prepareLanguage(node, pd)
	prepareTitle(node, pd)
	prepareMeta(node, "name", "description", pd)
	prepareMeta(node, "name", "keywords", pd)
	prepareMeta(node, "name", "author", pd)

	prepareImages(node, pd)
	prepareAnchors(node, pd)
	prepareTextNodes(node, pd)

	prepareTranslationJson(node, pd)

Volker Schukai's avatar
Volker Schukai committed
	pd.Export = path.Join(pd.Lang, path.Base(from))
	to = path.Join(to, path.Base(from))
Volker Schukai's avatar
Volker Schukai committed

	return p, util.SaveHtml(to, node)