package template

import (
	"encoding/json"
	"fmt"
	"path"
	"strings"

	"github.com/andybalholm/cascadia"
	"gitlab.schukai.com/oss/bob/types"
	"gitlab.schukai.com/oss/bob/util"
	"golang.org/x/net/html"
	"golang.org/x/net/html/atom"
)

const (
	attributePrefix     = "data-"
	attributeAttributes = "data-attributes"
	attributeReplace    = "data-replace"
)

// removeAttribute returns a new slice holding every attribute of attrs whose
// key differs from key. The input slice is not modified; the result is nil
// when no attribute remains.
func removeAttribute(attrs []html.Attribute, key string) []html.Attribute {
	var result []html.Attribute
	for _, attr := range attrs {
		if attr.Key == key {
			continue
		}
		result = append(result, attr)
	}
	return result
}

//func setDataAttribute(node *html.Node, name, attribute, instruction string) {
//	node.Attr = removeAttribute(node.Attr, name)
//	node.Attr = append(node.Attr, html.Attribute{Key: name, Val: attribute + " " + instruction})
//}

// setDataAttributesAttribute registers "<attribute> <instruction>" in the
// comma-separated list stored in the attribute called name on node.
//
// Existing list entries are kept in their original order; an entry for the
// same attribute is dropped and the new one is appended at the end. Entries
// that do not have the form "<key> <value>" are discarded. When a key occurs
// more than once, the last value wins and the first position is kept.
func setDataAttributesAttribute(node *html.Node, name, attribute, instruction string) {
	type entry struct {
		key   string
		value string
	}

	// A slice plus an index map keeps the output order stable; building the
	// value by ranging over a map alone would reorder entries between runs.
	var kept []entry
	position := make(map[string]int)

	for _, raw := range strings.Split(util.GetAttribute(node.Attr, name), ",") {
		key, value, found := strings.Cut(strings.TrimSpace(raw), " ")
		if !found || key == "" || key == attribute {
			continue
		}
		if i, seen := position[key]; seen {
			kept[i].value = value
			continue
		}
		position[key] = len(kept)
		kept = append(kept, entry{key: key, value: value})
	}

	var b strings.Builder
	for _, e := range kept {
		b.WriteString(e.key)
		b.WriteByte(' ')
		b.WriteString(e.value)
		b.WriteByte(',')
	}
	b.WriteString(attribute)
	b.WriteByte(' ')
	b.WriteString(instruction)

	// Read, remove and write all use the same attribute name so that no
	// duplicate attribute is left behind on the node.
	node.Attr = removeAttribute(node.Attr, name)
	node.Attr = append(node.Attr, html.Attribute{Key: name, Val: b.String()})
}

// concatChildText concatenates the data of all direct text-node children of
// n. Text nested inside child elements is not included.
func concatChildText(n *html.Node) string {
	var b strings.Builder
	for c := n.FirstChild; c != nil; c = c.NextSibling {
		if c.Type == html.TextNode {
			b.WriteString(c.Data)
		}
	}
	return b.String()
}

// ensureElementID returns the id util.GetOrCreateId yields for n and seed.
// If that id is empty, a generated id is written to the node's "id"
// attribute and returned instead.
func ensureElementID(n *html.Node, seed string) string {
	id := util.GetOrCreateId(n, seed)
	if id != "" {
		return id
	}

	// The second result of RandomString is intentionally not inspected: a
	// failure shows up as an empty id, which is handled right below with the
	// same fallback generator checkNodes uses.
	id, _ = util.RandomString(8)
	if id == "" {
		id = util.GetNextId()
	}

	n.Attr = removeAttribute(n.Attr, "id")
	n.Attr = append(n.Attr, html.Attribute{Key: "id", Val: id})
	return id
}

// prepateMeta looks up the first <meta> element matching
// meta[attrKey=attrValue] below node, binds its "content" attribute to
// "path:meta.<attrValue>" and records the current content value in
// storage.Meta under attrValue. It does nothing when the selector cannot be
// parsed or no element matches.
func prepateMeta(node *html.Node, attrKey, attrValue string, storage *types.PageData) {
	sel, err := cascadia.Parse("meta[" + attrKey + "=" + attrValue + "]")
	if err != nil {
		return
	}

	meta := cascadia.Query(node, sel)
	if meta == nil {
		return
	}

	setDataAttributesAttribute(meta, attributeAttributes, "content", "path:meta."+attrValue)
	storage.Meta[attrValue] = util.GetAttribute(meta.Attr, "content")
}

// prepareLanguage binds the "lang" attribute of the <html> element to
// "path:lang" and stores the element's language in storage.Lang, using "en"
// when the attribute is empty.
func prepareLanguage(node *html.Node, storage *types.PageData) {
	selector, err := cascadia.Parse("html")
	if err != nil {
		return
	}

	n := cascadia.Query(node, selector)
	if n == nil {
		return
	}

	setDataAttributesAttribute(n, attributeAttributes, "lang", "path:lang")

	lang := util.GetAttribute(n.Attr, "lang")
	if lang == "" {
		lang = "en"
	}
	storage.Lang = lang
}

// prepareTitle marks the <title> element with data-replace="path:title" and
// stores the concatenation of its direct text children in storage.Title.
func prepareTitle(node *html.Node, storage *types.PageData) {
	selector, err := cascadia.Parse("title")
	if err != nil {
		return
	}

	n := cascadia.Query(node, selector)
	if n == nil {
		return
	}

	// Operate on the <title> element's own attributes (n), not on those of
	// the root node that was passed in.
	n.Attr = removeAttribute(n.Attr, attributeReplace)
	n.Attr = append(n.Attr, html.Attribute{Key: attributeReplace, Val: "path:title"})

	storage.Title = concatChildText(n)
}

// prepareAnchors binds href, title and hreflang of every <a> element below
// node to "path:anchors.<id>.*" and appends one types.Anchor per element to
// storage.Anchors, holding the values the element had before the binding.
func prepareAnchors(node *html.Node, storage *types.PageData) {
	selector, err := cascadia.Parse("a")
	if err != nil {
		return
	}

	for _, n := range cascadia.QueryAll(node, selector) {
		title := util.GetAttribute(n.Attr, "title")
		hreflang := util.GetAttribute(n.Attr, "hreflang")
		href := util.GetAttribute(n.Attr, "href")

		id := ensureElementID(n, title+hreflang+href)

		setDataAttributesAttribute(n, attributeAttributes, "href", "path:anchors."+id+".href")
		setDataAttributesAttribute(n, attributeAttributes, "title", "path:anchors."+id+".title")
		setDataAttributesAttribute(n, attributeAttributes, "hreflang", "path:anchors."+id+".hreflang")

		storage.Anchors = append(storage.Anchors, types.Anchor{
			Id:       id,
			Title:    title,
			HrefLang: hreflang,
			Href:     href,
		})
	}
}

// prepareImages binds src, alt and title of every <img> element below node
// to "path:content.<id>.*" and appends one types.Image per element to
// storage.Images, holding the values the element had before the binding.
func prepareImages(node *html.Node, storage *types.PageData) {
	selector, err := cascadia.Parse("img")
	if err != nil {
		return
	}

	for _, n := range cascadia.QueryAll(node, selector) {
		alt := util.GetAttribute(n.Attr, "alt")
		title := util.GetAttribute(n.Attr, "title")
		source := util.GetAttribute(n.Attr, "src")

		id := ensureElementID(n, alt+title+source)

		// NOTE(review): images are bound below "path:content." while anchors
		// use "path:anchors."; confirm the prefix against the consumer of
		// these paths before changing it.
		setDataAttributesAttribute(n, attributeAttributes, "src", "path:content."+id+".src")
		setDataAttributesAttribute(n, attributeAttributes, "alt", "path:content."+id+".alt")
		setDataAttributesAttribute(n, attributeAttributes, "title", "path:content."+id+".title")

		storage.Images = append(storage.Images, types.Image{
			Id:     id,
			Alt:    alt,
			Title:  title,
			Source: source,
		})
	}
}

// prepareTranslationJson handles every <script data-monster-role=translation>
// element below node: it marks the element with
// data-replace="path:translations.<id>.content", decodes the element's text
// as a JSON object and appends the result to storage.Translations.
//
// A decoding error is printed and the (possibly empty) map is still stored.
func prepareTranslationJson(node *html.Node, storage *types.PageData) {
	selector, err := cascadia.Parse("script[data-monster-role=translation]")
	if err != nil {
		return
	}

	for _, n := range cascadia.QueryAll(node, selector) {
		id := util.GetAttribute(n.Attr, "id")
		typ := util.GetAttribute(n.Attr, "type")

		n.Attr = removeAttribute(n.Attr, attributeReplace)
		n.Attr = append(n.Attr, html.Attribute{Key: attributeReplace, Val: "path:translations." + id + ".content"})

		content := strings.TrimSpace(concatChildText(n))

		t := make(map[string]any)
		if err := json.Unmarshal([]byte(content), &t); err != nil {
			fmt.Println(err)
		}

		storage.Translations = append(storage.Translations, types.Translations{
			Id:        id,
			Type:      typ,
			KeyValues: t,
		})
	}
}

// prepareTextNodes walks all nodes below the <body> element and wraps the
// non-empty text nodes it finds (see checkNodes).
func prepareTextNodes(node *html.Node, storage *types.PageData) {
	selector, err := cascadia.Parse("body")
	if err != nil {
		return
	}

	body := cascadia.Query(node, selector)
	if body == nil {
		return
	}

	runNodes(body, storage)
}

// runNodes visits every descendant of n depth-first, calling checkNodes on
// each one.
func runNodes(n *html.Node, storage *types.PageData) {
	// Snapshot the children first: checkNodes replaces text nodes with
	// wrapper elements, which changes the sibling chain while it is walked.
	var children []*html.Node
	for c := n.FirstChild; c != nil; c = c.NextSibling {
		children = append(children, c)
	}

	for _, child := range children {
		checkNodes(child, storage)
		runNodes(child, storage)
	}
}

// checkNodes wraps a non-empty text node in a
// <span id="<id>" data-replace-self="path:text.<id>.text"> element and
// records the trimmed text in storage.Text. Nodes that are not text nodes,
// contain only whitespace, have no parent, or sit directly inside <script>
// or <style> are left untouched.
func checkNodes(n *html.Node, storage *types.PageData) {
	parent := n.Parent
	if parent == nil || n.Type != html.TextNode {
		// Without a parent there is nothing to insert the wrapper into.
		return
	}
	if parent.Type == html.ElementNode && (parent.Data == "script" || parent.Data == "style") {
		return
	}

	content := strings.TrimSpace(n.Data)
	if content == "" {
		return
	}

	id, err := util.BuildTextKey(content)
	if err != nil || id == "" {
		id = util.GetNextId()
	}

	span := &html.Node{
		Type:     html.ElementNode,
		DataAtom: atom.Span, // keep DataAtom in sync with Data, as parsed nodes do
		Data:     atom.Span.String(),
		Attr: []html.Attribute{
			{Key: "id", Val: id},
			{Key: "data-replace-self", Val: "path:text." + id + ".text"},
		},
	}

	// Put the wrapper where the text node was, then move the text node
	// (with its original, untrimmed data) inside it.
	parent.InsertBefore(span, n)
	parent.RemoveChild(n)
	span.AppendChild(n)

	storage.Text = append(storage.Text, types.Text{
		Id:   id,
		Text: content,
	})
}

// PrepareHtmlFile loads the HTML document at from, annotates language,
// title, meta tags, images, anchors, text nodes and translation scripts with
// data binding attributes, and saves the result into the directory to under
// the same base name.
//
// The extracted page data is stored in storage under the base name of from;
// its Export field is set to "<lang>/<base name>". An error is returned when
// storage is nil or when loading or saving the document fails.
func PrepareHtmlFile(from, to string, storage types.PageDataStorage) error {
	if storage == nil {
		return fmt.Errorf("preparing %q: storage is nil", from)
	}

	node, err := util.LoadHtml(from)
	if err != nil {
		return fmt.Errorf("loading %q: %w", from, err)
	}

	base := path.Base(from)

	pd := types.NewPageData()
	storage[base] = pd

	prepareLanguage(node, pd)
	prepareTitle(node, pd)
	prepateMeta(node, "name", "description", pd)
	prepateMeta(node, "name", "keywords", pd)
	prepateMeta(node, "name", "author", pd)
	prepareImages(node, pd)
	prepareAnchors(node, pd)
	prepareTextNodes(node, pd)
	prepareTranslationJson(node, pd)

	pd.Export = path.Join(pd.Lang, base)

	target := path.Join(to, base)
	if err := util.SaveHtml(target, node); err != nil {
		return fmt.Errorf("saving %q: %w", target, err)
	}
	return nil
}