package template

import (
	"encoding/json"
	"fmt"
	"path"
	"strings"

	"github.com/andybalholm/cascadia"
	"gitlab.schukai.com/oss/bob/constants"
	"gitlab.schukai.com/oss/bob/types"
	"gitlab.schukai.com/oss/bob/util"
	"golang.org/x/net/html"
	"golang.org/x/net/html/atom"
)

const (
	// attributePrefix is the "data-" prefix string. It is not referenced in
	// the visible part of this file.
	attributePrefix = "data-"

	// attributeAttributes names the attribute that carries a comma-separated
	// list of "<attribute> <instruction>" bindings.
	attributeAttributes = "data-attributes"

	// attributeReplace names the attribute that carries a single instruction
	// for an element (used here on <title> and translation <script> nodes).
	attributeReplace = "data-replace"
)

// removeAttribute returns the attributes of attrs whose Key differs from key,
// in their original order. The input slice is not modified. The result is nil
// when no attribute remains.
func removeAttribute(attrs []html.Attribute, key string) []html.Attribute {
	var result []html.Attribute
	for _, attr := range attrs {
		if attr.Key == key {
			continue
		}
		result = append(result, attr)
	}
	return result
}

//func setDataAttribute(node *html.Node, name, attribute, instruction string) {
//	node.Attr = removeAttribute(node.Attr, name)
//	node.Attr = append(node.Attr, html.Attribute{Key: name, Val: attribute + " " + instruction})
//}

// setDataAttributesAttribute adds the binding "<attribute> <instruction>" to
// the comma-separated binding list stored in the node attribute called name.
//
// Existing bindings for other attributes are kept in the order in which they
// first appear (if a key occurs more than once, its last value wins), any
// existing binding for attribute is dropped, and the new binding is appended
// at the end. List entries that do not contain a space are discarded. The
// attribute name is then replaced on the node by the rebuilt list.
func setDataAttributesAttribute(node *html.Node, name, attribute, instruction string) {
	var order []string
	values := make(map[string]string)

	for _, entry := range strings.Split(util.GetAttribute(node.Attr, name), ",") {
		key, val, found := strings.Cut(strings.TrimSpace(entry), " ")
		if !found || key == "" || key == attribute {
			continue
		}
		// Track first-seen order separately: ranging over the map would make
		// the generated attribute value differ from run to run.
		if _, seen := values[key]; !seen {
			order = append(order, key)
		}
		values[key] = val
	}

	bindings := make([]string, 0, len(order)+1)
	for _, key := range order {
		bindings = append(bindings, key+" "+values[key])
	}
	bindings = append(bindings, attribute+" "+instruction)

	node.Attr = removeAttribute(node.Attr, name)
	node.Attr = append(node.Attr, html.Attribute{Key: name, Val: strings.Join(bindings, ",")})
}

// prepareMeta looks up the first element matching meta[attrKey=attrValue]
// below node, binds its "content" attribute to "path:meta.<attrValue>" and
// stores the current content value in storage.Meta[attrValue].
//
// storage.Meta is created when nil. Nothing is changed when storage.Meta
// already has an entry for attrValue, when the selector cannot be parsed, or
// when no matching element exists.
func prepareMeta(node *html.Node, attrKey, attrValue string, storage *types.PageData) {
	if storage.Meta == nil {
		storage.Meta = make(map[string]string)
	}

	// NOTE(review): when a value is already stored, the meta element is left
	// without the data-attributes binding as well — confirm this is intended.
	if _, ok := storage.Meta[attrValue]; ok {
		return
	}

	sel, err := cascadia.Parse("meta[" + attrKey + "=" + attrValue + "]")
	if err != nil {
		return
	}

	meta := cascadia.Query(node, sel)
	if meta == nil {
		return
	}

	setDataAttributesAttribute(meta, attributeAttributes, "content", "path:meta."+attrValue)
	storage.Meta[attrValue] = util.GetAttribute(meta.Attr, "content")
}
func prepareLanguage(node *html.Node, storage *types.PageData) { selector, err := cascadia.Parse("html") if err != nil { return } n := cascadia.Query(node, selector) if n == nil { return } setDataAttributesAttribute(n, attributeAttributes, "lang", "path:lang") lang := util.GetAttribute(n.Attr, "lang") if lang == "" { lang = "en" } storage.Lang = lang } func prepareTitle(node *html.Node, storage *types.PageData) { if storage.Title != "" { return } selector, err := cascadia.Parse("title") if err != nil { return } n := cascadia.Query(node, selector) if n == nil { return } n.Attr = removeAttribute(node.Attr, attributeReplace) n.Attr = append(node.Attr, html.Attribute{Key: attributeReplace, Val: "path:title"}) title := "" for c := n.FirstChild; c != nil; c = c.NextSibling { if c.Type == html.TextNode { title += c.Data } } storage.Title = title } func prepareAnchors(node *html.Node, storage *types.PageData) { selector, err := cascadia.Parse("a") if err != nil { return } list := cascadia.QueryAll(node, selector) if list == nil { return } copyOfAnchors := make([]types.Anchor, len(storage.Anchors)) copy(copyOfAnchors, storage.Anchors) storage.Anchors = []types.Anchor{} for _, n := range list { title := util.GetAttribute(n.Attr, "title") hreflang := util.GetAttribute(n.Attr, "hreflang") href := util.GetAttribute(n.Attr, "href") id := util.GetOrCreateReference(n, title+hreflang+href) if id == "" { id, _ = util.RandomString(8) n.Attr = removeAttribute(n.Attr, constants.DataBobReferenceAttributeKey) n.Attr = append(n.Attr, html.Attribute{Key: constants.DataBobReferenceAttributeKey, Val: id}) } setDataAttributesAttribute(n, attributeAttributes, "href", "path:anchors."+id+".href") setDataAttributesAttribute(n, attributeAttributes, "title", "path:anchors."+id+".title") setDataAttributesAttribute(n, attributeAttributes, "hreflang", "path:anchors."+id+".hreflang") storage.Anchors = append(storage.Anchors, types.Anchor{ Id: id, Title: title, HrefLang: hreflang, Href: href, }) } for 
_, anchor := range copyOfAnchors { foundIndex := -1 for index, i := range storage.Images { if i.Id == anchor.Id { foundIndex = index break } } if foundIndex > -1 { storage.Anchors[foundIndex] = anchor } } } func prepareImages(node *html.Node, storage *types.PageData) { selector, err := cascadia.Parse("img") if err != nil { return } list := cascadia.QueryAll(node, selector) if list == nil { return } copyOfImages := make([]types.Image, len(storage.Images)) copy(copyOfImages, storage.Images) storage.Images = []types.Image{} for _, n := range list { alt := util.GetAttribute(n.Attr, "alt") title := util.GetAttribute(n.Attr, "title") source := util.GetAttribute(n.Attr, "src") id := util.GetOrCreateReference(n, alt+title+source) if id == "" { id, _ = util.RandomString(8) n.Attr = removeAttribute(n.Attr, constants.DataBobReferenceAttributeKey) n.Attr = append(n.Attr, html.Attribute{Key: constants.DataBobReferenceAttributeKey, Val: id}) } setDataAttributesAttribute(n, attributeAttributes, "src", "path:content."+id+".src") setDataAttributesAttribute(n, attributeAttributes, "alt", "path:content."+id+".alt") setDataAttributesAttribute(n, attributeAttributes, "title", "path:content."+id+".title") storage.Images = append(storage.Images, types.Image{ Id: id, Alt: alt, Title: title, Source: source, }) } for _, image := range copyOfImages { foundIndex := -1 for index, i := range storage.Images { if i.Id == image.Id { foundIndex = index break } } if foundIndex > -1 { storage.Images[foundIndex] = image } } } func prepareTranslationJson(node *html.Node, storage *types.PageData) { selector, err := cascadia.Parse("script[data-monster-role=translation]") if err != nil { return } list := cascadia.QueryAll(node, selector) if list == nil { return } copyOfTranslations := make([]types.Translations, len(storage.Translations)) copy(copyOfTranslations, storage.Translations) storage.Translations = []types.Translations{} for _, n := range list { id := util.GetAttribute(n.Attr, 
constants.DataBobReferenceAttributeKey) typ := util.GetAttribute(n.Attr, "type") n.Attr = removeAttribute(n.Attr, attributeReplace) n.Attr = append(n.Attr, html.Attribute{Key: attributeReplace, Val: "path:translations." + id + ".content"}) content := "" for c := n.FirstChild; c != nil; c = c.NextSibling { if c.Type == html.TextNode { content += c.Data } } content = strings.TrimSpace(content) t := make(map[string]any) err := json.Unmarshal([]byte(content), &t) if err != nil { fmt.Println(err) } storage.Translations = append(storage.Translations, types.Translations{ Id: id, Type: typ, KeyValues: t, }) } for _, translation := range copyOfTranslations { foundIndex := -1 for index, t := range storage.Translations { if t.Id == translation.Id { foundIndex = index break } } if foundIndex > -1 { storage.Translations[foundIndex] = translation } } } func prepareTextNodes(node *html.Node, storage *types.PageData) { selector, err := cascadia.Parse("body") if err != nil { return } body := cascadia.Query(node, selector) if body == nil { return } copyOfTextNodes := make([]types.Text, len(storage.Text)) copy(copyOfTextNodes, storage.Text) storage.Text = []types.Text{} runNodes(body, storage) for _, text := range copyOfTextNodes { foundIndex := -1 for index, t := range storage.Text { if t.Id == text.Id { foundIndex = index break } } if foundIndex > -1 { storage.Text[foundIndex] = text } } } func runNodes(n *html.Node, storage *types.PageData) { nodeList := []*html.Node{} for c := n.FirstChild; c != nil; c = c.NextSibling { nodeList = append(nodeList, c) } for _, n := range nodeList { checkNodes(n, storage) runNodes(n, storage) } } func handleTextNode(n *html.Node, storage *types.PageData) { content := strings.TrimSpace(n.Data) if content == "" { return } id, err := util.BuildTextKey(content) if err != nil || id == "" { id = util.GetNextId() } parent := n.Parent span := &html.Node{ Type: html.ElementNode, Data: atom.Span.String(), Attr: []html.Attribute{ { Key: 
constants.DataBobReferenceAttributeKey, Val: id, }, { Key: "data-replace-self", Val: "path:text." + id + ".text", }, }, } parent.InsertBefore(span, n) parent.RemoveChild(n) span.AppendChild(n) storage.Text = append(storage.Text, types.Text{ Id: id, Text: content, }) } func checkNodes(n *html.Node, storage *types.PageData) { if n.Parent != nil { if n.Parent.Type == html.ElementNode { if n.Parent.Data == "script" || n.Parent.Data == "style" { return } } } if n.Type == html.TextNode { handleTextNode(n, storage) } else if n.Type == html.ElementNode { switch n.Data { case "monster-datatable": checkMonsterDatatableHead(n, storage) } } } func checkMonsterDatatableHead(n *html.Node, storage *types.PageData) { selector, err := cascadia.Parse("[data-monster-head]") if err != nil { return } list := cascadia.QueryAll(n, selector) if list == nil { return } for _, div := range list { head := util.GetAttribute(div.Attr, "data-monster-head") id := util.GetAttribute(div.Attr, constants.DataBobReferenceAttributeKey) if id == "" { headID := util.BuildTextKey(head) if headID == "" { id = util.GetNextId() } else { id = headID } div.Attr = append(div.Attr, html.Attribute{Key: constants.DataBobReferenceAttributeKey, Val: id}) } div.Attr = removeAttribute(div.Attr, "data-attributes") div.Attr = append(div.Attr, html.Attribute{Key: "data-attributes", Val: "data-monster-head path:text." 
+ id + ".text"}) storage.Text = append(storage.Text, types.Text{ Id: id, Text: head, }) } } func PrepareHtmlFile(from, to string, storage types.PageDataStorage) (string, error) { node, err := util.LoadHtml(from) if err != nil { return "", err } var pd *types.PageData var ok bool p := path.Base(from) if pd, ok = storage[p]; !ok { pd = types.NewPageData() storage[p] = pd } prepareLanguage(node, pd) prepareTitle(node, pd) prepareMeta(node, "name", "description", pd) prepareMeta(node, "name", "keywords", pd) prepareMeta(node, "name", "author", pd) prepareImages(node, pd) prepareAnchors(node, pd) prepareTextNodes(node, pd) prepareTranslationJson(node, pd) pd.Export = path.Join(pd.Lang, path.Base(from)) to = path.Join(to, path.Base(from)) return p, util.SaveHtml(to, node) }