Newer
Older
"gitlab.schukai.com/oss/bob/types"
"gitlab.schukai.com/oss/bob/util"
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
"golang.org/x/net/html"
"golang.org/x/net/html/atom"
"path"
"strings"
)
// Names of the data-* attributes written into prepared templates.
const (
	// attributePrefix is the common prefix of all data attributes.
	attributePrefix = "data-"
	// attributeAttributes holds a comma-separated list of
	// "<attribute> <instruction>" pairs.
	attributeAttributes = "data-attributes"
	// attributeReplace holds the instruction that replaces an element's content.
	attributeReplace = "data-replace"
)
// removeAttribute returns the attributes from attrs whose Key differs from
// key, preserving their order. The input slice is left untouched; the result
// is nil when no attribute is kept.
func removeAttribute(attrs []html.Attribute, key string) []html.Attribute {
	var kept []html.Attribute
	for i := range attrs {
		if attrs[i].Key != key {
			kept = append(kept, attrs[i])
		}
	}
	return kept
}
//func setDataAttribute(node *html.Node, name, attribute, instruction string) {
// node.Attr = removeAttribute(node.Attr, name)
// node.Attr = append(node.Attr, html.Attribute{Key: name, Val: attribute + " " + instruction})
//}
// setDataAttributesAttribute records "<attribute> <instruction>" in the
// comma-separated data-attributes list of node.
//
// Entries already present in the data-attributes value are kept in their
// original order (a repeated key keeps its first position and its last
// value), any existing entry for attribute is dropped, and the new entry is
// appended at the end. Entries that do not contain a space are discarded.
// The rebuilt list is stored under the attribute called name.
//
// NOTE(review): the current value is always read from and removed under
// attributeAttributes, while the result is written under name; every caller
// in this file passes attributeAttributes for name.
func setDataAttributesAttribute(node *html.Node, name, attribute, instruction string) {
	var keys []string
	values := make(map[string]string)

	if current := util.GetAttribute(node.Attr, attributeAttributes); current != "" {
		for _, part := range strings.Split(current, ",") {
			key, val, found := strings.Cut(strings.TrimSpace(part), " ")
			if !found || key == "" || key == attribute {
				continue
			}
			// Remember first-seen order so the output is deterministic;
			// ranging over the map would shuffle entries between runs.
			if _, seen := values[key]; !seen {
				keys = append(keys, key)
			}
			values[key] = val
		}
	}

	parts := make([]string, 0, len(keys)+1)
	for _, key := range keys {
		parts = append(parts, key+" "+values[key])
	}
	parts = append(parts, attribute+" "+instruction)

	node.Attr = removeAttribute(node.Attr, attributeAttributes)
	node.Attr = append(node.Attr, html.Attribute{Key: name, Val: strings.Join(parts, ",")})
}
// prepareMeta looks up the first element matching meta[attrKey=attrValue]
// below node, marks its content attribute as replaceable via
// "path:meta.<attrValue>" and stores the current content in
// storage.Meta[attrValue].
//
// storage.Meta is created on demand. If storage.Meta already has an entry for
// attrValue the function returns without touching the document. An
// unparsable selector or a missing element is silently ignored.
func prepareMeta(node *html.Node, attrKey, attrValue string, storage *types.PageData) {
	if storage.Meta == nil {
		storage.Meta = make(map[string]string)
	}

	// A value that is already stored takes precedence over the document.
	if _, ok := storage.Meta[attrValue]; ok {
		return
	}

	sel, err := cascadia.Parse("meta[" + attrKey + "=" + attrValue + "]")
	if err != nil {
		return
	}

	meta := cascadia.Query(node, sel)
	if meta == nil {
		return
	}

	setDataAttributesAttribute(meta, attributeAttributes, "content", "path:meta."+attrValue)
	storage.Meta[attrValue] = util.GetAttribute(meta.Attr, "content")
}
// prepareLanguage marks the lang attribute of the <html> element below node
// as replaceable via "path:lang" and stores the document language in
// storage.Lang, falling back to "en" when the attribute is empty.
// Nothing happens if the selector cannot be parsed or no <html> element exists.
func prepareLanguage(node *html.Node, storage *types.PageData) {
	sel, err := cascadia.Parse("html")
	if err != nil {
		return
	}

	root := cascadia.Query(node, sel)
	if root == nil {
		return
	}

	setDataAttributesAttribute(root, attributeAttributes, "lang", "path:lang")

	lang := "en"
	if declared := util.GetAttribute(root.Attr, "lang"); declared != "" {
		lang = declared
	}
	storage.Lang = lang
}
// prepareTitle marks the <title> element below node as replaceable via
// "path:title" and stores the concatenation of its direct text children in
// storage.Title.
//
// If storage.Title is already set the function returns without touching the
// document. An unparsable selector or a missing <title> is silently ignored.
func prepareTitle(node *html.Node, storage *types.PageData) {
	if storage.Title != "" {
		return
	}

	selector, err := cascadia.Parse("title")
	if err != nil {
		return
	}

	n := cascadia.Query(node, selector)
	if n == nil {
		return
	}

	// Rebuild the attribute list from the <title> element itself. Using the
	// attributes of the root node here would replace the title's own
	// attributes with those of the root.
	n.Attr = removeAttribute(n.Attr, attributeReplace)
	n.Attr = append(n.Attr, html.Attribute{Key: attributeReplace, Val: "path:title"})

	var title strings.Builder
	for c := n.FirstChild; c != nil; c = c.NextSibling {
		if c.Type == html.TextNode {
			title.WriteString(c.Data)
		}
	}

	storage.Title = title.String()
}
// prepareAnchors collects every <a> element below node into storage.Anchors
// and marks its href, title and hreflang attributes as replaceable via
// "path:anchors.<id>.<attribute>".
//
// The list is rebuilt from the document. Afterwards every previously stored
// anchor whose Id matches a freshly collected one overwrites that entry, so
// stored values win over the values found in the document. Stored anchors
// without a matching element are dropped.
func prepareAnchors(node *html.Node, storage *types.PageData) {
	selector, err := cascadia.Parse("a")
	if err != nil {
		return
	}

	list := cascadia.QueryAll(node, selector)
	if list == nil {
		return
	}

	// Keep the stored anchors so they can be merged back after the rebuild.
	copyOfAnchors := make([]types.Anchor, len(storage.Anchors))
	copy(copyOfAnchors, storage.Anchors)

	storage.Anchors = []types.Anchor{}

	for _, n := range list {
		title := util.GetAttribute(n.Attr, "title")
		hreflang := util.GetAttribute(n.Attr, "hreflang")
		href := util.GetAttribute(n.Attr, "href")

		id := util.GetOrCreateId(n, title+hreflang+href)
		if id == "" {
			// Fall back to a random id; on error the id simply stays empty,
			// matching the best-effort behaviour of the rest of this function.
			id, _ = util.RandomString(8)
			n.Attr = removeAttribute(n.Attr, "id")
			n.Attr = append(n.Attr, html.Attribute{Key: "id", Val: id})
		}

		setDataAttributesAttribute(n, attributeAttributes, "href", "path:anchors."+id+".href")
		setDataAttributesAttribute(n, attributeAttributes, "title", "path:anchors."+id+".title")
		setDataAttributesAttribute(n, attributeAttributes, "hreflang", "path:anchors."+id+".hreflang")

		storage.Anchors = append(storage.Anchors, types.Anchor{
			Id:       id,
			Title:    title,
			HrefLang: hreflang,
			Href:     href,
		})
	}

	for _, anchor := range copyOfAnchors {
		foundIndex := -1
		// Search the anchors, not the images: the index is used on
		// storage.Anchors below.
		for index, a := range storage.Anchors {
			if a.Id == anchor.Id {
				foundIndex = index
				break
			}
		}
		if foundIndex > -1 {
			storage.Anchors[foundIndex] = anchor
		}
	}
}
func prepareImages(node *html.Node, storage *types.PageData) {
selector, err := cascadia.Parse("img")
if err != nil {
return
}
list := cascadia.QueryAll(node, selector)
if list == nil {
return
}
copyOfImages := make([]types.Image, len(storage.Images))
copy(copyOfImages, storage.Images)
storage.Images = []types.Image{}
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
for _, n := range list {
alt := util.GetAttribute(n.Attr, "alt")
title := util.GetAttribute(n.Attr, "title")
source := util.GetAttribute(n.Attr, "src")
id := util.GetOrCreateId(n, alt+title+source)
if id == "" {
id, _ = util.RandomString(8)
n.Attr = removeAttribute(n.Attr, "id")
n.Attr = append(n.Attr, html.Attribute{Key: "id", Val: id})
}
setDataAttributesAttribute(n, attributeAttributes, "src", "path:content."+id+".src")
setDataAttributesAttribute(n, attributeAttributes, "alt", "path:content."+id+".alt")
setDataAttributesAttribute(n, attributeAttributes, "title", "path:content."+id+".title")
storage.Images = append(storage.Images, types.Image{
Id: id,
Alt: alt,
Title: title,
Source: source,
})
}
for _, image := range copyOfImages {
foundIndex := -1
for index, i := range storage.Images {
if i.Id == image.Id {
foundIndex = index
break
}
}
if foundIndex > -1 {
storage.Images[foundIndex] = image
}
}
func prepareTranslationJson(node *html.Node, storage *types.PageData) {
selector, err := cascadia.Parse("script[data-monster-role=translation]")
if err != nil {
return
}
list := cascadia.QueryAll(node, selector)
if list == nil {
return
}
copyOfTranslations := make([]types.Translations, len(storage.Translations))
copy(copyOfTranslations, storage.Translations)
storage.Translations = []types.Translations{}
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
for _, n := range list {
id := util.GetAttribute(n.Attr, "id")
typ := util.GetAttribute(n.Attr, "type")
n.Attr = removeAttribute(n.Attr, attributeReplace)
n.Attr = append(n.Attr, html.Attribute{Key: attributeReplace, Val: "path:translations." + id + ".content"})
content := ""
for c := n.FirstChild; c != nil; c = c.NextSibling {
if c.Type == html.TextNode {
content += c.Data
}
}
content = strings.TrimSpace(content)
t := make(map[string]any)
err := json.Unmarshal([]byte(content), &t)
if err != nil {
fmt.Println(err)
}
storage.Translations = append(storage.Translations, types.Translations{
Id: id,
Type: typ,
KeyValues: t,
})
}
for _, translation := range copyOfTranslations {
foundIndex := -1
for index, t := range storage.Translations {
if t.Id == translation.Id {
foundIndex = index
break
}
}
if foundIndex > -1 {
storage.Translations[foundIndex] = translation
}
}
// prepareTextNodes rebuilds storage.Text from the nodes below the <body>
// element of node by running runNodes over it.
//
// Afterwards every previously stored text whose Id matches a freshly
// collected one overwrites that entry, so stored values win over the values
// found in the document. Stored texts without a match are dropped. An
// unparsable selector or a missing <body> is silently ignored.
func prepareTextNodes(node *html.Node, storage *types.PageData) {
	selector, err := cascadia.Parse("body")
	if err != nil {
		return
	}

	body := cascadia.Query(node, selector)
	if body == nil {
		return
	}

	// Keep the stored texts so they can be merged back after the rebuild.
	copyOfTextNodes := make([]types.Text, len(storage.Text))
	copy(copyOfTextNodes, storage.Text)

	storage.Text = []types.Text{}

	// Collect the texts of the body. Without this walk storage.Text would
	// stay empty and the merge below could never find a match.
	runNodes(body, storage)

	for _, text := range copyOfTextNodes {
		foundIndex := -1
		for index, t := range storage.Text {
			if t.Id == text.Id {
				foundIndex = index
				break
			}
		}
		if foundIndex > -1 {
			storage.Text[foundIndex] = text
		}
	}
}
// runNodes walks the subtree below n depth-first, calling checkNodes on every
// descendant before descending into it.
//
// The children are snapshotted before they are processed because checkNodes
// inserts and removes nodes, which changes the sibling links being walked.
func runNodes(n *html.Node, storage *types.PageData) {
	var children []*html.Node
	for child := n.FirstChild; child != nil; child = child.NextSibling {
		children = append(children, child)
	}

	for i := 0; i < len(children); i++ {
		checkNodes(children[i], storage)
		runNodes(children[i], storage)
	}
}
func checkNodes(n *html.Node, storage *types.PageData) {
if n.Parent != nil {
if n.Parent.Type == html.ElementNode {
if n.Parent.Data == "script" || n.Parent.Data == "style" {
return
}
}
}
content := strings.TrimSpace(n.Data)
if content == "" {
return
}
id, err := util.BuildTextKey(content)
if err != nil || id == "" {
id = util.GetNextId()
}
parent := n.Parent
span := &html.Node{
Type: html.ElementNode,
Data: atom.Span.String(),
Attr: []html.Attribute{
{
Key: "id",
Val: id,
{
Key: "data-replace-self",
Val: "path:text." + id + ".text",
},
},
parent.InsertBefore(span, n)
parent.RemoveChild(n)
span.AppendChild(n)
storage.Text = append(storage.Text, types.Text{
Id: id,
Text: content,
})
func PrepareHtmlFile(from, to string, storage types.PageDataStorage) (string, error) {
node, err := util.LoadHtml(from)
if err != nil {
var pd *types.PageData
var ok bool
if pd, ok = storage[p]; !ok {
pd = types.NewPageData()
storage[p] = pd
}
prepareLanguage(node, pd)
prepareTitle(node, pd)
prepareMeta(node, "name", "description", pd)
prepareMeta(node, "name", "keywords", pd)
prepareMeta(node, "name", "author", pd)
prepareImages(node, pd)
prepareAnchors(node, pd)
prepareTextNodes(node, pd)
to = path.Join(to, path.Base(from))