// Copyright 2023 schukai GmbH // SPDX-License-Identifier: AGPL-3.0 package document import ( "bytes" "crypto/md5" "encoding/base64" "fmt" embed "github.com/13rac1/goldmark-embed" "github.com/PuerkitoBio/goquery" "github.com/andybalholm/cascadia" "github.com/gomarkdown/markdown" "github.com/gomarkdown/markdown/ast" markdownHTML "github.com/gomarkdown/markdown/html" "github.com/gomarkdown/markdown/parser" "github.com/gosimple/slug" "github.com/mattn/go-shellwords" "github.com/tdewolff/minify/v2" minHTML "github.com/tdewolff/minify/v2/html" "github.com/yuin/goldmark" "github.com/yuin/goldmark/extension" goldmarkParser "github.com/yuin/goldmark/parser" goldmarkHTML "github.com/yuin/goldmark/renderer/html" "gitlab.schukai.com/oss/utilities/documentation-manager/environment" "gitlab.schukai.com/oss/utilities/documentation-manager/translations" "gitlab.schukai.com/oss/utilities/documentation-manager/utils" "golang.org/x/net/html" "golang.org/x/net/html/atom" "io" "io/ioutil" "log" "math/rand" "os" "path" "path/filepath" "regexp" "strconv" "strings" "text/template" "time" ) func init() { rand.Seed(time.Now().UnixNano()) } const defaultSet = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz" type BuildHtmlEnvironment struct { SourcePath string DateFormat string OutputPath string Verbose bool SinglePage bool Templates struct { HTML string Components map[string]string } } func (t *BuildHtmlEnvironment) GetComponentsTemplates() map[string]string { return t.Templates.Components } type HtmlDocument struct { ID string Content string *SourceFile } type DocumentNode struct { Level int ID string Title string } type SinglePageHtmlDataset struct { Documents []HtmlDocument CreatedFormat string // date format Meta struct { Title string ShortTitle string Description string Keywords string Language string } Version string DocumentTree *Tree[DocumentNode] TOC []DocumentNode } func BuildHTML(env BuildHtmlEnvironment) error { if env.SinglePage { return renderSinglePageHtml(env) } return renderMultiPageHtml(env) } func getSingleHtmlOutputFile(env BuildHtmlEnvironment) string { output := env.OutputPath if env.DateFormat != "" { dateFormat = env.DateFormat } if !filepath.IsAbs(output) { pwd := os.Getenv("PWD") output = path.Clean(path.Join(pwd, output)) } if output == "" { environment.ExitWithError(2, "the output option must be specified") } fileInfo, err := os.Stat(output) if err != nil { if !os.IsNotExist(err) { environment.ExitWithError(2, "cannot stat output file", err.Error()) } } else { if fileInfo.IsDir() { output = path.Join(output, "index.html") } } return output } func renderSinglePageHtml(env BuildHtmlEnvironment) error { output := getSingleHtmlOutputFile(env) t := environment.ReadTemplate(env.Templates.HTML) doc, err := html.Parse(strings.NewReader(t)) checkError(err) extractTemplate(&env, doc, "#doc-api-request") stringBuilder := &strings.Builder{} err = html.Render(stringBuilder, doc) p, err := template.New("HTML").Funcs(getFuncMap(&env)).Parse(stringBuilder.String()) checkError(err) d, err := NewHTMLDataset(env) checkError(err) buf := new(bytes.Buffer) err = p.Execute(buf, d) checkError(err) m := minify.New() m.AddFunc("text/html", minHTML.Minify) out := new(bytes.Buffer) err = m.Minify("text/html", out, buf) checkError(err) err = os.WriteFile(output, out.Bytes(), 0644) checkError(err) return nil } func extractTemplate(env *BuildHtmlEnvironment, doc *html.Node, id string) { sel, err := cascadia.Parse(id) checkError(err) node := cascadia.Query(doc, sel) if node == nil { return } node.Parent.RemoveChild(node) container := html.Node{ Type: html.ElementNode, Data: "div", DataAtom: atom.Div, } var nodeList []*html.Node fn := node.FirstChild for fn != nil { nodeList = append(nodeList, fn) fn = fn.NextSibling } for _, n := range nodeList { n.Parent.RemoveChild(n) container.AppendChild(n) } stringBuilderTemplate := &strings.Builder{} err = html.Render(stringBuilderTemplate, &container) checkError(err) if env.Templates.Components == nil { env.Templates.Components = make(map[string]string) } env.Templates.Components[id] = stringBuilderTemplate.String() } func renderMultiPageHtml(env BuildHtmlEnvironment) error { environment.ExitWithError(2, "not implemented") return nil } func buildTree(body string) *Tree[DocumentNode] { buf := new(bytes.Buffer) buf.Write([]byte(body)) doc, err := goquery.NewDocumentFromReader(buf) if err != nil { log.Fatal(err) } obj := newTree[DocumentNode]() ptr := obj doc.Find("h1,h2,h3,h4,h5,h6").Each(func(k int, s *goquery.Selection) { e := s.Get(0) t := []rune(e.DataAtom.String()) l := t[1:len(t)] i, err := strconv.Atoi(string(l)) checkError(err) currentLevel := ptr.getLevel() aID, found := s.Attr("id") if !found { if title, err := s.Html(); err == nil { aID = slug.Make(title) } if aID == "" { aID = "" + e.DataAtom.String() + "-" + randomID() } } payload := DocumentNode{ Level: i, ID: aID, Title: s.Text(), } if currentLevel == i { ptr = ptr.appendAndGet() ptr.setPayload(payload) return } if currentLevel < i { ptr = ptr.down(i) ptr.setPayload(payload) return } ptr = ptr.up(i) ptr = ptr.appendAndGet() ptr.setPayload(payload) }) return obj } func randomID() string { var chars = []rune(defaultSet) s := make([]rune, 5) for i := range s { s[i] = chars[rand.Intn(len(chars))] } return string(s) } func getThunderClientAPI(env GetComponentsTemplatesInterface, content string) (string, map[string]string) { m := map[string]string{} regEx := regexp.MustCompile(`\{%\s*thunderClientAPI\s+(?P<args>.*)\%\}`) matches := regEx.FindAllStringSubmatch(content, -1) if matches == nil { return content, m } for _, match := range matches { if len(match) < 2 { continue } fullMatch := match[0] args, err := shellwords.Parse(match[1]) checkError(err) if len(args) != 2 { environment.ExitWithError(2, "thunderClientAPI requires two arguments") } path := args[0] id := args[1] key := randomID() m[key] = embedThunderClientAPI(env, path, id) content = strings.Replace(content, fullMatch, key, -1) } return content, m } func getHtmlImages(content string, absolute string) (string, map[string]string) { regEx := regexp.MustCompile(`(?P<match>\!\[(?P<label>[^]]*)\]\((?P<path>[^)]*)\))`) matches := regEx.FindAllStringSubmatch(content, -1) if matches == nil { return content, nil } m := map[string]string{} for _, match := range matches { result := make(map[string]string) for i, name := range regEx.SubexpNames() { if i != 0 && name != "" { result[name] = match[i] } } if utils.IsUrl(result["path"]) { continue } if filepath.IsAbs(result["path"]) { continue } p := filepath.Join(absolute, result["path"]) path := path.Clean(p) fc, err := ioutil.ReadFile(path) if err != nil { continue } data := "data:image/jpeg;base64," + base64.StdEncoding.EncodeToString(fc) hash := fmt.Sprintf("%x", md5.Sum([]byte(data))) m[hash] = data content = strings.Replace(content, result["match"], "!["+result["label"]+"]("+hash+")", -1) } return content, m } func replaceThunderClientAPI(content string, boxMap map[string]string) string { for k, v := range boxMap { content = strings.Replace(content, k, v, -1) } return content } func replaceImages(content string, images map[string]string) string { for k, v := range images { content = strings.Replace(content, k, v, -1) } return content } func NewHTMLDataset(env BuildHtmlEnvironment) (*SinglePageHtmlDataset, error) { files, err := getFiles(&env, env.SourcePath) if err != nil { return nil, err } mapFiles, keys := buildFileMap(files) d := &SinglePageHtmlDataset{} d.Meta.Language = environment.State.GetHTMLMetaLanguage("") d.Meta.Title = environment.State.GetHTMLMetaTitle("") d.Meta.ShortTitle = environment.State.GetHTMLMetaShortTitle("") d.Meta.Description = environment.State.GetHTMLMetaDescription("") d.Meta.Keywords = environment.State.GetHTMLMetaKeywords("") d.Version = environment.State.GetHTMLVersion("") docs := []string{} for _, key := range keys { text := mapFiles[key].textMeta.text text, s1Map := utils.MaskCodeBlocks(text, mapFiles[key].relSourcePath, 3) text, s2Map := utils.MaskCodeBlocks(text, mapFiles[key].relSourcePath, 1) text = convertHeadlines(text, mapFiles[key].level, mapFiles[key].textMeta.meta.Level) text, boxMap := convertAwesomeBoxesToHTML(text) text, imgMap := getHtmlImages(text, mapFiles[key].baseDir) text, thunderApiMap := getThunderClientAPI(&env, text) text = replaceRelativeLinksToHTML(text, mapFiles[key], mapFiles) text = utils.InsertCodeBlocks(text, s2Map) text = utils.InsertCodeBlocks(text, s1Map) text = createHtmlFromMarkdown(text) text = replaceImages(text, imgMap) text = replaceAwesomeBoxes(text, boxMap) text = replaceThunderClientAPI(text, thunderApiMap) id := mapFiles[key].textMeta.meta.ID if id == "" { id = mapFiles[key].hash } doc := HtmlDocument{ ID: id, Content: text, } doc.SourceFile = mapFiles[key] d.Documents = append(d.Documents, doc) docs = append(docs, strings.TrimSpace(doc.Content)) } d.DocumentTree = buildTree(strings.Join(docs, "\n")) d.TOC = []DocumentNode{} d.DocumentTree.recursiveIterate(func(t *Tree[DocumentNode]) { if t.getLevel() == 0 { return } d.TOC = append(d.TOC, t.Payload) }) format := environment.State.GetDocumentDateFormat("") now := time.Now() d.CreatedFormat = now.Format(format) if env.Verbose { fmt.Println(d.Documents) } return d, nil } func convertAwesomeBoxesToHTML(content string) (string, map[string]string) { regEx := regexp.MustCompile(`(?m)(?P<matches>!!!\s*(?P<type>[^\s]+)\s+?(?P<title>[^\n]*)\n(?P<lines>(?P<lastline>[[:blank:]]+[^\n]+\n)+))`) matches := regEx.FindAllStringSubmatch(content, -1) if matches == nil { return content, nil } s := map[string]string{} for _, match := range matches { result := make(map[string]string) for i, name := range regEx.SubexpNames() { if i != 0 && name != "" { result[name] = match[i] } } boxtype := "note" switch { case utils.Contains([]string{"notebox", "note", "info"}, result["type"]): boxtype = "primary" case utils.Contains([]string{"tipbox", "tip", "hint"}, result["type"]): boxtype = "secondary" case utils.Contains([]string{"warningbox", "warning", "warn"}, result["type"]): boxtype = "warning" case utils.Contains([]string{"cautionbox", "caution", "danger"}, result["type"]): boxtype = "danger" case utils.Contains([]string{"importantbox", "important"}, result["type"]): boxtype = "danger" } c := "<div class=\"alert alert-" + boxtype + "\" role=\"" + boxtype + "\">" if result["title"] != "" { c += "<strong class=\"alert-heading\">" + utils.TrimQuotes(result["title"]) + "</strong><br>" } lines := result["lines"] c += lines c += "</div>" hash := fmt.Sprintf("box%x", md5.Sum([]byte(c))) s[hash] = c content = strings.Replace(content, result["matches"], "\n"+hash+"\n", 1) } return content, s } func replaceAwesomeBoxes(content string, boxMap map[string]string) string { for k, v := range boxMap { content = strings.Replace(content, k, v, -1) } return content } func replaceRelativeLinksToHTML(content string, f *SourceFile, fileMap SourceFileMap) string { label := "link_" + f.hash content = "\n<div id=\"" + label + "\"></div>\n\n" + strings.TrimSpace(content) + "\n\n" regEx := regexp.MustCompile(`(?:^|[^!])(?P<match>\[(?P<label>[^]]*)\]\((?P<path>[^)]*)\))`) matches := regEx.FindAllStringSubmatch(content, -1) if matches == nil { return content } for _, match := range matches { result := make(map[string]string) for i, name := range regEx.SubexpNames() { if i != 0 && name != "" { result[name] = match[i] } } if filepath.IsAbs(result["path"]) { continue } if utils.IsUrl(result["path"]) { continue } d := filepath.Dir(f.relSourcePath) p := filepath.Join(d, result["path"]) p = strings.Split(p, "#")[0] ext := filepath.Ext(p) if ext == "" { environment.State.AddWarning(translations.T.Sprintf("No extension, of the link %s, in the file %s is not supported.", p, f.absSourcePath)) continue } if ext != ".md" && ext != ".markdown" { environment.State.AddWarning(translations.T.Sprintf("The extension %s, of the link %s, in the file %s is not supported.", ext, p, f.absSourcePath)) continue } s := fileMap.findByRelativePath(p) if s == nil { environment.State.AddWarning(translations.T.Sprintf("relative path %s, in file %s, cannot be resolved", result["path"], f.absSourcePath)) continue } replace := "<a href=\"#link_" + s.hash + "\">" + result["label"] + "</a>" content = strings.Replace(content, result["match"], replace, -1) } return content } func renderHook(w io.Writer, node ast.Node, entering bool) (ast.WalkStatus, bool) { switch node.(type) { case *ast.HTMLSpan: // allow the html to be escaped return ast.GoToNext, true case *ast.HTMLBlock: // allow the html to be escaped return ast.GoToNext, true } return ast.GoToNext, false } func createHtmlFromMarkdown(text string) string { md := goldmark.New( goldmark.WithExtensions( extension.GFM, extension.DefinitionList, extension.Footnote, extension.Typographer, extension.Linkify, extension.Table, extension.Strikethrough, embed.New(), extension.TaskList), goldmark.WithParserOptions( goldmarkParser.WithAutoHeadingID(), goldmarkParser.WithAttribute(), goldmarkParser.WithHeadingAttribute(), ), goldmark.WithRendererOptions( goldmarkHTML.WithUnsafe(), ), ) var buf bytes.Buffer err := md.Convert([]byte(text), &buf) checkError(err) return buf.String() } func c2reateHtmlFromMarkdown(text string) string { htmlFlags := markdownHTML.CommonFlags | markdownHTML.HrefTargetBlank | markdownHTML.SkipHTML opts := markdownHTML.RendererOptions{ AbsolutePrefix: "", FootnoteAnchorPrefix: "", FootnoteReturnLinkContents: "", CitationFormatString: "", HeadingIDPrefix: "rel-", HeadingIDSuffix: "", Title: "", CSS: "", Icon: "", Head: nil, Flags: htmlFlags, RenderNodeHook: renderHook, Comments: nil, Generator: "", } renderer := markdownHTML.NewRenderer(opts) /** CommonExtensions Extensions = NoIntraEmphasis | Tables | FencedCode | Autolink | Strikethrough | SpaceHeadings | HeadingIDs | BackslashLineBreak | DefinitionLists | MathJax NoIntraEmphasis Extensions = 1 << iota // Ignore emphasis markers inside words Tables // Parse tables FencedCode // Parse fenced code blocks Autolink // Detect embedded URLs that are not explicitly marked Strikethrough // Strikethrough text using ~~test~~ LaxHTMLBlocks // Loosen up HTML block parsing rules SpaceHeadings // Be strict about prefix heading rules HardLineBreak // Translate newlines into line breaks NonBlockingSpace // Translate backspace spaces into line non-blocking spaces TabSizeEight // Expand tabs to eight spaces instead of four Footnotes // Pandoc-style footnotes NoEmptyLineBeforeBlock // No need to insert an empty line to start a (code, quote, ordered list, unordered list) block HeadingIDs // specify heading IDs with {#id} Titleblock // Titleblock ala pandoc AutoHeadingIDs // Create the heading ID from the text BackslashLineBreak // Translate trailing backslashes into line breaks DefinitionLists // Parse definition lists MathJax // Parse MathJax OrderedListStart // Keep track of the first number used when starting an ordered list. Attributes // Block Attributes SuperSubscript // Super- and subscript support: 2^10^, H~2~O. EmptyLinesBreakList // 2 empty lines break out of list Includes // Support including other files. Mmark // Support Mmark syntax, see https://mmark.nl/syntax */ extensions := parser.CommonExtensions | parser.AutoHeadingIDs parser := parser.NewWithExtensions(extensions) bytes := []byte(text) html := markdown.ToHTML(bytes, parser, renderer) return string(html) }