// Package html copies HTML fragments selected from source documents into
// destination documents, as described by a YAML sync specification.
package html

import (
	"fmt"
	"os"
	"path/filepath"
	"strings"

	"github.com/andybalholm/cascadia"
	"gitlab.schukai.com/oss/bob/types"
	"gitlab.schukai.com/oss/libraries/go/markup/html/engine"
	"golang.org/x/net/html"
	"gopkg.in/yaml.v3"
)

// Specification holds a single CSS selector.
type Specification struct {
	Selector string
}

// readHTML opens the file at p and parses it into an HTML node tree.
// It returns the error from opening or parsing the file unchanged.
func readHTML(p string) (*html.Node, error) {
	htmlFile, err := os.Open(p)
	if err != nil {
		return nil, err
	}
	defer htmlFile.Close()

	return html.Parse(htmlFile)
}

// StringNodeMap maps a file path to its parsed HTML document.
type StringNodeMap map[string]*html.Node

// StringListNodeMap maps a selector string to a list of HTML nodes.
type StringListNodeMap map[string][]*html.Node

// getSourceFileMap parses every source file referenced by the specification.
// The result is keyed by absolute path; a file referenced by several sync
// rules is parsed only once.
func getSourceFileMap(specification types.SyncSpecification) (StringNodeMap, error) {
	sourceFiles := make(StringNodeMap)

	for _, r := range specification.Sync {
		absSource, err := filepath.Abs(r.Source.Path)
		if err != nil {
			return nil, err
		}

		// Already parsed for an earlier rule.
		if _, ok := sourceFiles[absSource]; ok {
			continue
		}

		doc, err := readHTML(absSource)
		if err != nil {
			return nil, err
		}
		sourceFiles[absSource] = doc
	}

	return sourceFiles, nil
}

// getDestinationFiles parses every ".html" file reachable from the
// destination paths of the specification. Directories are walked
// recursively; a plain file is loaded only when its extension is ".html".
// The result is keyed by absolute path and each file is parsed once.
// Excludes are not applied here.
func getDestinationFiles(specification types.SyncSpecification) (StringNodeMap, error) {
	fileMap := make(StringNodeMap)

	// load parses path into fileMap unless it is already present.
	load := func(path string) error {
		if _, ok := fileMap[path]; ok {
			return nil
		}
		doc, err := readHTML(path)
		if err != nil {
			return err
		}
		fileMap[path] = doc
		return nil
	}

	for _, r := range specification.Sync {
		for _, d := range r.Destination.Path {
			absPath, err := filepath.Abs(d)
			if err != nil {
				return nil, err
			}

			info, err := os.Stat(absPath)
			if err != nil {
				return nil, err
			}

			if !info.IsDir() {
				if filepath.Ext(absPath) == ".html" {
					if err := load(absPath); err != nil {
						return nil, err
					}
				}
				continue
			}

			err = filepath.Walk(absPath, func(pp string, info os.FileInfo, err error) error {
				if err != nil {
					return err
				}
				if info.IsDir() || filepath.Ext(pp) != ".html" {
					return nil
				}
				return load(pp)
			})
			if err != nil {
				return nil, err
			}
		}
	}

	return fileMap, nil
}

// getAllDestinationFilesWithoutExcludes walks every path of destination and
// returns the absolute paths of all ".html" files that are not rejected by
// checkExcludes for destination.Exclude.
func getAllDestinationFilesWithoutExcludes(destination types.Destination) ([]string, error) {
	var files []string

	for _, p := range destination.Path {
		root, err := filepath.Abs(p)
		if err != nil {
			return nil, err
		}

		err = filepath.Walk(root, func(pp string, info os.FileInfo, err error) error {
			if err != nil {
				return err
			}
			if info.IsDir() || filepath.Ext(pp) != ".html" {
				return nil
			}

			excluded, err := checkExcludes(destination.Exclude, pp)
			if err != nil {
				return err
			}
			if !excluded {
				files = append(files, pp)
			}
			return nil
		})
		if err != nil {
			return nil, err
		}
	}

	return files, nil
}

// SyncHtml reads the sync specification at p and applies every sync rule:
// the first node matching the source selector replaces the first node
// matching the destination selector in each non-excluded destination file.
// Nodes matching a "keep" selector in the destination are captured before
// the replacement and restored afterwards. All changed files are rewritten
// on disk once every rule has been applied.
//
// readSpec changes the process working directory to the directory of p so
// that relative paths in the specification resolve against it; the previous
// working directory is restored before SyncHtml returns.
func SyncHtml(p string) error {
	// Best effort: if the current directory cannot be determined there is
	// nothing to restore afterwards.
	if currentDir, err := os.Getwd(); err == nil {
		defer func() { _ = os.Chdir(currentDir) }()
	}

	// The first result of readSpec is always nil (see readSpec).
	_, specification, err := readSpec(p)
	if err != nil {
		return err
	}

	sourceFiles, err := getSourceFileMap(specification)
	if err != nil {
		return err
	}

	destinationFiles, err := getDestinationFiles(specification)
	if err != nil {
		return err
	}

	changedFiles := make(map[string]struct{})

	for _, r := range specification.Sync {
		source := r.Source

		absSource, err := filepath.Abs(source.Path)
		if err != nil {
			return err
		}

		sourceDoc, ok := sourceFiles[absSource]
		if !ok {
			return fmt.Errorf("source file isn't found: %s", absSource)
		}

		sourceQuery, err := cascadia.Compile(source.Selector)
		if err != nil {
			return err
		}

		sourceNode := sourceQuery.MatchFirst(sourceDoc)
		if sourceNode == nil {
			return fmt.Errorf("source selector not found: %s in %s", source.Selector, absSource)
		}

		list, err := getAllDestinationFilesWithoutExcludes(r.Destination)
		if err != nil {
			return err
		}

		// An empty destination selector falls back to the source selector.
		destinationSelector := r.Destination.Selector
		if destinationSelector == "" {
			destinationSelector = source.Selector
		}

		// The selector is the same for every file of this rule, so compile
		// it once instead of once per file.
		destinationQuery, err := cascadia.Compile(destinationSelector)
		if err != nil {
			return err
		}

		for _, d := range list {
			doc, ok := destinationFiles[d]
			if !ok || doc == nil {
				return fmt.Errorf("destination file was not loaded: %s", d)
			}

			// Capture the nodes to keep before the document is modified.
			keepMap, keepOrder, err := collectKeepNodes(doc, r.Destination.Keep, d)
			if err != nil {
				return err
			}

			target := destinationQuery.MatchFirst(doc)
			if target == nil {
				return fmt.Errorf("could not find destination selector %s in %s", destinationSelector, d)
			}

			parent := target.Parent
			if parent == nil {
				return fmt.Errorf("destination parent is nil")
			}

			// Swap the destination node for a copy of the source node.
			parent.InsertBefore(engine.CloneNode(sourceNode), target)
			parent.RemoveChild(target)

			if err := restoreKeepNodes(doc, parent, destinationQuery, keepMap, keepOrder); err != nil {
				return fmt.Errorf("%w (selector %s in %s)", err, destinationSelector, d)
			}

			changedFiles[d] = struct{}{}
		}
	}

	for d := range changedFiles {
		if err := writeHTMLFile(d, destinationFiles[d]); err != nil {
			return err
		}
	}

	return nil
}

// collectKeepNodes clones every node of doc that matches one of selectors.
// It returns the clones grouped by selector, plus the selectors that had at
// least one match in the order they were first seen, so that callers can
// process them deterministically. A selector without any match is reported
// on standard output and skipped; file is only used for that message.
func collectKeepNodes(doc *html.Node, selectors []string, file string) (StringListNodeMap, []string, error) {
	keepMap := make(StringListNodeMap)
	var order []string

	for _, sel := range selectors {
		q, err := cascadia.Compile(sel)
		if err != nil {
			return nil, nil, err
		}

		matches := q.MatchAll(doc)
		if len(matches) == 0 {
			fmt.Printf("keep node %s not found in %s\n", sel, file)
			continue
		}

		if _, seen := keepMap[sel]; !seen {
			order = append(order, sel)
		}
		for _, m := range matches {
			keepMap[sel] = append(keepMap[sel], engine.CloneNode(m))
		}
	}

	return keepMap, order, nil
}

// restoreKeepNodes puts the nodes captured by collectKeepNodes back into doc
// after the destination node has been replaced.
//
// For each selector in order:
//   - if the selector still matches nodes in doc, every matched node that
//     has a parent is replaced by a clone of the first kept node;
//   - otherwise clones of all kept nodes are appended to the first node
//     matching destinationQuery at or below parent.
//
// NOTE(review): in the first case only the first kept node is ever used,
// even when several nodes were kept for the selector. This mirrors the
// previous behaviour; confirm whether pairing kept[i] with the i-th match
// was intended.
func restoreKeepNodes(doc, parent *html.Node, destinationQuery cascadia.Selector, keepMap StringListNodeMap, order []string) error {
	for _, sel := range order {
		kept := keepMap[sel]
		if len(kept) == 0 {
			continue
		}

		q, err := cascadia.Compile(sel)
		if err != nil {
			return err
		}

		matches := q.MatchAll(doc)
		if len(matches) == 0 {
			anchor := destinationQuery.MatchFirst(parent)
			if anchor == nil {
				// Previously this dereferenced nil and panicked.
				return fmt.Errorf("cannot restore keep node %s: inserted node does not match the destination selector", sel)
			}
			for _, k := range kept {
				anchor.AppendChild(engine.CloneNode(k))
			}
			continue
		}

		for _, m := range matches {
			// A node without a parent cannot be replaced in place.
			if m.Parent == nil {
				continue
			}
			m.Parent.InsertBefore(engine.CloneNode(kept[0]), m)
			m.Parent.RemoveChild(m)
		}
	}

	return nil
}

// writeHTMLFile truncates (or creates) the file at path and renders doc into
// it after cleanHTML has been applied. A render error takes precedence over
// an error from closing the file.
func writeHTMLFile(path string, doc *html.Node) error {
	fp, err := os.Create(path)
	if err != nil {
		return err
	}

	cleanHTML(doc)
	renderErr := html.Render(fp, doc)
	closeErr := fp.Close()

	if renderErr != nil {
		return renderErr
	}
	return closeErr
}

// cleanHTML removes whitespace-only text nodes from the end of every <body>
// element in the tree rooted at n.
func cleanHTML(n *html.Node) {
	if n.Type == html.ElementNode && n.Data == "body" {
		// Drop trailing whitespace-only text nodes of the body.
		for n.LastChild != nil && isWhitespaceNode(n.LastChild) {
			n.RemoveChild(n.LastChild)
		}
	}

	// Recurse into the remaining children.
	for c := n.FirstChild; c != nil; c = c.NextSibling {
		cleanHTML(c)
	}
}

// isWhitespaceNode reports whether n is a text node that contains only
// whitespace (or nothing at all).
func isWhitespaceNode(n *html.Node) bool {
	return n.Type == html.TextNode && strings.TrimSpace(n.Data) == ""
}

// readSpec reads the YAML sync specification at p and, as a side effect,
// changes the process working directory to the directory containing p.
//
// The first result is always nil; it is retained only so that the signature
// stays compatible with existing callers. Failures are reported through the
// last result.
func readSpec(p string) (error, types.SyncSpecification, error) {
	content, err := os.ReadFile(p)
	if err != nil {
		return nil, types.SyncSpecification{}, err
	}

	if err := os.Chdir(filepath.Dir(p)); err != nil {
		return nil, types.SyncSpecification{}, err
	}

	var specification types.SyncSpecification
	if err := yaml.Unmarshal(content, &specification); err != nil {
		return nil, types.SyncSpecification{}, err
	}

	return nil, specification, nil
}

// checkExcludes reports whether the path d is excluded by any entry of
// exclude. Each non-empty entry is made absolute and then excludes d when
//
//   - it is equal to d,
//   - it names an existing directory that contains d, or
//   - it matches d as a filepath.Match pattern.
//
// Empty entries are ignored. An entry that does not exist on disk is treated
// purely as a pattern; any other stat error is returned.
func checkExcludes(exclude []string, d string) (bool, error) {
	separator := string(filepath.Separator)

	for _, e := range exclude {
		// Must be checked before filepath.Abs, which turns "" into the
		// working directory and would exclude everything below it.
		if e == "" {
			continue
		}

		pattern, err := filepath.Abs(e)
		if err != nil {
			return false, err
		}

		if pattern == d {
			return true, nil
		}

		info, err := os.Stat(pattern)
		switch {
		case err == nil:
			if info.IsDir() {
				// Compare against "dir/" so that /a/foo does not exclude
				// the sibling /a/foobar.
				prefix := pattern
				if !strings.HasSuffix(prefix, separator) {
					prefix += separator
				}
				if strings.HasPrefix(d, prefix) {
					return true, nil
				}
			}
		case !os.IsNotExist(err):
			return false, err
		}

		matched, err := filepath.Match(pattern, d)
		if err != nil {
			return false, err
		}
		if matched {
			return true, nil
		}
	}

	return false, nil
}