of the new doc overrides the base <title> unless the base // contains {{content}}, in which case the new title is substituted // - <style> elements from the new doc are placed after any base <style> // - all other elements are appended to the base // // <body> // - the <header> and <footer> and <main> elemnts are merged. // in the base document, these elements may appear abitrarily nested inside // wrapper elements; mergehtml will reproduce that structure in the merged document. // if there are multiples, only the first will be used for merging. // if they do not appear in the base doc, they will be appended to the body (header: prepended). // in the source document, these elements will be removed from any // containing elements before merging. if there are multiples their contents // will be concatenated. // html5 allows multiple <main> elements if all but one are hidden. mergehtml does not. // - <script> elements are placed at the end of the body // - all other elements are appended to <main> if it exists, or <body> if not. // // any classes attached to the <body>, <header>, <footer>, or <main> will be merged. // // all scripts and images will be collected and returned in the Assets object // func Merge(base, doc io.Reader) (merged string, assets *Assets, _ error) { baseParsed, err := html.Parse(base) if err != nil { return "", nil, fmt.Errorf("couldn't parse base html: %w", err) } docParsed, err := html.Parse(doc) if err != nil { return "", nil, fmt.Errorf("couldn't parse doc: %w", err) } out := merge(baseParsed, docParsed) buf := new(bytes.Buffer) err = html.Render(buf, out) if err != nil { panic(err) } assets = new(Assets) return buf.String(), assets, nil } // merges doc into base. doc and base are modified in the process. func merge(base, doc *html.Node) (out *html.Node) { out, err := html.Parse(strings.NewReader("<!doctype html><html><head></head><body></body></html>")) if err != nil { panic("mergehtml: internal error: " + err.Error()) } // TODO: title html := find(out, atom.Html) if html == nil { log.Panicf("no <html> in %v", out) } head := find(html, atom.Head) reparentChildren(head, find(find(base, atom.Html), atom.Head)) reparentChildren(head, find(find(doc, atom.Html), atom.Head)) body := find(html, atom.Body) if body == nil { log.Panicf("no <body> in %v", html) } baseBody := find(find(base, atom.Html), atom.Body) newBody := find(find(doc, atom.Html), atom.Body) reparentChildren(body, baseBody) header := findRec(body, atom.Header) footer := findRec(body, atom.Footer) main := findRec(body, atom.Main) // TODO: merge attributes for { n := findRec(newBody, atom.Header) if n == nil { break } if header == nil { header = createElement(atom.Header) body.InsertBefore(header, body.FirstChild) } reparentChildren(header, n) n.Parent.RemoveChild(n) } for { n := findRec(newBody, atom.Footer) if n == nil { break } if footer == nil { footer = createElement(atom.Header) body.InsertBefore(footer, body.FirstChild) } reparentChildren(footer, n) n.Parent.RemoveChild(n) } // if neither has main, just merge the bodies // if doc has main but base doesn't, just merge the bodies // if base has main but doc doesn't, merge doc body into base main // if both have main, merge doc main(s) into base main and then merge the bodies if main == nil { reparentBefore(body, newBody, footer) } else if findRec(newBody, atom.Main) == nil { reparentBefore(main, newBody, footer) } else { for { n := findRec(newBody, atom.Main) if n == nil { break } reparentChildren(main, n) n.Parent.RemoveChild(n) } reparentBefore(body, newBody, footer) } return out } func createElement(a atom.Atom) *html.Node { return &html.Node{DataAtom: a, Data: a.String(), Type: html.ElementNode} } // reparentChildren reparents all of src's child nodes to dst. func reparentChildren(dst, src *html.Node) { if src == nil { return } for { child := src.FirstChild if child == nil { break } src.RemoveChild(child) dst.AppendChild(child) } } // reparentBefore reparents all of src's child nodes to dst, inserting them before the given element. func reparentBefore(dst, src, locus *html.Node) { if src == nil { return } for { child := src.FirstChild if child == nil { break } src.RemoveChild(child) dst.InsertBefore(child, locus) } } func find(elem *html.Node, tagName atom.Atom) *html.Node { if elem != nil { for n := elem.FirstChild; n != nil; n = n.NextSibling { if n.DataAtom == tagName { return n } } } return nil } // Find the first element named tagName that is a descendent of the given HTML node. // Returns nil if no element found. func findRec(elem *html.Node, tagName atom.Atom) *html.Node { if elem != nil { for c := elem.FirstChild; c != nil; c = c.NextSibling { if c.Type == html.ElementNode && c.DataAtom == tagName { return c } if d := findRec(c, tagName); d != nil { return d } } } return nil } // clone returns a new node with the same type, data and attributes. // The clone has no parent, no siblings and no children. func clone(n *html.Node) *html.Node { m := &html.Node{ Type: n.Type, DataAtom: n.DataAtom, Data: n.Data, Attr: make([]html.Attribute, len(n.Attr)), } copy(m.Attr, n.Attr) return m }

package internal import ( "bytes" "fmt" "io" "log" "strings" "golang.org/x/net/html" "golang.org/x/net/html/atom" ) type Doc struct{} type Assets struct { Images []Asset Styles []Asset Scripts []Asset } type Asset struct{} // Merges an HTML document over top of a base template. // // // - of the new doc overrides the base <title> unless the base // contains {{content}}, in which case the new title is substituted // - <style> elements from the new doc are placed after any base <style> // - all other elements are appended to the base // // <body> // - the <header> and <footer> and <main> elemnts are merged. // in the base document, these elements may appear abitrarily nested inside // wrapper elements; mergehtml will reproduce that structure in the merged document. // if there are multiples, only the first will be used for merging. // if they do not appear in the base doc, they will be appended to the body (header: prepended). // in the source document, these elements will be removed from any // containing elements before merging. if there are multiples their contents // will be concatenated. // html5 allows multiple <main> elements if all but one are hidden. mergehtml does not. // - <script> elements are placed at the end of the body // - all other elements are appended to <main> if it exists, or <body> if not. // // any classes attached to the <body>, <header>, <footer>, or <main> will be merged. // // all scripts and images will be collected and returned in the Assets object // func Merge(base, doc io.Reader) (merged string, assets *Assets, _ error) { baseParsed, err := html.Parse(base) if err != nil { return "", nil, fmt.Errorf("couldn't parse base html: %w", err) } docParsed, err := html.Parse(doc) if err != nil { return "", nil, fmt.Errorf("couldn't parse doc: %w", err) } out := merge(baseParsed, docParsed) buf := new(bytes.Buffer) err = html.Render(buf, out) if err != nil { panic(err) } assets = new(Assets) return buf.String(), assets, nil } // merges doc into base. doc and base are modified in the process. func merge(base, doc *html.Node) (out *html.Node) { out, err := html.Parse(strings.NewReader("<!doctype html><html><head></head><body></body></html>")) if err != nil { panic("mergehtml: internal error: " + err.Error()) } // TODO: title html := find(out, atom.Html) if html == nil { log.Panicf("no <html> in %v", out) } head := find(html, atom.Head) reparentChildren(head, find(find(base, atom.Html), atom.Head)) reparentChildren(head, find(find(doc, atom.Html), atom.Head)) body := find(html, atom.Body) if body == nil { log.Panicf("no <body> in %v", html) } baseBody := find(find(base, atom.Html), atom.Body) newBody := find(find(doc, atom.Html), atom.Body) reparentChildren(body, baseBody) header := findRec(body, atom.Header) footer := findRec(body, atom.Footer) main := findRec(body, atom.Main) // TODO: merge attributes for { n := findRec(newBody, atom.Header) if n == nil { break } if header == nil { header = createElement(atom.Header) body.InsertBefore(header, body.FirstChild) } reparentChildren(header, n) n.Parent.RemoveChild(n) } for { n := findRec(newBody, atom.Footer) if n == nil { break } if footer == nil { footer = createElement(atom.Header) body.InsertBefore(footer, body.FirstChild) } reparentChildren(footer, n) n.Parent.RemoveChild(n) } // if neither has main, just merge the bodies // if doc has main but base doesn't, just merge the bodies // if base has main but doc doesn't, merge doc body into base main // if both have main, merge doc main(s) into base main and then merge the bodies if main == nil { reparentBefore(body, newBody, footer) } else if findRec(newBody, atom.Main) == nil { reparentBefore(main, newBody, footer) } else { for { n := findRec(newBody, atom.Main) if n == nil { break } reparentChildren(main, n) n.Parent.RemoveChild(n) } reparentBefore(body, newBody, footer) } return out } func createElement(a atom.Atom) *html.Node { return &html.Node{DataAtom: a, Data: a.String(), Type: html.ElementNode} } // reparentChildren reparents all of src's child nodes to dst. func reparentChildren(dst, src *html.Node) { if src == nil { return } for { child := src.FirstChild if child == nil { break } src.RemoveChild(child) dst.AppendChild(child) } } // reparentBefore reparents all of src's child nodes to dst, inserting them before the given element. func reparentBefore(dst, src, locus *html.Node) { if src == nil { return } for { child := src.FirstChild if child == nil { break } src.RemoveChild(child) dst.InsertBefore(child, locus) } } func find(elem *html.Node, tagName atom.Atom) *html.Node { if elem != nil { for n := elem.FirstChild; n != nil; n = n.NextSibling { if n.DataAtom == tagName { return n } } } return nil } // Find the first element named tagName that is a descendent of the given HTML node. // Returns nil if no element found. func findRec(elem *html.Node, tagName atom.Atom) *html.Node { if elem != nil { for c := elem.FirstChild; c != nil; c = c.NextSibling { if c.Type == html.ElementNode && c.DataAtom == tagName { return c } if d := findRec(c, tagName); d != nil { return d } } } return nil } // clone returns a new node with the same type, data and attributes. // The clone has no parent, no siblings and no children. func clone(n *html.Node) *html.Node { m := &html.Node{ Type: n.Type, DataAtom: n.DataAtom, Data: n.Data, Attr: make([]html.Attribute, len(n.Attr)), } copy(m.Attr, n.Attr) return m }