mergehtml/internal/merge.go

220 lines
5.6 KiB
Go

package internal
import (
"bytes"
"fmt"
"io"
"log"
"strings"
"golang.org/x/net/html"
"golang.org/x/net/html/atom"
)
type (
	// Doc is an empty placeholder type; nothing in this file uses it yet.
	Doc struct{}

	// Assets groups the assets associated with a merged document by kind.
	Assets struct {
		Images  []Asset
		Styles  []Asset
		Scripts []Asset
	}

	// Asset is a single entry of Assets. It carries no fields yet.
	Asset struct{}
)
// Merge merges an HTML document over top of a base template and returns the
// rendered result.
//
// The rules below describe the intended merge. Not all of them are implemented
// yet; in particular the returned Assets is currently always empty.
//
// <head>
//   - <title> of the new doc overrides the base <title> unless the base
//     contains {{content}}, in which case the new title is substituted
//   - <style> elements from the new doc are placed after any base <style>
//   - all other elements are appended to the base
//
// <body>
//   - the <header>, <footer> and <main> elements are merged.
//     in the base document, these elements may appear arbitrarily nested inside
//     wrapper elements; mergehtml will reproduce that structure in the merged document.
//     if there are multiples, only the first will be used for merging.
//     if they do not appear in the base doc, they will be appended to the body (header: prepended).
//     in the source document, these elements will be removed from any
//     containing elements before merging. if there are multiples their contents
//     will be concatenated.
//     html5 allows multiple <main> elements if all but one are hidden. mergehtml does not.
//   - <script> elements are placed at the end of the body
//   - all other elements are appended to <main> if it exists, or <body> if not.
//
// any classes attached to the <body>, <header>, <footer>, or <main> will be merged.
//
// all scripts and images will be collected and returned in the Assets object.
//
// Merge returns an error if either input cannot be parsed or if the merged
// tree cannot be rendered. On error the other results are zero values.
func Merge(base, doc io.Reader) (merged string, assets *Assets, _ error) {
	baseParsed, err := html.Parse(base)
	if err != nil {
		return "", nil, fmt.Errorf("couldn't parse base html: %w", err)
	}
	docParsed, err := html.Parse(doc)
	if err != nil {
		return "", nil, fmt.Errorf("couldn't parse doc: %w", err)
	}

	var buf bytes.Buffer
	// A render failure is reported to the caller like the parse failures above
	// instead of crashing the program.
	if err := html.Render(&buf, merge(baseParsed, docParsed)); err != nil {
		return "", nil, fmt.Errorf("couldn't render merged html: %w", err)
	}

	// Asset collection is not implemented yet, so an empty Assets is returned.
	return buf.String(), new(Assets), nil
}
// merge combines the parsed base template and the parsed doc into a fresh
// document tree and returns it. base and doc are modified in the process:
// their nodes are moved into the result rather than copied.
//
// The result's <head> receives the children of the base head followed by the
// children of the doc head. The result's <body> starts out as the base body;
// the contents of every <header>, <footer> and <main> found in the doc body
// are then moved into the first matching element of the base, and whatever is
// left of the doc body is merged last.
//
// merge panics if the skeleton document it builds cannot be parsed or lacks
// <html>, <head> or <body>.
func merge(base, doc *html.Node) (out *html.Node) {
	out, err := html.Parse(strings.NewReader("<!doctype html><html><head></head><body></body></html>"))
	if err != nil {
		panic("merge: internal error: " + err.Error())
	}

	// TODO: title
	root := find(out, atom.Html)
	if root == nil {
		log.Panicf("merge: no <html> in %v", out)
	}
	head := find(root, atom.Head)
	if head == nil {
		log.Panicf("merge: no <head> in %v", root)
	}
	reparentChildren(head, find(find(base, atom.Html), atom.Head))
	reparentChildren(head, find(find(doc, atom.Html), atom.Head))

	body := find(root, atom.Body)
	if body == nil {
		log.Panicf("merge: no <body> in %v", root)
	}
	baseBody := find(find(base, atom.Html), atom.Body)
	newBody := find(find(doc, atom.Html), atom.Body)
	reparentChildren(body, baseBody)

	// Only the first of each landmark element in the base is a merge target.
	header := findRec(body, atom.Header)
	footer := findRec(body, atom.Footer)
	mainElem := findRec(body, atom.Main)

	// TODO: merge attributes

	// drain moves the children of every tag element in the doc body into dst.
	// Each emptied element is detached, which is what ends the search.
	drain := func(tag atom.Atom, dst *html.Node) {
		for n := findRec(newBody, tag); n != nil; n = findRec(newBody, tag) {
			reparentChildren(dst, n)
			n.Parent.RemoveChild(n)
		}
	}

	// A header missing from the base is created at the front of the body.
	if header == nil && findRec(newBody, atom.Header) != nil {
		header = createElement(atom.Header)
		body.InsertBefore(header, body.FirstChild)
	}
	drain(atom.Header, header)

	// A footer missing from the base is created at the end of the body.
	if footer == nil && findRec(newBody, atom.Footer) != nil {
		footer = createElement(atom.Footer)
		body.AppendChild(footer)
	}
	drain(atom.Footer, footer)

	// Leftover body content goes in front of the footer so the footer stays
	// last. The footer may sit inside wrapper elements, so the insertion is
	// done in the footer's own parent: InsertBefore expects its reference node
	// to be a child of the node it is called on.
	bodyDst, bodyLocus := body, (*html.Node)(nil)
	if footer != nil {
		bodyDst, bodyLocus = footer.Parent, footer
	}
	// Content merged into <main> is appended, unless the footer happens to be
	// one of main's own children.
	var mainLocus *html.Node
	if mainElem != nil && footer != nil && footer.Parent == mainElem {
		mainLocus = footer
	}

	// if neither has main, just merge the bodies
	// if doc has main but base doesn't, just merge the bodies
	// if base has main but doc doesn't, merge doc body into base main
	// if both have main, merge doc main(s) into base main and then merge the bodies
	switch {
	case mainElem == nil:
		reparentBefore(bodyDst, newBody, bodyLocus)
	case findRec(newBody, atom.Main) == nil:
		reparentBefore(mainElem, newBody, mainLocus)
	default:
		drain(atom.Main, mainElem)
		reparentBefore(bodyDst, newBody, bodyLocus)
	}
	return out
}
// createElement returns a new, detached element node for the tag named by a.
func createElement(a atom.Atom) *html.Node {
	node := new(html.Node)
	node.Type = html.ElementNode
	node.DataAtom = a
	node.Data = a.String()
	return node
}
// reparentChildren moves every child of src to the end of dst's children,
// keeping their order. A nil src is a no-op.
func reparentChildren(dst, src *html.Node) {
	if src == nil {
		return
	}
	// Always take the current first child: detaching it promotes the next one.
	for c := src.FirstChild; c != nil; c = src.FirstChild {
		src.RemoveChild(c)
		dst.AppendChild(c)
	}
}
// reparentBefore moves every child of src into dst, keeping their order and
// inserting them immediately before locus.
//
// If locus is nil, or is not a child of dst, the children are appended to the
// end of dst instead. A nil src is a no-op.
func reparentBefore(dst, src, locus *html.Node) {
	if src == nil {
		return
	}
	// InsertBefore expects its reference node to be a child of the receiver.
	// A locus that lives elsewhere cannot serve as an anchor, so fall back to
	// appending rather than linking into another node's sibling list.
	if locus != nil && locus.Parent != dst {
		locus = nil
	}
	// Always take the current first child: detaching it promotes the next one.
	for child := src.FirstChild; child != nil; child = src.FirstChild {
		src.RemoveChild(child)
		dst.InsertBefore(child, locus)
	}
}
// find returns the first direct child of elem whose DataAtom equals tagName.
// It does not descend into grandchildren. Returns nil if elem is nil or no
// child matches.
func find(elem *html.Node, tagName atom.Atom) *html.Node {
	if elem == nil {
		return nil
	}
	child := elem.FirstChild
	for child != nil {
		if child.DataAtom == tagName {
			return child
		}
		child = child.NextSibling
	}
	return nil
}
// findRec returns the first element named tagName that is a descendant of
// elem, searching depth-first in document order. elem itself is never
// returned. Returns nil if elem is nil or no such element exists.
func findRec(elem *html.Node, tagName atom.Atom) *html.Node {
	if elem == nil {
		return nil
	}
	for child := elem.FirstChild; child != nil; child = child.NextSibling {
		isMatch := child.Type == html.ElementNode && child.DataAtom == tagName
		if isMatch {
			return child
		}
		// Look through this child's subtree before moving to its siblings.
		if hit := findRec(child, tagName); hit != nil {
			return hit
		}
	}
	return nil
}
// clone makes a shallow copy of n: same type, data and attributes, but
// detached from any tree (no parent, siblings or children). The attribute
// slice is copied, so the clone does not share it with n.
func clone(n *html.Node) *html.Node {
	attrs := make([]html.Attribute, len(n.Attr))
	copy(attrs, n.Attr)

	dup := new(html.Node)
	dup.Type = n.Type
	dup.DataAtom = n.DataAtom
	dup.Data = n.Data
	dup.Attr = attrs
	return dup
}