2021-09-24 06:49:47 +00:00
|
|
|
package internal
|
|
|
|
|
|
|
|
import (
|
2021-09-25 20:12:26 +00:00
|
|
|
"bytes"
|
|
|
|
"fmt"
|
2021-09-24 06:49:47 +00:00
|
|
|
"io"
|
2021-09-25 20:12:26 +00:00
|
|
|
"log"
|
|
|
|
"strings"
|
2021-09-24 06:49:47 +00:00
|
|
|
|
|
|
|
"golang.org/x/net/html"
|
2021-09-25 20:12:26 +00:00
|
|
|
"golang.org/x/net/html/atom"
|
2021-09-24 06:49:47 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
// Doc is an empty struct type: it declares no fields, and nothing in the code
// visible in this file references it.
// NOTE(review): presumably a placeholder for a parsed-document type — confirm
// before relying on it.
type Doc struct{}
|
|
|
|
|
|
|
|
type Assets struct {
|
|
|
|
Images []Asset
|
|
|
|
Styles []Asset
|
|
|
|
Scripts []Asset
|
|
|
|
}
|
|
|
|
|
2021-09-25 20:12:26 +00:00
|
|
|
// Asset is the element type of the slices held by Assets. It currently
// declares no fields.
type Asset struct{}
|
|
|
|
|
2021-09-24 06:49:47 +00:00
|
|
|
// Merges an HTML document over top of a base template.
|
|
|
|
//
|
|
|
|
// <head>
|
|
|
|
// - <title> of the new doc overrides the base <title> unless the base
|
|
|
|
// contains {{content}}, in which case the new title is substituted
|
|
|
|
// - <style> elements from the new doc are placed after any base <style>
|
|
|
|
// - all other elements are appended to the base
|
|
|
|
//
|
|
|
|
// <body>
|
|
|
|
// - the <header> and <footer> and <main> elemnts are merged.
|
|
|
|
// in the base document, these elements may appear abitrarily nested inside
|
|
|
|
// wrapper elements; mergehtml will reproduce that structure in the merged document.
|
|
|
|
// if there are multiples, only the first will be used for merging.
|
|
|
|
// if they do not appear in the base doc, they will be appended to the body (header: prepended).
|
|
|
|
// in the source document, these elements will be removed from any
|
|
|
|
// containing elements before merging. if there are multiples their contents
|
|
|
|
// will be concatenated.
|
|
|
|
// html5 allows multiple <main> elements if all but one are hidden. mergehtml does not.
|
|
|
|
// - <script> elements are placed at the end of the body
|
|
|
|
// - all other elements are appended to <main> if it exists, or <body> if not.
|
|
|
|
//
|
|
|
|
// any classes attached to the <body>, <header>, <footer>, or <main> will be merged.
|
|
|
|
//
|
|
|
|
// all scripts and images will be collected and returned in the Assets object
|
|
|
|
//
|
2021-09-25 20:12:26 +00:00
|
|
|
func Merge(base, doc io.Reader) (merged string, assets *Assets, _ error) {
|
|
|
|
baseParsed, err := html.Parse(base)
|
|
|
|
if err != nil {
|
|
|
|
return "", nil, fmt.Errorf("couldn't parse base html: %w", err)
|
|
|
|
}
|
|
|
|
docParsed, err := html.Parse(doc)
|
|
|
|
if err != nil {
|
|
|
|
return "", nil, fmt.Errorf("couldn't parse doc: %w", err)
|
|
|
|
}
|
|
|
|
out := merge(baseParsed, docParsed)
|
|
|
|
buf := new(bytes.Buffer)
|
|
|
|
err = html.Render(buf, out)
|
|
|
|
if err != nil {
|
|
|
|
panic(err)
|
|
|
|
}
|
2021-09-24 06:49:47 +00:00
|
|
|
assets = new(Assets)
|
2021-09-25 20:12:26 +00:00
|
|
|
return buf.String(), assets, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// merges doc into base. doc and base are modified in the process.
|
|
|
|
func merge(base, doc *html.Node) (out *html.Node) {
|
|
|
|
out, err := html.Parse(strings.NewReader("<!doctype html><html><head></head><body></body></html>"))
|
2021-09-24 06:49:47 +00:00
|
|
|
if err != nil {
|
|
|
|
panic("mergehtml: internal error: " + err.Error())
|
|
|
|
}
|
2021-09-25 20:12:26 +00:00
|
|
|
|
|
|
|
// TODO: title
|
|
|
|
html := find(out, atom.Html)
|
|
|
|
if html == nil {
|
|
|
|
log.Panicf("no <html> in %v", out)
|
|
|
|
}
|
|
|
|
head := find(html, atom.Head)
|
|
|
|
reparentChildren(head, find(find(base, atom.Html), atom.Head))
|
|
|
|
reparentChildren(head, find(find(doc, atom.Html), atom.Head))
|
|
|
|
|
|
|
|
body := find(html, atom.Body)
|
|
|
|
if body == nil {
|
|
|
|
log.Panicf("no <body> in %v", html)
|
|
|
|
}
|
|
|
|
baseBody := find(find(base, atom.Html), atom.Body)
|
|
|
|
newBody := find(find(doc, atom.Html), atom.Body)
|
|
|
|
|
|
|
|
reparentChildren(body, baseBody)
|
|
|
|
header := findRec(body, atom.Header)
|
|
|
|
footer := findRec(body, atom.Footer)
|
|
|
|
main := findRec(body, atom.Main)
|
|
|
|
|
|
|
|
if header == nil {
|
|
|
|
header = createElement(atom.Header)
|
|
|
|
body.InsertBefore(header, body.FirstChild)
|
|
|
|
}
|
|
|
|
for {
|
|
|
|
n := findRec(newBody, atom.Header)
|
|
|
|
if n == nil {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
reparentChildren(header, n)
|
|
|
|
n.Parent.RemoveChild(n)
|
|
|
|
}
|
|
|
|
|
|
|
|
if footer == nil {
|
|
|
|
footer = createElement(atom.Header)
|
|
|
|
body.InsertBefore(footer, body.FirstChild)
|
|
|
|
}
|
|
|
|
for {
|
|
|
|
n := findRec(newBody, atom.Footer)
|
|
|
|
if n == nil {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
reparentChildren(footer, n)
|
|
|
|
n.Parent.RemoveChild(n)
|
|
|
|
}
|
|
|
|
|
|
|
|
_ = main // TODO
|
|
|
|
|
|
|
|
return out
|
|
|
|
}
|
|
|
|
|
|
|
|
func createElement(a atom.Atom) *html.Node {
|
|
|
|
return &html.Node{DataAtom: a, Data: a.String(), Type: html.ElementNode}
|
|
|
|
}
|
|
|
|
|
|
|
|
// reparentChildren reparents all of src's child nodes to dst.
|
|
|
|
func reparentChildren(dst, src *html.Node) {
|
|
|
|
if src == nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
for {
|
|
|
|
child := src.FirstChild
|
|
|
|
if child == nil {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
src.RemoveChild(child)
|
|
|
|
dst.AppendChild(child)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func find(elem *html.Node, tagName atom.Atom) *html.Node {
|
|
|
|
if elem != nil {
|
|
|
|
for n := elem.FirstChild; n != nil; n = n.NextSibling {
|
|
|
|
if n.DataAtom == tagName {
|
|
|
|
return n
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Find the first element named tagName that is a descendent of the given HTML node.
|
|
|
|
// Returns nil if no element found.
|
|
|
|
func findRec(elem *html.Node, tagName atom.Atom) *html.Node {
|
|
|
|
if elem != nil {
|
|
|
|
for c := elem.FirstChild; c != nil; c = c.NextSibling {
|
|
|
|
if c.Type == html.ElementNode && c.DataAtom == tagName {
|
|
|
|
return c
|
|
|
|
}
|
|
|
|
if d := findRec(c, tagName); d != nil {
|
|
|
|
return d
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// clone returns a new node with the same type, data and attributes.
|
|
|
|
// The clone has no parent, no siblings and no children.
|
|
|
|
func clone(n *html.Node) *html.Node {
|
|
|
|
m := &html.Node{
|
|
|
|
Type: n.Type,
|
|
|
|
DataAtom: n.DataAtom,
|
|
|
|
Data: n.Data,
|
|
|
|
Attr: make([]html.Attribute, len(n.Attr)),
|
|
|
|
}
|
|
|
|
copy(m.Attr, n.Attr)
|
|
|
|
return m
|
2021-09-24 06:49:47 +00:00
|
|
|
}
|