Compare commits

..

4 Commits

4 changed files with 246 additions and 7 deletions

6
go.mod
View File

@ -2,4 +2,8 @@ module git.tilde.town/magical/mergehtml
go 1.14 go 1.14
require golang.org/x/net v0.0.0-20200813134508-3edf25e44fcc // indirect require (
github.com/andreyvit/diff v0.0.0-20170406064948-c7f18ee00883
github.com/sergi/go-diff v1.1.0 // indirect
golang.org/x/net v0.0.0-20200813134508-3edf25e44fcc
)

23
go.sum
View File

@ -1,3 +1,20 @@
github.com/andreyvit/diff v0.0.0-20170406064948-c7f18ee00883 h1:bvNMNQO63//z+xNgfBlViaCIJKLlCJ6/fmUseuG0wVQ=
github.com/andreyvit/diff v0.0.0-20170406064948-c7f18ee00883/go.mod h1:rCTlJbsFo29Kk6CurOXKm700vrz8f0KW0JNfpkRJY/8=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/sergi/go-diff v1.1.0 h1:we8PVUC3FE2uYfodKH/nBHMSetSfHDR6scGdBi+erh0=
github.com/sergi/go-diff v1.1.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk=
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
@ -7,3 +24,9 @@ golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5h
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo=
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.4 h1:/eiJrUcujPVeJ3xlSWaiNi3uSVmDGBK1pDHUHAnao1I=
gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=

View File

@ -1,9 +1,14 @@
package internal package internal
import ( import (
"bytes"
"fmt"
"io" "io"
"log"
"strings"
"golang.org/x/net/html" "golang.org/x/net/html"
"golang.org/x/net/html/atom"
) )
type Doc struct{} type Doc struct{}
@ -14,6 +19,8 @@ type Assets struct {
Scripts []Asset Scripts []Asset
} }
// Asset is the element type of Assets.Scripts. It has no fields yet.
type Asset struct{}
// Merges an HTML document over top of a base template. // Merges an HTML document over top of a base template.
// //
// <head> // <head>
@ -39,12 +46,174 @@ type Assets struct {
// //
// all scripts and images will be collected and returned in the Assets object // all scripts and images will be collected and returned in the Assets object
// //
func Merge(base, doc io.Reader) (merged string, assets *Assets, _ error) func Merge(base, doc io.Reader) (merged string, assets *Assets, _ error) {
baseParsed, err := html.Parse(base)
func merge(base, doc *html.Node) (out *html.Node, assets *Assets) {
assets = new(Assets)
out, err = html.Parse("<!doctype html><html><head></head><body></body></html>")
if err != nil { if err != nil {
panic("mergehtml: internal error: " + err.Error()) return "", nil, fmt.Errorf("couldn't parse base html: %w", err)
}
docParsed, err := html.Parse(doc)
if err != nil {
return "", nil, fmt.Errorf("couldn't parse doc: %w", err)
}
out := merge(baseParsed, docParsed)
buf := new(bytes.Buffer)
err = html.Render(buf, out)
if err != nil {
panic(err)
}
assets = new(Assets)
return buf.String(), assets, nil
}
// merge combines base and doc into a freshly parsed skeleton document and
// returns it. base and doc are modified in the process: their children are
// moved into the result, not copied.
//
// The children of both <head> elements are appended to the new <head>, base
// first. The children of the base <body> become the new body. The contents of
// every <header>, <footer> and <main> found in the doc body are then moved
// into the corresponding element of the result (a missing <header> or
// <footer> is created), and what is left of the doc body is placed before the
// footer, or inside <main> when only base has one.
//
// merge panics if the skeleton document cannot be parsed or lacks its <html>,
// <head> or <body> element; those are internal invariants.
func merge(base, doc *html.Node) (out *html.Node) {
	out, err := html.Parse(strings.NewReader("<!doctype html><html><head></head><body></body></html>"))
	if err != nil {
		panic("merge: internal error: " + err.Error())
	}

	// TODO: title

	// Named root rather than html so the html package stays usable below.
	root := find(out, atom.Html)
	if root == nil {
		log.Panicf("merge: no <html> in %v", out)
	}

	head := find(root, atom.Head)
	if head == nil {
		log.Panicf("merge: no <head> in %v", root)
	}
	reparentChildren(head, find(find(base, atom.Html), atom.Head))
	reparentChildren(head, find(find(doc, atom.Html), atom.Head))

	body := find(root, atom.Body)
	if body == nil {
		log.Panicf("merge: no <body> in %v", root)
	}

	baseBody := find(find(base, atom.Html), atom.Body)
	newBody := find(find(doc, atom.Html), atom.Body)
	reparentChildren(body, baseBody)

	header := findRec(body, atom.Header)
	footer := findRec(body, atom.Footer)
	mainElem := findRec(body, atom.Main)

	// TODO: merge attributes

	// Fold every <header> of the doc into the single header of the result.
	for n := findRec(newBody, atom.Header); n != nil; n = findRec(newBody, atom.Header) {
		if header == nil {
			header = createElement(atom.Header)
			body.InsertBefore(header, body.FirstChild)
		}
		reparentChildren(header, n)
		n.Parent.RemoveChild(n)
	}

	// Same for <footer>. A footer that has to be created is a <footer>
	// element and belongs at the end of the body, not at the top.
	for n := findRec(newBody, atom.Footer); n != nil; n = findRec(newBody, atom.Footer) {
		if footer == nil {
			footer = createElement(atom.Footer)
			body.AppendChild(footer)
		}
		reparentChildren(footer, n)
		n.Parent.RemoveChild(n)
	}

	// if neither has main, just merge the bodies
	// if doc has main but base doesn't, just merge the bodies
	// if base has main but doc doesn't, merge doc body into base main
	// if both have main, merge doc main(s) into base main and then merge the bodies
	//
	// The footer is only usable as an insertion point when it is a direct
	// child of the destination; otherwise the content is appended.
	switch {
	case mainElem == nil:
		reparentBefore(body, newBody, childOrNil(body, footer))
	case findRec(newBody, atom.Main) == nil:
		reparentBefore(mainElem, newBody, childOrNil(mainElem, footer))
	default:
		for n := findRec(newBody, atom.Main); n != nil; n = findRec(newBody, atom.Main) {
			reparentChildren(mainElem, n)
			n.Parent.RemoveChild(n)
		}
		reparentBefore(body, newBody, childOrNil(body, footer))
	}

	return out
}

// childOrNil returns n when it is a direct child of parent, and nil otherwise.
func childOrNil(parent, n *html.Node) *html.Node {
	if n != nil && n.Parent == parent {
		return n
	}
	return nil
}
// createElement builds a detached element node whose tag is the given atom.
func createElement(a atom.Atom) *html.Node {
	elem := new(html.Node)
	elem.Type = html.ElementNode
	elem.DataAtom = a
	elem.Data = a.String()
	return elem
}
// reparentChildren reparents all of src's child nodes to dst.
func reparentChildren(dst, src *html.Node) {
if src == nil {
return
}
for {
child := src.FirstChild
if child == nil {
break
}
src.RemoveChild(child)
dst.AppendChild(child)
} }
} }
// reparentBefore moves every child of src into dst, placing each one in front
// of locus so that the original order is kept. It does nothing when src is nil.
func reparentBefore(dst, src, locus *html.Node) {
	if src == nil {
		return
	}
	// Each child is detached from src before it is inserted, so src's first
	// child changes on every pass until none are left.
	for next := src.FirstChild; next != nil; next = src.FirstChild {
		src.RemoveChild(next)
		dst.InsertBefore(next, locus)
	}
}
// find returns the first direct child of elem whose DataAtom equals tagName.
// It returns nil when elem is nil or no child matches.
func find(elem *html.Node, tagName atom.Atom) *html.Node {
	if elem == nil {
		return nil
	}
	child := elem.FirstChild
	for child != nil {
		if child.DataAtom == tagName {
			return child
		}
		child = child.NextSibling
	}
	return nil
}
// findRec searches the descendants of elem, depth first and in document
// order, for an element node named tagName and returns the first match.
// It returns nil when elem is nil or nothing matches.
func findRec(elem *html.Node, tagName atom.Atom) *html.Node {
	if elem == nil {
		return nil
	}
	for child := elem.FirstChild; child != nil; child = child.NextSibling {
		isMatch := child.Type == html.ElementNode && child.DataAtom == tagName
		if isMatch {
			return child
		}
		// Look inside this child before moving on to its next sibling.
		found := findRec(child, tagName)
		if found != nil {
			return found
		}
	}
	return nil
}
// clone makes a detached copy of n: the type, atom, data and a private copy
// of the attribute list are carried over, while parent, sibling and child
// links are left unset.
func clone(n *html.Node) *html.Node {
	attrs := make([]html.Attribute, len(n.Attr))
	copy(attrs, n.Attr)

	return &html.Node{
		Type:     n.Type,
		DataAtom: n.DataAtom,
		Data:     n.Data,
		Attr:     attrs,
	}
}

View File

@ -0,0 +1,43 @@
package internal
import (
"strings"
"testing"
"github.com/andreyvit/diff"
)
// baseText is the base template passed to Merge in TestMerge.
const baseText = `
<style>body { background: #ccc; color: black; }</style>
<header></header>
<footer></footer>
`
// pageText is the document merged over baseText in TestMerge.
const pageText = `
<title>This is a title</title>
<style>p { color: red; }</style>
<header><h1>Title</h1></header>
<p>Body text</p>
`
// mergeText is the exact output TestMerge expects from merging pageText over
// baseText.
const mergeText = `<!DOCTYPE html><html><head><style>body { background: #ccc; color: black; }</style>
<title>This is a title</title>
<style>p { color: red; }</style>
</head><body><header><h1>Title</h1></header>
<p>Body text</p>
<footer></footer>
</body></html>`
func TestMerge(t *testing.T) {
got, _, err := Merge(strings.NewReader(baseText), strings.NewReader(pageText))
if err != nil {
t.Error(err)
return
}
if got != mergeText {
//fmt.Println(got)
//t.Errorf("merge mismatch (-want, +got):\n%s", cmp.Diff(mergeText, got))
t.Errorf("merge mismatch (-want, +got):\n%s", diff.LineDiff(mergeText, got))
}
}