diff --git a/go.mod b/go.mod index 7d0845a..a041318 100644 --- a/go.mod +++ b/go.mod @@ -2,4 +2,8 @@ module git.tilde.town/magical/mergehtml go 1.14 -require golang.org/x/net v0.0.0-20200813134508-3edf25e44fcc // indirect +require ( + github.com/andreyvit/diff v0.0.0-20170406064948-c7f18ee00883 + github.com/sergi/go-diff v1.1.0 // indirect + golang.org/x/net v0.0.0-20200813134508-3edf25e44fcc +) diff --git a/go.sum b/go.sum index 445d19a..f64bbaf 100644 --- a/go.sum +++ b/go.sum @@ -1,3 +1,20 @@ +github.com/andreyvit/diff v0.0.0-20170406064948-c7f18ee00883 h1:bvNMNQO63//z+xNgfBlViaCIJKLlCJ6/fmUseuG0wVQ= +github.com/andreyvit/diff v0.0.0-20170406064948-c7f18ee00883/go.mod h1:rCTlJbsFo29Kk6CurOXKm700vrz8f0KW0JNfpkRJY/8= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= +github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= +github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= +github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= +github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/sergi/go-diff v1.1.0 h1:we8PVUC3FE2uYfodKH/nBHMSetSfHDR6scGdBi+erh0= +github.com/sergi/go-diff v1.1.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk= +github.com/stretchr/testify v1.4.0/go.mod 
h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= @@ -7,3 +24,9 @@ golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5h golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo= +gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.2.4 h1:/eiJrUcujPVeJ3xlSWaiNi3uSVmDGBK1pDHUHAnao1I= +gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= diff --git a/internal/merge.go b/internal/merge.go index 904a878..e4ae80b 100644 --- a/internal/merge.go +++ b/internal/merge.go @@ -1,9 +1,14 @@ package internal import ( + "bytes" + "fmt" "io" + "log" + "strings" "golang.org/x/net/html" + "golang.org/x/net/html/atom" ) type Doc struct{} @@ -14,6 +19,8 @@ type Assets struct { Scripts []Asset } +type Asset struct{} + // Merges an HTML document over top of a base template. 
// // @@ -39,12 +46,150 @@ type Assets struct {
 //
 // all scripts and images will be collected and returned in the Assets object
 //
-func Merge(base, doc io.Reader) (merged string, assets *Assets, _ error)
-
-func merge(base, doc *html.Node) (out *html.Node, assets *Assets) {
+func Merge(base, doc io.Reader) (merged string, assets *Assets, _ error) {
+	baseParsed, err := html.Parse(base)
+	if err != nil {
+		return "", nil, fmt.Errorf("couldn't parse base html: %w", err)
+	}
+	docParsed, err := html.Parse(doc)
+	if err != nil {
+		return "", nil, fmt.Errorf("couldn't parse doc: %w", err)
+	}
+	out := merge(baseParsed, docParsed)
+	// Rendering errors are returned to the caller rather than panicking.
+	var buf bytes.Buffer
+	if err := html.Render(&buf, out); err != nil {
+		return "", nil, fmt.Errorf("couldn't render merged html: %w", err)
+	}
+	// Asset collection is not implemented yet, so assets is returned empty.
 	assets = new(Assets)
-	out, err = html.Parse("")
+	return buf.String(), assets, nil
+}
+
+// merge builds a new document from base and doc and returns it. Nodes are
+// moved, not copied, so both base and doc are modified in the process.
+//
+// The children of base's and doc's head are appended to the new head, base
+// first. The children of base's body become the new body. The children of
+// every header and footer element found in doc's body are then appended to
+// the first header and footer of the new body; either one is created if base
+// did not provide it. Merging of the main element is not implemented yet.
+func merge(base, doc *html.Node) (out *html.Node) {
+	out, err := html.Parse(strings.NewReader(""))
 	if err != nil {
 		panic("mergehtml: internal error: " + err.Error())
 	}
+
+	// TODO: title
+	// Named root rather than html so the html package is not shadowed.
+	root := find(out, atom.Html)
+	if root == nil {
+		log.Panicf("no html element in %v", out)
+	}
+	head := find(root, atom.Head)
+	if head == nil {
+		log.Panicf("no head element in %v", root)
+	}
+	reparentChildren(head, find(find(base, atom.Html), atom.Head))
+	reparentChildren(head, find(find(doc, atom.Html), atom.Head))
+
+	body := find(root, atom.Body)
+	if body == nil {
+		log.Panicf("no body element in %v", root)
+	}
+	baseBody := find(find(base, atom.Html), atom.Body)
+	newBody := find(find(doc, atom.Html), atom.Body)
+
+	reparentChildren(body, baseBody)
+	header := findRec(body, atom.Header)
+	footer := findRec(body, atom.Footer)
+	main := findRec(body, atom.Main)
+
+	if header == nil {
+		header = createElement(atom.Header)
+		body.InsertBefore(header, body.FirstChild)
+	}
+	absorbElements(header, newBody, atom.Header)
+
+	if footer == nil {
+		// A missing footer is created as a footer element at the end of the body.
+		footer = createElement(atom.Footer)
+		body.AppendChild(footer)
+	}
+	absorbElements(footer, newBody, atom.Footer)
+
+	_ = main // TODO
+
+	return out
+}
+
+// absorbElements moves the children of every element named tag beneath src
+// into dst, removing each emptied element from its parent.
+func absorbElements(dst, src *html.Node, tag atom.Atom) {
+	for n := findRec(src, tag); n != nil; n = findRec(src, tag) {
+		reparentChildren(dst, n)
+		n.Parent.RemoveChild(n)
+	}
+}
+
+// createElement returns a new element node for the tag a, with no parent.
+func createElement(a atom.Atom) *html.Node {
+	return &html.Node{DataAtom: a, Data: a.String(), Type: html.ElementNode}
+}
+
+// reparentChildren moves all of src's child nodes, in order, to the end of
+// dst's children. It does nothing if src is nil.
+func reparentChildren(dst, src *html.Node) {
+	if src == nil {
+		return
+	}
+	for child := src.FirstChild; child != nil; child = src.FirstChild {
+		src.RemoveChild(child)
+		dst.AppendChild(child)
+	}
+}
+
+// find returns the first child element of elem named tagName. Only direct
+// children are examined. It returns nil if there is no match or elem is nil.
+func find(elem *html.Node, tagName atom.Atom) *html.Node {
+	if elem == nil {
+		return nil
+	}
+	for n := elem.FirstChild; n != nil; n = n.NextSibling {
+		if n.Type == html.ElementNode && n.DataAtom == tagName {
+			return n
+		}
+	}
+	return nil
+}
+
+// findRec returns the first element named tagName that is a descendant of
+// elem, searching depth-first. It returns nil if there is no match or elem is
+// nil.
+func findRec(elem *html.Node, tagName atom.Atom) *html.Node {
+	if elem == nil {
+		return nil
+	}
+	for c := elem.FirstChild; c != nil; c = c.NextSibling {
+		if c.Type == html.ElementNode && c.DataAtom == tagName {
+			return c
+		}
+		if d := findRec(c, tagName); d != nil {
+			return d
+		}
+	}
+	return nil
+}
+
+// clone returns a new node with the same type, data and attributes.
+// The clone has no parent, no siblings and no children.
+func clone(n *html.Node) *html.Node {
+	m := &html.Node{
+		Type:     n.Type,
+		DataAtom: n.DataAtom,
+		Data:     n.Data,
+		Attr:     make([]html.Attribute, len(n.Attr)),
+	}
+	copy(m.Attr, n.Attr)
+	return m
 }
diff --git a/internal/merge_test.go b/internal/merge_test.go
new file mode 100644
index 0000000..408bf6f
--- /dev/null
+++ b/internal/merge_test.go
@@ -0,0 +1,41 @@
+package internal
+
+import (
+	"strings"
+	"testing"
+
+	"github.com/andreyvit/diff"
+)
+
+const baseText = `
+
+
+ +` + +const pageText = ` +This is a title + +

Title

+

Body text +` + +const mergeText = ` +This is a title + +

Title

+
+`
+
+// TestMerge checks that merging pageText over baseText renders mergeText.
+func TestMerge(t *testing.T) {
+	got, _, err := Merge(strings.NewReader(baseText), strings.NewReader(pageText))
+	if err != nil {
+		t.Fatal(err)
+	}
+	if got != mergeText {
+		//fmt.Println(got)
+		//t.Errorf("merge mismatch (-want, +got):\n%s", cmp.Diff(mergeText, got))
+		t.Errorf("merge mismatch (-want, +got):\n%s", diff.LineDiff(mergeText, got))
+	}
+}