diff --git a/cmd/cutup.go b/cmd/cutup.go new file mode 100644 index 0000000..5537b2e --- /dev/null +++ b/cmd/cutup.go @@ -0,0 +1,19 @@ +package cmd + +import ( + "os" + + "github.com/spf13/cobra" + "github.com/vilmibm/trunkless/cutup" +) + +func init() { + rootCmd.AddCommand(cutupCmd) +} + +var cutupCmd = &cobra.Command{ + Use: "cutup", + Run: func(cmd *cobra.Command, args []string) { + cutup.Cutup(os.Stdin) + }, +} diff --git a/cmd/ingest.go b/cmd/ingest.go new file mode 100644 index 0000000..ba3ce1b --- /dev/null +++ b/cmd/ingest.go @@ -0,0 +1,20 @@ +package cmd + +import ( + "os" + "strings" + + "github.com/spf13/cobra" + "github.com/vilmibm/trunkless/ingest" +) + +func init() { + rootCmd.AddCommand(ingestCmd) +} + +var ingestCmd = &cobra.Command{ + Use: "ingest", + RunE: func(cmd *cobra.Command, args []string) error { + return ingest.Ingest(strings.Join(args[1:], " "), os.Stdin) + }, +} diff --git a/cmd/root.go b/cmd/root.go new file mode 100644 index 0000000..378354d --- /dev/null +++ b/cmd/root.go @@ -0,0 +1,19 @@ +package cmd + +import ( + "fmt" + "os" + + "github.com/spf13/cobra" +) + +var rootCmd = &cobra.Command{ + Use: "trunkless", +} + +func Execute() { + if err := rootCmd.Execute(); err != nil { + fmt.Fprintf(os.Stderr, err.Error()) + os.Exit(1) + } +} diff --git a/cmd/serve.go b/cmd/serve.go new file mode 100644 index 0000000..4006d68 --- /dev/null +++ b/cmd/serve.go @@ -0,0 +1,17 @@ +package cmd + +import ( + "github.com/spf13/cobra" + "github.com/vilmibm/trunkless/web" +) + +func init() { + rootCmd.AddCommand(serveCmd) +} + +var serveCmd = &cobra.Command{ + Use: "serve", + RunE: func(cmd *cobra.Command, args []string) error { + return web.Serve() + }, +} diff --git a/cutup/cutup.go b/cutup/cutup.go new file mode 100644 index 0000000..a1e568b --- /dev/null +++ b/cutup/cutup.go @@ -0,0 +1,175 @@ +package cutup + +import ( + "bufio" + "fmt" + "io" + "strings" +) + +func conjPrep(phraseBuff []byte, r rune) int { + if r != ' ' { + return -1 + } + + suffices := []string{"from", "at", "but", "however", "yet", "though", "and", "to", "on", "or"} + maxLen := 8 // TODO magic number based on longest suffix + offset := len(phraseBuff) - maxLen + if offset < 0 { + offset = 0 + } + end := string(phraseBuff[offset:]) + for _, s := range suffices { + if strings.HasSuffix(end, " "+s) { + return len(s) + } + } + return -1 +} + +func Cutup(ins io.Reader) { + phraseMarkers := map[rune]bool{ + ';': true, + ',': true, + ':': true, + '.': true, + '?': true, + '!': true, + //'(': true, + ')': true, + //'{': true, + '}': true, + //'[': true, + ']': true, + //'\'': true, + //'"': true, + //'“': true, + '”': true, + '=': true, + '`': true, + '-': true, + '|': true, + '>': true, + } + + // I want to experiment with treating prepositions and conjunctions as phrase + // markers. + + // to do this i would need to check the phraseBuff when I check phraseMarkers and then split accordingly + + s := bufio.NewScanner(ins) + phraseBuff := []byte{} + printed := false + for s.Scan() { + text := strings.TrimSpace(s.Text()) + for i, r := range text { + if ok := phraseMarkers[r]; ok { + if len(phraseBuff) >= 10 { + cleaned := clean(phraseBuff) + if len(cleaned) > 0 { + fmt.Println(cleaned) + printed = true + } + } + if !printed { + //fmt.Fprintf(os.Stderr, "SKIP: %s\n", string(phraseBuff)) + } + printed = false + phraseBuff = []byte{} + } else if v := conjPrep(phraseBuff, r); v > 0 { + // TODO erase or keep? starting with erase. + phraseBuff = phraseBuff[0 : len(phraseBuff)-v] + // TODO this pasta is copied + if len(phraseBuff) >= 10 { + cleaned := clean(phraseBuff) + if len(cleaned) > 0 { + fmt.Println(cleaned) + printed = true + } + } + if !printed { + //fmt.Fprintf(os.Stderr, "SKIP: %s\n", string(phraseBuff)) + } + printed = false + phraseBuff = []byte{} + } else { + asStr := string(phraseBuff) + if r == ' ' && strings.HasSuffix(asStr, " ") { + continue + } + if i == 0 && len(phraseBuff) > 0 && phraseBuff[len(phraseBuff)-1] != ' ' && r != ' ' { + phraseBuff = append(phraseBuff, byte(' ')) + } + phraseBuff = append(phraseBuff, byte(r)) + } + } + } +} + +func isAlpha(r rune) bool { + alphaChars := map[rune]bool{ + 'a': true, + 'b': true, + 'c': true, + 'd': true, + 'e': true, + 'f': true, + 'g': true, + 'h': true, + 'i': true, + 'j': true, + 'k': true, + 'l': true, + 'm': true, + 'n': true, + 'o': true, + 'p': true, + 'q': true, + 'r': true, + 's': true, + 't': true, + 'u': true, + 'v': true, + 'w': true, + 'x': true, + 'y': true, + 'z': true, + } + lookup := strings.ToLower(string(r)) + return alphaChars[rune(lookup[0])] +} + +func alphaPercent(s string) float64 { + total := 0.0 + alpha := 0.0 + + for _, r := range s { + total++ + if isAlpha(r) { + alpha++ + } + } + + return 100 * (alpha / total) +} + +func clean(bs []byte) string { + s := string(bs) + s = strings.ReplaceAll(s, "’", "'") + s = strings.ReplaceAll(s, "\"", "") + s = strings.ReplaceAll(s, "(", "") + s = strings.ReplaceAll(s, "[", "") + s = strings.ReplaceAll(s, "{", "") + s = strings.ReplaceAll(s, "<", "") + s = strings.ReplaceAll(s, "_", "") + s = strings.ReplaceAll(s, "*", "") + s = strings.TrimLeft(s, "'\"") + s = strings.TrimSpace(s) + s = strings.ToLower(s) + + if alphaPercent(s) < 50.0 { + return "" + } + + return s +} diff --git a/db/db.go b/db/db.go new file mode 100644 index 0000000..74565f8 --- /dev/null +++ b/db/db.go @@ -0,0 +1,21 @@ +package db + +import ( + "database/sql" + + _ "github.com/mattn/go-sqlite3" +) + +const ( + dsn = "phrase.db?cache=shared&mode=r" + MaxID = 467014991 +) + +func Connect() (*sql.DB, error) { + db, err := sql.Open("sqlite3", dsn) + if err != nil { + return nil, err + } + + return db, nil +} diff --git a/go.mod b/go.mod index 2e5cf51..656278e 100644 --- a/go.mod +++ b/go.mod @@ -12,6 +12,7 @@ require ( github.com/go-playground/universal-translator v0.18.1 // indirect github.com/go-playground/validator/v10 v10.14.0 // indirect github.com/goccy/go-json v0.10.2 // indirect + github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/json-iterator/go v1.1.12 // indirect github.com/klauspost/cpuid/v2 v2.2.4 // indirect github.com/leodido/go-urn v1.2.4 // indirect @@ -20,6 +21,8 @@ require ( github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.2 // indirect github.com/pelletier/go-toml/v2 v2.0.8 // indirect + github.com/spf13/cobra v1.8.0 // indirect + github.com/spf13/pflag v1.0.5 // indirect github.com/twitchyliquid64/golang-asm v0.15.1 // indirect github.com/ugorji/go/codec v1.2.11 // indirect golang.org/x/arch v0.3.0 // indirect diff --git a/go.sum b/go.sum index 7d308c5..da29301 100644 --- a/go.sum +++ b/go.sum @@ -4,6 +4,7 @@ github.com/bytedance/sonic v1.9.1/go.mod h1:i736AoUSYt75HyZLoJW9ERYxcy6eaN6h4BZX github.com/chenzhuoyu/base64x v0.0.0-20211019084208-fb5309c8db06/go.mod h1:DH46F32mSOjUmXrMHnKwZdA8wcEefY7UVqBKYGjpdQY= github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311 h1:qSGYFH7+jGhDF8vLC+iwCD4WpbV1EBDSzWkJODFLams= github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311/go.mod h1:b583jCggY9gE99b6G5LEC39OIiVsWj+R97kbl5odCEk= +github.com/cpuguy83/go-md2man/v2 v2.0.3/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/gabriel-vasile/mimetype v1.4.2 h1:w5qFW6JKBz9Y393Y4q372O9A7cUSequkh1Q7OhCmWKU= @@ -23,6 +24,8 @@ github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MG github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= +github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= @@ -42,6 +45,11 @@ github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjY github.com/pelletier/go-toml/v2 v2.0.8 h1:0ctb6s9mE31h0/lhu+J6OPmVeDxJn+kYnJc2jZR9tGQ= github.com/pelletier/go-toml/v2 v2.0.8/go.mod h1:vuYfssBdrU2XDZ9bYydBu6t+6a6PYNcZljzZR9VXg+4= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/spf13/cobra v1.8.0 h1:7aJaZx1B85qltLMc546zn58BxxfZdR/W22ej9CFoEf0= +github.com/spf13/cobra v1.8.0/go.mod h1:WXLWApfZ71AjXPya3WOlMsY9yMs7YeiHhFVlvLyhcho= +github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= +github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= diff --git a/cmd/ingest/main.go b/ingest/ingest.go similarity index 76% rename from cmd/ingest/main.go rename to ingest/ingest.go index e1a40fe..a6b3a49 100644 --- a/cmd/ingest/main.go +++ b/ingest/ingest.go @@ -1,12 +1,10 @@ -package main +package ingest import ( "bufio" "database/sql" - "errors" "fmt" - "os" - "strings" + "io" _ "github.com/mattn/go-sqlite3" ) @@ -29,10 +27,7 @@ func createSource(db *sql.DB, sourceName string) (int64, error) { return result.LastInsertId() } -func _main(args []string) error { - if len(os.Args) == 0 { - return errors.New("need a source name argument") - } +func Ingest(sourceName string, ins io.Reader) error { db, err := sql.Open("sqlite3", dsn) if err != nil { return err @@ -40,9 +35,8 @@ func _main(args []string) error { defer db.Close() - s := bufio.NewScanner(os.Stdin) + s := bufio.NewScanner(ins) - sourceName := strings.Join(os.Args[1:], " ") sourceID, err := createSource(db, sourceName) if err != nil { return fmt.Errorf("could not make source: %w", err) @@ -68,10 +62,3 @@ func _main(args []string) error { return tx.Commit() } - -func main() { - if err := _main(os.Args[1:]); err != nil { - fmt.Fprintf(os.Stderr, "error: %s", err) - os.Exit(1) - } -} diff --git a/main.go b/main.go index 6e693db..9dbb053 100644 --- a/main.go +++ b/main.go @@ -1,106 +1,9 @@ package main import ( - "html/template" - "log" - "math/big" - "net/http" - "strings" - - "crypto/rand" - "database/sql" - - "github.com/gin-gonic/gin" - - _ "github.com/mattn/go-sqlite3" + "github.com/vilmibm/trunkless/cmd" ) -const ( - dsn = "phrase.db?cache=shared&mode=r" - maxID = 467014991 -) - -func connectDB() (*sql.DB, error) { - db, err := sql.Open("sqlite3", dsn) - if err != nil { - return nil, err - } - - return db, nil -} - -type source struct { - ID int64 - Name string -} - -type phrase struct { - ID int64 - Text string - Source source -} - func main() { - r := gin.Default() - r.SetFuncMap(template.FuncMap{ - "upper": strings.ToUpper, - }) - r.LoadHTMLFiles("templates/index.tmpl") - r.StaticFile("/cutive.ttf", "./assets/cutive.ttf") - r.StaticFile("/favicon.ico", "./assets/favicon.ico") - r.StaticFile("/bg_light.gif", "./assets/bg_light.gif") - r.StaticFile("/bg_dark.gif", "./assets/bg_dark.gif") - r.StaticFile("/main.js", "./assets/main.js") - r.StaticFile("/html2canvas.min.js", "./assets/html2canvas.min.js") - - randMax := big.NewInt(maxID) - - r.HEAD("/", func(c *gin.Context) { - c.String(http.StatusOK, "") - }) - - r.GET("/", func(c *gin.Context) { - c.HTML(http.StatusOK, "index.tmpl", struct { - MaxID int - // TODO anything else? - }{maxID}) - }) - - r.GET("/line", func(c *gin.Context) { - db, err := connectDB() - if err != nil { - log.Println(err.Error()) - c.String(http.StatusInternalServerError, "oh no.") - return - } - defer db.Close() - - id, err := rand.Int(rand.Reader, randMax) - if err != nil { - log.Println(err.Error()) - c.String(http.StatusInternalServerError, "oh no.") - return - } - - stmt, err := db.Prepare("select p.phrase, p.id, s.name from phrases p join sources s on p.sourceid = s.id where p.id = ?") - if err != nil { - log.Println(err.Error()) - c.String(http.StatusInternalServerError, "oh no.") - return - } - - row := stmt.QueryRow(id.Int64()) - var p phrase - var s source - err = row.Scan(&p.Text, &s.ID, &s.Name) - if err != nil { - log.Println(err.Error()) - c.String(http.StatusInternalServerError, "oh no.") - } - p.Source = s - p.ID = id.Int64() - c.JSON(http.StatusOK, p) - }) - - r.Run() // 8080 + cmd.Execute() } diff --git a/assets/bg_dark.gif b/web/assets/bg_dark.gif similarity index 100% rename from assets/bg_dark.gif rename to web/assets/bg_dark.gif diff --git a/assets/bg_light.gif b/web/assets/bg_light.gif similarity index 100% rename from assets/bg_light.gif rename to web/assets/bg_light.gif diff --git a/assets/cutive.ttf b/web/assets/cutive.ttf similarity index 100% rename from assets/cutive.ttf rename to web/assets/cutive.ttf diff --git a/assets/html2canvas.min.js b/web/assets/html2canvas.min.js similarity index 100% rename from assets/html2canvas.min.js rename to web/assets/html2canvas.min.js diff --git a/assets/main.js b/web/assets/main.js similarity index 100% rename from assets/main.js rename to web/assets/main.js diff --git a/web/web.go b/web/web.go new file mode 100644 index 0000000..e58c886 --- /dev/null +++ b/web/web.go @@ -0,0 +1,89 @@ +package web + +import ( + "crypto/rand" + "html/template" + "log" + "math/big" + "net/http" + "strings" + + "github.com/gin-gonic/gin" + "github.com/vilmibm/trunkless/db" +) + +type source struct { + ID int64 + Name string +} + +type phrase struct { + ID int64 + Text string + Source source +} + +func Serve() error { + r := gin.Default() + r.SetFuncMap(template.FuncMap{ + "upper": strings.ToUpper, + }) + r.LoadHTMLFiles("templates/index.tmpl") + r.StaticFile("/cutive.ttf", "./web/assets/cutive.ttf") + r.StaticFile("/favicon.ico", "./web/assets/favicon.ico") + r.StaticFile("/bg_light.gif", "./web/assets/bg_light.gif") + r.StaticFile("/bg_dark.gif", "./web/assets/bg_dark.gif") + r.StaticFile("/main.js", "./web/assets/main.js") + r.StaticFile("/html2canvas.min.js", "./web/assets/html2canvas.min.js") + + randMax := big.NewInt(db.MaxID) + + r.HEAD("/", func(c *gin.Context) { + c.String(http.StatusOK, "") + }) + + r.GET("/", func(c *gin.Context) { + c.HTML(http.StatusOK, "index.tmpl", struct { + MaxID int + // TODO anything else? + }{db.MaxID}) + }) + + r.GET("/line", func(c *gin.Context) { + db, err := db.Connect() + if err != nil { + log.Println(err.Error()) + c.String(http.StatusInternalServerError, "oh no.") + return + } + defer db.Close() + + id, err := rand.Int(rand.Reader, randMax) + if err != nil { + log.Println(err.Error()) + c.String(http.StatusInternalServerError, "oh no.") + return + } + + stmt, err := db.Prepare("select p.phrase, p.id, s.name from phrases p join sources s on p.sourceid = s.id where p.id = ?") + if err != nil { + log.Println(err.Error()) + c.String(http.StatusInternalServerError, "oh no.") + return + } + + row := stmt.QueryRow(id.Int64()) + var p phrase + var s source + err = row.Scan(&p.Text, &s.ID, &s.Name) + if err != nil { + log.Println(err.Error()) + c.String(http.StatusInternalServerError, "oh no.") + } + p.Source = s + p.ID = id.Int64() + c.JSON(http.StatusOK, p) + }) + + return r.Run() // 8080 +}