// conjPrep reports whether phraseBuff currently ends in one of a small, fixed
// set of conjunctions and prepositions, so that the caller can treat the word
// as a phrase boundary.
//
// r is the rune that is about to follow phraseBuff. Only a space can terminate
// a word, so any other rune yields -1 without inspecting the buffer.
//
// A word counts only when it is immediately preceded by a space inside
// phraseBuff; this prevents matching the tail of a longer word (for example
// "blurpfrom") and also means a buffer consisting solely of the word does not
// match.
//
// It returns the byte length of the matched word (not counting the preceding
// space), or -1 when nothing matches.
func conjPrep(phraseBuff []byte, r rune) int {
	if r != ' ' {
		return -1
	}

	words := [...]string{"from", "at", "but", "however", "yet", "though", "and", "to", "on", "or"}
	for _, w := range words {
		// start is where w would have to begin for it to be the final word.
		// Requiring start >= 1 guarantees there is room for the leading space,
		// so no fixed look-back window has to be kept in sync with the list.
		start := len(phraseBuff) - len(w)
		if start < 1 || phraseBuff[start-1] != ' ' {
			continue
		}
		// Comparing string(bytes) directly against a string does not allocate.
		if string(phraseBuff[start:]) == w {
			return len(w)
		}
	}
	return -1
}
+ phraseBuff = phraseBuff[0 : len(phraseBuff)-v] + // TODO this pasta is copied if len(phraseBuff) >= 10 { cleaned := clean(phraseBuff) if len(cleaned) > 0 { diff --git a/cmd/phraser/phraser_test.go b/cmd/phraser/phraser_test.go index 5273820..45e45fc 100644 --- a/cmd/phraser/phraser_test.go +++ b/cmd/phraser/phraser_test.go @@ -2,6 +2,98 @@ package main import "testing" +func Test_conjPrep(t *testing.T) { + type args struct { + buff []byte + r rune + } + cs := []struct { + name string + args args + expected int + }{ + { + name: "empty buffer", + args: args{[]byte(""), ' '}, + expected: -1, + }, + { + name: "not a space yet", + args: args{[]byte("saccharine juice from"), 'x'}, + expected: -1, + }, + { + name: "from", + args: args{[]byte("i will eat from"), ' '}, + expected: 4, + }, + { + name: "no preceding space", + args: args{[]byte("wakkabarblurpfrom"), ' '}, + expected: -1, + }, + { + name: "however", + args: args{[]byte("my eyes are hollow, however"), ' '}, + expected: 7, + }, + { + name: "at", + args: args{[]byte("there will be no more joy at"), ' '}, + expected: 2, + }, + { + name: "but", + args: args{[]byte("i buried him, but"), ' '}, + expected: 3, + }, + { + name: "yet", + args: args{[]byte("the echoes quited yet"), ' '}, + expected: 3, + }, + { + name: "though", + args: args{[]byte("my eyes were closed though"), ' '}, + expected: 6, + }, + { + name: "and", + args: args{[]byte("i raised the torch and"), ' '}, + expected: 3, + }, + { + name: "to", + args: args{[]byte("thousands more to"), ' '}, + expected: 2, + }, + { + name: "on", + args: args{[]byte("bringing rain down on"), ' '}, + expected: 2, + }, + { + name: "no match", + args: args{[]byte("i raised the torch"), ' '}, + expected: -1, + }, + { + name: "or", + args: args{[]byte("whether good or"), ' '}, + expected: 2, + }, + } + + for _, c := range cs { + t.Run(c.name, func(t *testing.T) { + result := conjPrep(c.args.buff, c.args.r) + if result != c.expected { + t.Errorf("got '%v', expected '%v'", 
result, c.expected) + } + }) + } +} + func Test_isAlpha(t *testing.T) { cs := []struct { arg rune