// isPhraseMarker reports whether r is one of the punctuation characters that
// terminate a phrase.
func isPhraseMarker(r rune) bool {
	switch r {
	case ';', ',', ':', '.', '?', '!', '(', ')', '\'', '{', '}', '[', ']', '“', '”', '=', '`':
		return true
	}
	return false
}

// feedLine consumes one line of input and returns the updated carry-over
// buffer.
//
// buf holds the phrase accumulated so far; a phrase may span several input
// lines, so the caller passes the returned slice back in on the next call.
// line is trimmed of surrounding whitespace before processing. emit is called
// with each completed phrase (trimmed) that is long enough to keep.
//
// The buffer stores runes rather than bytes so multi-byte UTF-8 characters
// survive intact and the length threshold counts characters.
func feedLine(buf []rune, line string, emit func(string)) []rune {
	// Minimum buffered length (including any leading space) for a phrase to
	// be emitted.
	const minPhraseLen = 20

	text := strings.TrimSpace(line)

	// seenSpace is tracked per input line and is not reset at phrase markers:
	// a phrase is only emitted once a space has appeared on the current line.
	seenSpace := false
	for _, r := range text {
		if r == ' ' {
			seenSpace = true
		}
		if isPhraseMarker(r) {
			if len(buf) >= minPhraseLen && seenSpace {
				// TODO QA check for alphabetic content
				emit(strings.TrimSpace(string(buf)))
			}
			buf = buf[:0]
			continue
		}
		// Collapse runs of spaces into a single space.
		if r == ' ' && len(buf) > 0 && buf[len(buf)-1] == ' ' {
			continue
		}
		buf = append(buf, r)
	}

	// A phrase that continues onto the next line needs a separator so the last
	// word of this line is not glued to the first word of the next one.
	if len(buf) > 0 && buf[len(buf)-1] != ' ' {
		buf = append(buf, ' ')
	}
	return buf
}

// main reads plaintext from STDIN line by line and prints each extracted
// phrase on its own line to STDOUT. A read error is reported on STDERR and
// the program exits with status 1.
func main() {
	printPhrase := func(p string) { fmt.Println(p) }

	s := bufio.NewScanner(os.Stdin)
	var buf []rune
	for s.Scan() {
		buf = feedLine(buf, s.Text(), printPhrase)
	}
	if err := s.Err(); err != nil {
		fmt.Fprintln(os.Stderr, "reading stdin:", err)
		os.Exit(1)
	}
}