This commit is contained in:
nate smith 2024-01-31 21:34:48 -08:00
parent d3ea130aa7
commit 6a710a4867
3 changed files with 72 additions and 4 deletions

View File

@ -1,3 +1,7 @@
/*
Given a Project Gutenberg plaintext book filename, this program prints just the book's content (i.e. with the header and footer stripped).
*/
package main
import (
@ -7,10 +11,6 @@ import (
"strings"
)
/*
Given a project gutenberg plaintext book filename, open it and print just the content.
*/
func main() {
if len(os.Args) < 2 {
fmt.Fprintln(os.Stderr, "need a filename argument")

65
cmd/phraser/main.go Normal file
View File

@ -0,0 +1,65 @@
/*
Given plaintext content on STDIN, emit "phrases" to STDOUT.
Phrases are a loose, artistic concept. The end goal of a phrase is to be useful fodder as a line in a cut-up poem.
*/
package main
import (
"bufio"
"fmt"
"os"
"strings"
)
// minPhraseLen is the minimum number of buffered characters (runes, counted
// before surrounding whitespace is trimmed) a phrase must reach to be printed.
const minPhraseLen = 20

// isPhraseMarker reports whether r is one of the punctuation characters that
// ends the phrase currently being collected.
func isPhraseMarker(r rune) bool {
	switch r {
	case ';', ',', ':', '.', '?', '!', '(', ')', '\'', '{', '}', '[', ']', '“', '”', '=', '`':
		return true
	}
	return false
}

// emitPhrases reads lines from in, splits the text at phrase markers and
// writes each qualifying phrase to out on its own line.
//
// A phrase may span several input lines; consecutive lines are joined with a
// single space and runs of spaces are collapsed. A phrase is written only
// when its buffer holds at least minPhraseLen characters and the trimmed
// phrase contains a space, i.e. it is more than one word. Text that is never
// followed by a marker before the input ends is discarded.
//
// It returns the first write error, or the scanner's error once input is
// exhausted.
func emitPhrases(in *bufio.Scanner, out *bufio.Writer) error {
	// The buffer holds runes rather than bytes so that non-ASCII characters
	// survive intact and the length threshold counts characters.
	var phrase []rune
	for in.Scan() {
		// Converting to []rune makes i a character index, so the end-of-line
		// test below is also correct when the line ends in a multi-byte rune.
		line := []rune(strings.TrimSpace(in.Text()))
		for i, r := range line {
			if isPhraseMarker(r) {
				if len(phrase) >= minPhraseLen {
					candidate := strings.TrimSpace(string(phrase))
					// Judge "multi-word" on the phrase itself, not on the
					// current line, so phrases wrapped across lines are kept.
					if strings.Contains(candidate, " ") {
						// TODO QA check for alphabetic content
						if _, err := fmt.Fprintln(out, candidate); err != nil {
							return err
						}
					}
				}
				phrase = phrase[:0]
				continue
			}
			// Collapse runs of spaces into one.
			if r == ' ' && len(phrase) > 0 && phrase[len(phrase)-1] == ' ' {
				continue
			}
			phrase = append(phrase, r)
			// The line was trimmed, so its last rune is never a space: always
			// add a separator so the next line's first word is not glued on.
			if i == len(line)-1 {
				phrase = append(phrase, ' ')
			}
		}
	}
	return in.Err()
}

// main reads plaintext from STDIN and prints one phrase per line to STDOUT.
// On a read or write failure the error is printed to STDERR and the process
// exits with status 1.
func main() {
	out := bufio.NewWriter(os.Stdout)
	err := emitPhrases(bufio.NewScanner(os.Stdin), out)
	// Flush even after a failure so phrases already produced are not lost.
	if flushErr := out.Flush(); err == nil {
		err = flushErr
	}
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}

3
go.mod Normal file
View File

@ -0,0 +1,3 @@
module github.com/vilmibm/trunkless
go 1.21.6