182 lines
3.5 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/*
Given plaintext content on STDIN, emit "phrases" to STDOUT.
Phrases are a loose, artistic concept. The end goal of a phrase is to be useful fodder as a line in a cut-up poem.
*/
package main
import (
"bufio"
"fmt"
"os"
"strings"
)
func conjPrep(phraseBuff []byte, r rune) int {
if r != ' ' {
return -1
}
suffices := []string{"from", "at", "but", "however", "yet", "though", "and", "to", "on", "or"}
maxLen := 8 // TODO magic number based on longest suffix
offset := len(phraseBuff) - maxLen
if offset < 0 {
offset = 0
}
end := string(phraseBuff[offset:])
for _, s := range suffices {
if strings.HasSuffix(end, " "+s) {
return len(s)
}
}
return -1
}
func main() {
phraseMarkers := map[rune]bool{
';': true,
',': true,
':': true,
'.': true,
'?': true,
'!': true,
//'(': true,
')': true,
//'{': true,
'}': true,
//'[': true,
']': true,
//'\'': true,
//'"': true,
//'“': true,
'”': true,
'=': true,
'`': true,
'-': true,
'|': true,
'>': true,
}
// I want to experiment with treating prepositions and conjunctions as phrase
// markers.
// to do this i would need to check the phraseBuff when I check phraseMarkers and then split accordingly
s := bufio.NewScanner(os.Stdin)
phraseBuff := []byte{}
printed := false
for s.Scan() {
text := strings.TrimSpace(s.Text())
for i, r := range text {
if ok := phraseMarkers[r]; ok {
if len(phraseBuff) >= 10 {
cleaned := clean(phraseBuff)
if len(cleaned) > 0 {
fmt.Println(cleaned)
printed = true
}
}
if !printed {
//fmt.Fprintf(os.Stderr, "SKIP: %s\n", string(phraseBuff))
}
printed = false
phraseBuff = []byte{}
} else if v := conjPrep(phraseBuff, r); v > 0 {
// TODO erase or keep? starting with erase.
phraseBuff = phraseBuff[0 : len(phraseBuff)-v]
// TODO this pasta is copied
if len(phraseBuff) >= 10 {
cleaned := clean(phraseBuff)
if len(cleaned) > 0 {
fmt.Println(cleaned)
printed = true
}
}
if !printed {
//fmt.Fprintf(os.Stderr, "SKIP: %s\n", string(phraseBuff))
}
printed = false
phraseBuff = []byte{}
} else {
asStr := string(phraseBuff)
if r == ' ' && strings.HasSuffix(asStr, " ") {
continue
}
if i == 0 && len(phraseBuff) > 0 && phraseBuff[len(phraseBuff)-1] != ' ' && r != ' ' {
phraseBuff = append(phraseBuff, byte(' '))
}
phraseBuff = append(phraseBuff, byte(r))
}
}
}
}
func isAlpha(r rune) bool {
alphaChars := map[rune]bool{
'a': true,
'b': true,
'c': true,
'd': true,
'e': true,
'f': true,
'g': true,
'h': true,
'i': true,
'j': true,
'k': true,
'l': true,
'm': true,
'n': true,
'o': true,
'p': true,
'q': true,
'r': true,
's': true,
't': true,
'u': true,
'v': true,
'w': true,
'x': true,
'y': true,
'z': true,
}
lookup := strings.ToLower(string(r))
return alphaChars[rune(lookup[0])]
}
func alphaPercent(s string) float64 {
total := 0.0
alpha := 0.0
for _, r := range s {
total++
if isAlpha(r) {
alpha++
}
}
return 100 * (alpha / total)
}
func clean(bs []byte) string {
s := string(bs)
s = strings.ReplaceAll(s, "", "'")
s = strings.ReplaceAll(s, "\"", "")
s = strings.ReplaceAll(s, "(", "")
s = strings.ReplaceAll(s, "[", "")
s = strings.ReplaceAll(s, "{", "")
s = strings.ReplaceAll(s, "<", "")
s = strings.ReplaceAll(s, "_", "")
s = strings.ReplaceAll(s, "*", "")
s = strings.TrimSpace(s)
s = strings.ToLower(s)
if alphaPercent(s) < 50.0 {
return ""
}
return s
}