From 1803154a766102a914dbbbf5d937c1b7026f4572 Mon Sep 17 00:00:00 2001 From: nate smith Date: Sun, 28 Apr 2024 01:16:10 -0700 Subject: [PATCH] delete old phraser --- cmd/phraser/main.go | 182 ------------------ .../phraser_test.go => cutup/cutup_test.go | 14 +- 2 files changed, 12 insertions(+), 184 deletions(-) delete mode 100644 cmd/phraser/main.go rename cmd/phraser/phraser_test.go => cutup/cutup_test.go (94%) diff --git a/cmd/phraser/main.go b/cmd/phraser/main.go deleted file mode 100644 index e97493d..0000000 --- a/cmd/phraser/main.go +++ /dev/null @@ -1,182 +0,0 @@ -/* -Given plaintext content on STDIN, emit "phrases" to STDOUT. - -Phrases are a loose, artistic concept. The end goal of a phrase is to be useful fodder as a line in a cut-up poem. - -*/ - -package main - -import ( - "bufio" - "fmt" - "os" - "strings" -) - -func conjPrep(phraseBuff []byte, r rune) int { - if r != ' ' { - return -1 - } - - suffices := []string{"from", "at", "but", "however", "yet", "though", "and", "to", "on", "or"} - maxLen := 8 // TODO magic number based on longest suffix - offset := len(phraseBuff) - maxLen - if offset < 0 { - offset = 0 - } - end := string(phraseBuff[offset:]) - for _, s := range suffices { - if strings.HasSuffix(end, " "+s) { - return len(s) - } - } - return -1 -} - -func main() { - phraseMarkers := map[rune]bool{ - ';': true, - ',': true, - ':': true, - '.': true, - '?': true, - '!': true, - //'(': true, - ')': true, - //'{': true, - '}': true, - //'[': true, - ']': true, - //'\'': true, - //'"': true, - //'“': true, - '”': true, - '=': true, - '`': true, - '-': true, - '|': true, - '>': true, - } - - // I want to experiment with treating prepositions and conjunctions as phrase - // markers. - - // to do this i would need to check the phraseBuff when I check phraseMarkers and then split accordingly - - s := bufio.NewScanner(os.Stdin) - phraseBuff := []byte{} - printed := false - for s.Scan() { - text := strings.TrimSpace(s.Text()) - for i, r := range text { - if ok := phraseMarkers[r]; ok { - if len(phraseBuff) >= 10 { - cleaned := clean(phraseBuff) - if len(cleaned) > 0 { - fmt.Println(cleaned) - printed = true - } - } - if !printed { - //fmt.Fprintf(os.Stderr, "SKIP: %s\n", string(phraseBuff)) - } - printed = false - phraseBuff = []byte{} - } else if v := conjPrep(phraseBuff, r); v > 0 { - // TODO erase or keep? starting with erase. - phraseBuff = phraseBuff[0 : len(phraseBuff)-v] - // TODO this pasta is copied - if len(phraseBuff) >= 10 { - cleaned := clean(phraseBuff) - if len(cleaned) > 0 { - fmt.Println(cleaned) - printed = true - } - } - if !printed { - //fmt.Fprintf(os.Stderr, "SKIP: %s\n", string(phraseBuff)) - } - printed = false - phraseBuff = []byte{} - } else { - asStr := string(phraseBuff) - if r == ' ' && strings.HasSuffix(asStr, " ") { - continue - } - if i == 0 && len(phraseBuff) > 0 && phraseBuff[len(phraseBuff)-1] != ' ' && r != ' ' { - phraseBuff = append(phraseBuff, byte(' ')) - } - phraseBuff = append(phraseBuff, byte(r)) - } - } - } -} - -func isAlpha(r rune) bool { - alphaChars := map[rune]bool{ - 'a': true, - 'b': true, - 'c': true, - 'd': true, - 'e': true, - 'f': true, - 'g': true, - 'h': true, - 'i': true, - 'j': true, - 'k': true, - 'l': true, - 'm': true, - 'n': true, - 'o': true, - 'p': true, - 'q': true, - 'r': true, - 's': true, - 't': true, - 'u': true, - 'v': true, - 'w': true, - 'x': true, - 'y': true, - 'z': true, - } - lookup := strings.ToLower(string(r)) - return alphaChars[rune(lookup[0])] -} - -func alphaPercent(s string) float64 { - total := 0.0 - alpha := 0.0 - - for _, r := range s { - total++ - if isAlpha(r) { - alpha++ - } - } - - return 100 * (alpha / total) -} - -func clean(bs []byte) string { - s := string(bs) - s = strings.ReplaceAll(s, "’", "'") - s = strings.ReplaceAll(s, "\"", "") - s = strings.ReplaceAll(s, "(", "") - s = strings.ReplaceAll(s, "[", "") - s = strings.ReplaceAll(s, "{", "") - s = strings.ReplaceAll(s, "<", "") - s = strings.ReplaceAll(s, "_", "") - s = strings.ReplaceAll(s, "*", "") - s = strings.TrimLeft(s, "'\"") - s = strings.TrimSpace(s) - s = strings.ToLower(s) - - if alphaPercent(s) < 50.0 { - return "" - } - - return s -} diff --git a/cmd/phraser/phraser_test.go b/cutup/cutup_test.go similarity index 94% rename from cmd/phraser/phraser_test.go rename to cutup/cutup_test.go index 45e45fc..961a6eb 100644 --- a/cmd/phraser/phraser_test.go +++ b/cutup/cutup_test.go @@ -1,4 +1,4 @@ -package main +package cutup import "testing" @@ -181,7 +181,7 @@ func Test_clean(t *testing.T) { }{ { name: "all whitespace rejected", - arg: " ", + arg: " ", expected: "", }, { @@ -194,6 +194,16 @@ func Test_clean(t *testing.T) { arg: "cats \"eat\" fish", expected: "cats eat fish", }, + { + name: "leading quote removed", + arg: "'cats eat fish", + expected: "cats eat fish", + }, + { + name: "leading double quote removed", + arg: "\"cats eat fish", + expected: "cats eat fish", + }, { name: "lowered", arg: "Cats Eat Fish",