diff --git a/cmd/phraser/main.go b/cmd/phraser/main.go index 0ee7f00..face2b1 100644 --- a/cmd/phraser/main.go +++ b/cmd/phraser/main.go @@ -35,8 +35,8 @@ func main() { '=': true, '`': true, '-': true, - // TODO try adding |. it breaks up content that we don't generally want, - // anyway, and will lead to short garby phrases being discarded. i think. + '|': true, + '>': true, } s := bufio.NewScanner(os.Stdin) @@ -123,13 +123,15 @@ func clean(bs []byte) string { s := string(bs) s = strings.ReplaceAll(s, "’", "'") s = strings.ReplaceAll(s, "\"", "") + s = strings.ReplaceAll(s, "(", "") + s = strings.ReplaceAll(s, "[", "") + s = strings.ReplaceAll(s, "{", "") + s = strings.ReplaceAll(s, "<", "") + s = strings.ReplaceAll(s, "_", "") + s = strings.ReplaceAll(s, "*", "") s = strings.TrimSpace(s) s = strings.ToLower(s) - // TODO strip _ - // TODO strip * - // TODO strip (,{,[ - if alphaPercent(s) < 50.0 { return "" }