// minPhraseLen is the minimum number of characters (runes, counted before
// cleaning and including spaces) a buffered fragment must contain to be
// printed as a phrase.
const minPhraseLen = 10

// main reads text from standard input, splits it into phrases at punctuation
// markers, and prints every cleaned phrase on its own line to standard
// output. Fragments that are too short, or that clean down to nothing, are
// reported on standard error with a "SKIP: " prefix.
func main() {
	// Allow lines of up to 1 MiB; the Scanner default of 64 KiB is easy to
	// exceed with prose that keeps a whole paragraph on one line.
	const maxLineBytes = 1 << 20

	sp := phraseSplitter{
		emit: func(phrase string) { fmt.Println(phrase) },
		skip: func(raw string) { fmt.Fprintf(os.Stderr, "SKIP: %s\n", raw) },
	}

	sc := bufio.NewScanner(os.Stdin)
	sc.Buffer(make([]byte, 0, 64*1024), maxLineBytes)
	for sc.Scan() {
		sp.feedLine(sc.Text())
	}
	// Text after the last marker would otherwise vanish without a trace.
	sp.flush()

	if err := sc.Err(); err != nil {
		fmt.Fprintf(os.Stderr, "phraser: reading stdin: %v\n", err)
		os.Exit(1)
	}
}

// isPhraseMarker reports whether r terminates the phrase being collected.
// Opening brackets and quotes ('(', '{', '[', '\'', '"', '“') are not
// markers, so they stay inside the phrase text.
func isPhraseMarker(r rune) bool {
	switch r {
	case ';', ',', ':', '.', '?', '!', ')', '}', ']', '”', '=', '`', '-':
		return true
	}
	return false
}

// phraseSplitter accumulates text line by line and cuts it into phrases at
// marker runes. A phrase may span several input lines.
type phraseSplitter struct {
	buf   []byte // UTF-8 bytes of the phrase collected so far
	runes int    // number of runes currently held in buf

	emit func(phrase string) // receives every accepted, cleaned phrase
	skip func(raw string)    // receives every rejected raw fragment
}

// feedLine consumes one input line. Surrounding whitespace is trimmed, runs
// of spaces are collapsed to one, and a single space is inserted where a
// phrase continues from the previous line.
func (p *phraseSplitter) feedLine(line string) {
	text := strings.TrimSpace(line)
	for i, r := range text {
		if isPhraseMarker(r) {
			p.endPhrase()
			continue
		}

		endsWithSpace := len(p.buf) > 0 && p.buf[len(p.buf)-1] == ' '
		if r == ' ' && endsWithSpace {
			continue // collapse consecutive spaces
		}
		// The line break between two lines acts as a word separator.
		if i == 0 && len(p.buf) > 0 && !endsWithSpace && r != ' ' {
			p.buf = append(p.buf, ' ')
			p.runes++
		}
		// Append the full UTF-8 encoding; byte(r) would truncate any
		// non-ASCII rune to a single garbage byte.
		p.buf = append(p.buf, string(r)...)
		p.runes++
	}
}

// endPhrase closes the fragment collected so far: it is emitted when it holds
// at least minPhraseLen runes and is non-empty after cleaning, and reported
// through skip otherwise. The buffer is reset either way.
func (p *phraseSplitter) endPhrase() {
	printed := false
	if p.runes >= minPhraseLen {
		if cleaned := clean(p.buf); len(cleaned) > 0 {
			p.emit(cleaned)
			printed = true
		}
	}
	if !printed {
		p.skip(string(p.buf))
	}
	p.buf = p.buf[:0]
	p.runes = 0
}

// flush closes a pending fragment at end of input. It does nothing when the
// buffer is empty.
func (p *phraseSplitter) flush() {
	if len(p.buf) > 0 {
		p.endPhrase()
	}
}

// clean normalizes a raw phrase for output: typographic apostrophes become
// ASCII apostrophes, straight double quotes are removed, surrounding
// whitespace is trimmed, and the result is lower-cased.
func clean(bs []byte) string {
	s := string(bs)
	s = strings.ReplaceAll(s, "’", "'")
	s = strings.ReplaceAll(s, "\"", "")
	s = strings.TrimSpace(s)
	s = strings.ToLower(s)

	// TODO: QA check that the phrase contains alphabetic content.
	return s
}