split on conjunctions and prepositions (some of them)
This commit is contained in:
parent
b49138d0ad
commit
d29a80817d
@ -14,6 +14,26 @@ import (
|
||||
"strings"
|
||||
)
|
||||
|
||||
// conjPrep reports whether phraseBuff currently ends in a whole-word
// conjunction or preposition, so the caller can treat that word as a phrase
// boundary.
//
// r is the rune about to follow the buffer; only a space completes a word, so
// any other rune yields -1. A word counts only when it is preceded by a space
// inside the buffer, which rules out matches in the middle of a longer word
// and matches where the word is the entire buffer.
//
// It returns the byte length of the matched word (not counting the space in
// front of it), or -1 when nothing matches.
func conjPrep(phraseBuff []byte, r rune) int {
	if r != ' ' {
		return -1
	}

	suffixes := [...]string{"from", "at", "but", "however", "yet", "though", "and", "to", "on", "or"}

	for _, s := range suffixes {
		// Index where the candidate word would begin. It must be at least 1
		// so that there is room for the separating space before it.
		start := len(phraseBuff) - len(s)
		if start < 1 || phraseBuff[start-1] != ' ' {
			continue
		}
		// Comparing against the tail directly means there is no look-back
		// window whose size has to track the longest word in the list.
		if string(phraseBuff[start:]) == s {
			return len(s)
		}
	}

	return -1
}
|
||||
|
||||
func main() {
|
||||
phraseMarkers := map[rune]bool{
|
||||
';': true,
|
||||
@ -39,13 +59,34 @@ func main() {
|
||||
'>': true,
|
||||
}
|
||||
|
||||
// I want to experiment with treating prepositions and conjunctions as phrase
|
||||
// markers.
|
||||
|
||||
// to do this i would need to check the phraseBuff when I check phraseMarkers and then split accordingly
|
||||
|
||||
s := bufio.NewScanner(os.Stdin)
|
||||
phraseBuff := []byte{}
|
||||
printed := false
|
||||
for s.Scan() {
|
||||
text := strings.TrimSpace(s.Text())
|
||||
for i, r := range text {
|
||||
if ok, val := phraseMarkers[r]; ok && val {
|
||||
if ok := phraseMarkers[r]; ok {
|
||||
if len(phraseBuff) >= 10 {
|
||||
cleaned := clean(phraseBuff)
|
||||
if len(cleaned) > 0 {
|
||||
fmt.Println(cleaned)
|
||||
printed = true
|
||||
}
|
||||
}
|
||||
if !printed {
|
||||
//fmt.Fprintf(os.Stderr, "SKIP: %s\n", string(phraseBuff))
|
||||
}
|
||||
printed = false
|
||||
phraseBuff = []byte{}
|
||||
} else if v := conjPrep(phraseBuff, r); v > 0 {
|
||||
// TODO erase or keep? starting with erase.
|
||||
phraseBuff = phraseBuff[0 : len(phraseBuff)-v]
|
||||
// TODO this pasta is copied
|
||||
if len(phraseBuff) >= 10 {
|
||||
cleaned := clean(phraseBuff)
|
||||
if len(cleaned) > 0 {
|
||||
|
@ -2,6 +2,98 @@ package main
|
||||
|
||||
import "testing"
|
||||
|
||||
func Test_conjPrep(t *testing.T) {
|
||||
type args struct {
|
||||
buff []byte
|
||||
r rune
|
||||
}
|
||||
cs := []struct {
|
||||
name string
|
||||
args args
|
||||
expected int
|
||||
}{
|
||||
{
|
||||
name: "empty buffer",
|
||||
args: args{[]byte(""), ' '},
|
||||
expected: -1,
|
||||
},
|
||||
{
|
||||
name: "not a space yet",
|
||||
args: args{[]byte("saccharine juice from"), 'x'},
|
||||
expected: -1,
|
||||
},
|
||||
{
|
||||
name: "from",
|
||||
args: args{[]byte("i will eat from"), ' '},
|
||||
expected: 4,
|
||||
},
|
||||
{
|
||||
name: "no preceding space",
|
||||
args: args{[]byte("wakkabarblurpfrom"), ' '},
|
||||
expected: -1,
|
||||
},
|
||||
{
|
||||
name: "however",
|
||||
args: args{[]byte("my eyes are hollow, however"), ' '},
|
||||
expected: 7,
|
||||
},
|
||||
{
|
||||
name: "at",
|
||||
args: args{[]byte("there will be no more joy at"), ' '},
|
||||
expected: 2,
|
||||
},
|
||||
{
|
||||
name: "but",
|
||||
args: args{[]byte("i buried him, but"), ' '},
|
||||
expected: 3,
|
||||
},
|
||||
{
|
||||
name: "yet",
|
||||
args: args{[]byte("the echoes quited yet"), ' '},
|
||||
expected: 3,
|
||||
},
|
||||
{
|
||||
name: "though",
|
||||
args: args{[]byte("my eyes were closed though"), ' '},
|
||||
expected: 6,
|
||||
},
|
||||
{
|
||||
name: "and",
|
||||
args: args{[]byte("i raised the torch and"), ' '},
|
||||
expected: 3,
|
||||
},
|
||||
{
|
||||
name: "to",
|
||||
args: args{[]byte("thousands more to"), ' '},
|
||||
expected: 2,
|
||||
},
|
||||
{
|
||||
name: "on",
|
||||
args: args{[]byte("bringing rain down on"), ' '},
|
||||
expected: 2,
|
||||
},
|
||||
{
|
||||
name: "no match",
|
||||
args: args{[]byte("i raised the torch"), ' '},
|
||||
expected: -1,
|
||||
},
|
||||
{
|
||||
name: "or",
|
||||
args: args{[]byte("whether good or"), ' '},
|
||||
expected: 2,
|
||||
},
|
||||
}
|
||||
|
||||
for _, c := range cs {
|
||||
t.Run(c.name, func(t *testing.T) {
|
||||
result := conjPrep(c.args.buff, c.args.r)
|
||||
if result != c.expected {
|
||||
t.Errorf("got '%v', expected '%v'", result, c.expected)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func Test_isAlpha(t *testing.T) {
|
||||
cs := []struct {
|
||||
arg rune
|
||||
|
Loading…
x
Reference in New Issue
Block a user