untested: partitioned corpora

This commit is contained in:
nate smith 2024-04-28 20:47:54 -07:00
parent 6d3546f5ad
commit 591d169fc0

View File

@@ -55,6 +55,18 @@ func Ingest(o IngestOpts) error {
defer idx.Close()
corpusid := db.StrToID(o.Corpus)
tablename := fmt.Sprintf("phrases_%s", corpusid)
_, err = conn.Exec(context.Background(),
`CREATE TABLE $1 (
id SERIAL PRIMARY KEY,
sourceid char(7) NOT NULL,
phrase TEXT,
FOREIGN KEY (sourceid) REFERENCES sources(id)
)`, tablename)
if err != nil {
return fmt.Errorf("could not create table '%s': %w", tablename, err)
}
_, err = conn.Exec(context.Background(),
"INSERT INTO corpora (id, name) VALUES ($1, $2) ON CONFLICT DO NOTHING",
corpusid, o.Corpus)
@@ -85,7 +97,7 @@ func Ingest(o IngestOpts) error {
continue
}
p := path.Join(cutupDir, e)
sql := fmt.Sprintf("COPY phrases(sourceid, phrase) FROM '%s'", p)
sql := fmt.Sprintf("COPY %s(sourceid, phrase) FROM '%s'", tablename, p)
_, err = conn.Exec(context.Background(), sql)
if err != nil {
fmt.Fprintf(os.Stderr, "failed to ingest '%s': %s\n", p, err.Error())