untested: partitioned corpora
This commit is contained in:
parent
6d3546f5ad
commit
591d169fc0
@ -55,6 +55,18 @@ func Ingest(o IngestOpts) error {
|
|||||||
defer idx.Close()
|
defer idx.Close()
|
||||||
|
|
||||||
corpusid := db.StrToID(o.Corpus)
|
corpusid := db.StrToID(o.Corpus)
|
||||||
|
tablename := fmt.Sprintf("phrases_%s", corpusid)
|
||||||
|
_, err = conn.Exec(context.Background(),
|
||||||
|
`CREATE TABLE $1 (
|
||||||
|
id SERIAL PRIMARY KEY,
|
||||||
|
sourceid char(7) NOT NULL,
|
||||||
|
phrase TEXT,
|
||||||
|
FOREIGN KEY (sourceid) REFERENCES sources(id)
|
||||||
|
)`, tablename)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("could not create table '%s': %w", tablename, err)
|
||||||
|
}
|
||||||
|
|
||||||
_, err = conn.Exec(context.Background(),
|
_, err = conn.Exec(context.Background(),
|
||||||
"INSERT INTO corpora (id, name) VALUES ($1, $2) ON CONFLICT DO NOTHING",
|
"INSERT INTO corpora (id, name) VALUES ($1, $2) ON CONFLICT DO NOTHING",
|
||||||
corpusid, o.Corpus)
|
corpusid, o.Corpus)
|
||||||
@ -85,7 +97,7 @@ func Ingest(o IngestOpts) error {
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
p := path.Join(cutupDir, e)
|
p := path.Join(cutupDir, e)
|
||||||
sql := fmt.Sprintf("COPY phrases(sourceid, phrase) FROM '%s'", p)
|
sql := fmt.Sprintf("COPY %s(sourceid, phrase) FROM '%s'", tablename, p)
|
||||||
_, err = conn.Exec(context.Background(), sql)
|
_, err = conn.Exec(context.Background(), sql)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
fmt.Fprintf(os.Stderr, "failed to ingest '%s': %s\n", p, err.Error())
|
fmt.Fprintf(os.Stderr, "failed to ingest '%s': %s\n", p, err.Error())
|
||||||
|
Loading…
x
Reference in New Issue
Block a user