diff --git a/cmd/ingest.go b/cmd/ingest.go
index 3673d9e..b859db5 100644
--- a/cmd/ingest.go
+++ b/cmd/ingest.go
@@ -1,24 +1,65 @@
 package cmd
 
 import (
-	"fmt"
-
 	"github.com/spf13/cobra"
+	"github.com/vilmibm/trunkless/db"
 	"github.com/vilmibm/trunkless/ingest"
 )
 
 func init() {
+	// cutupdir is required: it names the directory of files produced by the cutup command.
+
+	ingestCmd.Flags().StringP("cutupdir", "d", "", "directory to files produced by cutup cmd")
+	ingestCmd.MarkFlagRequired("cutupdir")
 	rootCmd.AddCommand(ingestCmd)
 }
 
 var ingestCmd = &cobra.Command{
-	Use:  "ingest corpusname",
-	Args: cobra.ExactArgs(1),
+	Use:   "ingest corpusname",
+	Short: "ingest already cut-up corpora from disk into database",
+	Args:  cobra.ExactArgs(1),
 	RunE: func(cmd *cobra.Command, args []string) error {
-		switch args[0] {
-		case "gutenberg":
-			return ingest.IngestGut()
+		cutupDir := cmd.Flags().Lookup("cutupdir").Value.String()
+		corpus := args[0]
+
+		conn, err := db.Connect()
+		if err != nil {
+			return err
 		}
-		return fmt.Errorf("corpus unknown: %s", args[0])
+		defer conn.Close(cmd.Context()) // the command opened the connection, so it releases it; Ingest only borrows it
+		opts := ingest.IngestOpts{
+			Conn:     conn,
+			CutupDir: cutupDir,
+			Corpus:   corpus,
+		}
+
+		return ingest.Ingest(opts)
 	},
 }
+
+// thoughts
+//
+// having multitenancy in the db makes phrase selection harder. i need to determine the ID offsets for each corpus's phrase list.
+// currently waiting on an explain analyze for:
+// explain analyze select min(p.id),max(p.id) from phrases p join sources s on s.id = p.sourceid and s.corpusid='cb20c3e';
+// planning time 12ms
+// exec time 91s
+// trying again with an inner join, which was faster but not noticeably so; the explain looks the same (which makes sense--no rows with null allowed are involved).
+
+// if i stick with this i can expect several minutes(!) of startup time to the server; however, since i'm generating ID lookups outside of sql, my lookup should still be O(1).
+// some options:
+// - change everything so every corpus is in its own table:
+//   ${corpus}_phrases: id, sourceid, text
+//   corpora: id, name
+//   sources: id, corpusid, name
+// - cache the result of the min/max id analysis. i could do this to disk or in the db...i would probably do it in the db:
+//   id_ranges: corpusid, minid, maxid
+
+// thinking about this more, as i add corpora the phrases table is going to
+// grow into the billions (assuming other sources are similar in scale to
+// gutenberg). turns out postgresql has table partitioning but idk if that will
+// help me since the ID space will be shared.
+
+// having a table per corpus's phrases will also make tearing down corpora easier -- otherwise i have to regen the entire phrases table to remove gaps in ID space.
+
+// so it's settled; I'm going to retool for table-per-corpus.
diff --git a/db/db.go b/db/db.go
index f6a3ab5..709d8e4 100644
--- a/db/db.go
+++ b/db/db.go
@@ -2,6 +2,7 @@ package db
 
 import (
 	"context"
+	"crypto/sha1"
 	"fmt"
 
 	"github.com/jackc/pgx/v5"
@@ -13,6 +14,17 @@ const (
 	MaxID = 345507789
 )
 
+// StrToID derives a short, deterministic identifier from s: the first six
+// lowercase hex characters (24 bits) of the SHA-1 digest of s. Because the
+// digest is truncated, distinct inputs can collide.
+//
+// NOTE(review): the gutenberg corpus ID previously hard-coded in ingest was
+// the seven-character "cb20c3e"; a six-character result can never equal it,
+// so rows stored under the old ID will not match. Confirm this is intended.
+func StrToID(s string) string {
+	return fmt.Sprintf("%x", sha1.Sum([]byte(s)))[0:6]
+}
+
 func Connect() (*pgx.Conn, error) {
 	conn, err := pgx.Connect(context.Background(), "")
 	if err != nil {
diff --git a/ingest/ingest.go b/ingest/ingest.go
index ffff006..27ad63f 100644
--- a/ingest/ingest.go
+++ b/ingest/ingest.go
@@ -8,6 +8,7 @@ import (
 	"path"
 	"strings"
 
+	"github.com/jackc/pgx/v5"
 	"github.com/vilmibm/trunkless/db"
 )
 
@@ -16,7 +17,9 @@ const cutupDir = "/home/vilmibm/pg_plaintext/cutup"
 // TODO
 // - [X] finalize gutenberg ingestion
 // - [ ] clean up commands
-// - [ ] clean up repo
+//   - [X] get down to just ingest/cutup/serve
+//   - [ ] add arguments for generalizing
+// - [X] clean up repo
 // - [ ] push and deploy to town with new pg db
 // - [ ] gamefaqs extraction
 // - [ ] corpus selector
@@ -25,24 +28,20 @@ const cutupDir = "/home/vilmibm/pg_plaintext/cutup"
 // - [ ] blog post
 // - [ ] launch
 
-func IngestGut() error {
-	conn, err := db.Connect()
-	if err != nil {
-		return err
-	}
-	defer conn.Close(context.Background())
+type IngestOpts struct {
+	Conn     *pgx.Conn // open database connection used for all statements; not closed in the visible part of Ingest
+	Corpus   string    // corpus name; stored as corpora.name and hashed with db.StrToID to form the corpus ID
+	CutupDir string    // directory that Ingest opens and lists for its input files
+}
 
-	dir, err := os.Open(cutupDir)
+func Ingest(o IngestOpts) error {
+	conn := o.Conn
+
+	dir, err := os.Open(o.CutupDir)
 	if err != nil {
 		return fmt.Errorf("could not open %s: %w", cutupDir, err)
 	}
-
-	// echo gutenberg | sha1sum | head -c7
-	corpusid := "cb20c3e"
-	_, err = conn.Exec(context.Background(), "INSERT INTO corpora (id, name) VALUES ($1, $2) ON CONFLICT DO NOTHING", corpusid, "gutenberg")
-	if err != nil {
-		return fmt.Errorf("failed to create gutenberg corpus: %w", err)
-	}
+	defer dir.Close()
 
 	entries, err := dir.Readdirnames(-1)
 	if err != nil {
@@ -53,7 +52,15 @@ func IngestGut() error {
 	if err != nil {
 		return fmt.Errorf("failed to open source index: %w", err)
 	}
+	defer idx.Close()
 
+	corpusid := db.StrToID(o.Corpus)
+	_, err = conn.Exec(context.Background(),
+		"INSERT INTO corpora (id, name) VALUES ($1, $2) ON CONFLICT DO NOTHING",
+		corpusid, o.Corpus)
+	if err != nil {
+		return fmt.Errorf("failed to create '%s' corpus: %w", o.Corpus, err)
+	}
 	tx, err := conn.Begin(context.Background())
 	if err != nil {
 		return fmt.Errorf("could not open transaction: %w", err)
@@ -84,6 +91,5 @@ func IngestGut() error {
 			fmt.Fprintf(os.Stderr, "failed to ingest '%s': %s\n", p, err.Error())
 		}
 	}
-
 	return nil
 }