This commit is contained in:
nate smith 2024-07-01 22:20:39 -07:00
parent 68d33f39fa
commit 6632a3e9de
2 changed files with 157 additions and 13 deletions

View File

@ -133,6 +133,7 @@ func worker(opts CutupOpts, paths <-chan string, sources chan<- string) {
} }
if sourceid == "" { if sourceid == "" {
sourceid = db.StrToID(title) sourceid = db.StrToID(title)
fmt.Fprintln(os.Stderr, sourceid, p)
prefix = sourceid + "\t" prefix = sourceid + "\t"
of, err = os.Create(path.Join(opts.CutupDir, sourceid)) of, err = os.Create(path.Join(opts.CutupDir, sourceid))
if err != nil { if err != nil {

View File

@ -4,10 +4,18 @@ import (
"fmt" "fmt"
"os" "os"
"path/filepath" "path/filepath"
"strings"
"sync" "sync"
"unicode/utf8"
) )
const p = "/home/vilmibm/geocities/UPPERCASE/geocities/YAHOOIDS" const (
// src is a format string for the root of one archive section. main fills
// the %s verb with each top-level directory name ("UPPERCASE",
// "LOWERCASE", "NUMBERS") and walks the resulting path.
src = "/home/vilmibm/geocities/%s/geocities/YAHOOIDS"
// userDirPattern is the filepath.Match pattern main tests every walked
// directory against: any archive section, then two single-character
// directory levels, then one more path element.
// NOTE(review): presumably that last element is the user's name - confirm.
userDirPattern = "/home/vilmibm/geocities/*/geocities/YAHOOIDS/?/?/*"
// t is the directory in which processUserDir creates its per-user output
// file, named after the last element of the user directory path.
t = "/home/vilmibm/gc"
)
func main() { func main() {
userDirs := make(chan string) userDirs := make(chan string)
@ -17,11 +25,12 @@ func main() {
if err != nil { if err != nil {
return err return err
} }
if !d.IsDir() { if !d.IsDir() {
return nil return nil
} }
isUserDir, err := filepath.Match("/home/vilmibm/geocities/UPPERCASE/geocities/YAHOOIDS/?/?/*", s) isUserDir, err := filepath.Match(userDirPattern, s)
if err != nil { if err != nil {
return err return err
} }
@ -30,29 +39,163 @@ func main() {
userDirs <- s userDirs <- s
} }
//if len(d.Name()) > 1 && d.IsDir() {
// fmt.Printf("%s %s\n", s, d.Name())
//}
// TODO be able to tell when s is a full path (ie to a file for user)
// TODO sniff what kind of file full path points to
// TODO if text, read and append to file for that user
return nil return nil
} }
go func() { go func() {
err := filepath.WalkDir(p, walkFn) dirs := []string{"UPPERCASE", "LOWERCASE", "NUMBERS"}
close(userDirs) for _, dir := range dirs {
if err != nil { err := filepath.WalkDir(fmt.Sprintf(src, dir), walkFn)
panic(err) if err != nil {
panic(err)
}
} }
close(userDirs)
}() }()
totalUserDirs := 0
for ud := range userDirs { for ud := range userDirs {
totalUserDirs++
wg.Add(1) wg.Add(1)
go processUserDir(&wg, ud) go processUserDir(&wg, ud)
} }
wg.Wait() wg.Wait()
fmt.Printf("processed %d user dirs\n", totalUserDirs)
} }
func processUserDir(wg *sync.WaitGroup, ud string) { func processUserDir(wg *sync.WaitGroup, ud string) {
defer wg.Done() defer wg.Done()
fmt.Printf("GONNA PROCESS %s\n", ud)
var outFile *os.File
walkFn := func(s string, d os.DirEntry, err error) error {
if err != nil {
return err
}
if d.IsDir() {
return nil
}
if ignoreSuffix(s) {
return nil
}
fmt.Printf("READING: %s\n", s)
f, err := os.Open(s)
if err != nil {
return err
}
defer f.Close()
if !wantSuffix(s) {
// if it's not a text NOR an ignore suffix, try and guess if text
// cribbed from godoc's source
var buf [1024]byte
n, err := f.Read(buf[0:])
if err != nil {
fmt.Printf("\terror reading file: %s\n", err.Error())
return nil
}
if !IsText(buf[0:n]) {
fmt.Printf("NOT TEXT: %s\n", s)
return nil
}
}
if outFile == nil {
outFile, err = os.Create(filepath.Join(t, filepath.Base(ud)))
if err != nil {
panic(err)
}
}
all, err := os.ReadFile(s)
if err != nil {
panic(err)
}
wrote, _ := outFile.Write(all)
fmt.Printf("WROTE: %s (%d bytes)\n", s, wrote)
return nil
}
err := filepath.WalkDir(ud, walkFn)
if err != nil {
panic(err)
}
if outFile != nil {
outFile.Close()
}
}
// IsText reports whether a prefix of s looks like human-readable UTF-8 text.
//
// At most the first 1024 bytes are examined. The input is rejected if that
// prefix contains a UTF-8 decoding error (or a literal U+FFFD) or a control
// character other than newline, tab, form feed or carriage return. Runes that
// start within the last utf8.UTFMax-1 bytes of the examined prefix are not
// checked, because the final character may have been cut off mid-sequence;
// as a consequence very short inputs (including empty ones) report true.
//
// Adapted from godoc's source. Unlike the upstream version, '\r' is accepted
// so that files with CRLF line endings are recognised as text.
func IsText(s []byte) bool {
	const sniffLen = 1024 // at least utf8.UTFMax
	if len(s) > sniffLen {
		s = s[:sniffLen]
	}
	for i, c := range string(s) {
		if i+utf8.UTFMax > len(s) {
			// last char may be incomplete - ignore
			break
		}
		switch {
		case c == utf8.RuneError:
			// decoding error - not a text file
			return false
		case c < ' ' && c != '\n' && c != '\t' && c != '\f' && c != '\r':
			// control character - not a text file
			return false
		}
	}
	return true
}
// ignoreSuffices lists file extensions, without the leading dot, of files that
// are skipped outright. Matching is case-insensitive, so only the lower-case
// spelling of each extension is listed.
var ignoreSuffices = []string{
	"jpg",
	"jpeg",
	"gif",
	"js",
	"css",
	"mp3",
	"wav",
	"midi",
}

// wantSuffices lists file extensions, without the leading dot, of files that
// are accepted as text without sniffing their content. Matching is
// case-insensitive, so only the lower-case spelling of each extension is
// listed.
var wantSuffices = []string{
	"html",
	"htm",
	"txt",
}

// hasExt reports whether the extension of the final element of path p equals
// one of exts, ignoring case. Entries in exts carry no leading dot. A path
// whose final element has no extension never matches, so a bare name such as
// "notajpg" is not mistaken for a JPEG.
func hasExt(p string, exts []string) bool {
	ext := strings.TrimPrefix(filepath.Ext(p), ".")
	if ext == "" {
		return false
	}
	for _, e := range exts {
		if strings.EqualFold(ext, e) {
			return true
		}
	}
	return false
}

// ignoreSuffix reports whether p has one of the extensions in ignoreSuffices.
func ignoreSuffix(p string) bool {
	return hasExt(p, ignoreSuffices)
}

// wantSuffix reports whether p has one of the extensions in wantSuffices.
func wantSuffix(p string) bool {
	return hasExt(p, wantSuffices)
}