wip
This commit is contained in:
parent
68d33f39fa
commit
6632a3e9de
@ -133,6 +133,7 @@ func worker(opts CutupOpts, paths <-chan string, sources chan<- string) {
|
||||
}
|
||||
if sourceid == "" {
|
||||
sourceid = db.StrToID(title)
|
||||
fmt.Fprintln(os.Stderr, sourceid, p)
|
||||
prefix = sourceid + "\t"
|
||||
of, err = os.Create(path.Join(opts.CutupDir, sourceid))
|
||||
if err != nil {
|
||||
|
@ -4,10 +4,18 @@ import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"sync"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
const p = "/home/vilmibm/geocities/UPPERCASE/geocities/YAHOOIDS"
|
||||
const (
|
||||
// TODO LOWERCASE
|
||||
// TODO NUMBERS
|
||||
src = "/home/vilmibm/geocities/%s/geocities/YAHOOIDS"
|
||||
userDirPattern = "/home/vilmibm/geocities/*/geocities/YAHOOIDS/?/?/*"
|
||||
t = "/home/vilmibm/gc"
|
||||
)
|
||||
|
||||
func main() {
|
||||
userDirs := make(chan string)
|
||||
@ -17,11 +25,12 @@ func main() {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if !d.IsDir() {
|
||||
return nil
|
||||
}
|
||||
|
||||
isUserDir, err := filepath.Match("/home/vilmibm/geocities/UPPERCASE/geocities/YAHOOIDS/?/?/*", s)
|
||||
isUserDir, err := filepath.Match(userDirPattern, s)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@ -30,29 +39,163 @@ func main() {
|
||||
userDirs <- s
|
||||
}
|
||||
|
||||
//if len(d.Name()) > 1 && d.IsDir() {
|
||||
// fmt.Printf("%s %s\n", s, d.Name())
|
||||
//}
|
||||
// TODO be able to tell when s is a full path (ie to a file for user)
|
||||
// TODO sniff what kind of file full path points to
|
||||
// TODO if text, read and append to file for that user
|
||||
return nil
|
||||
}
|
||||
|
||||
go func() {
|
||||
err := filepath.WalkDir(p, walkFn)
|
||||
close(userDirs)
|
||||
dirs := []string{"UPPERCASE", "LOWERCASE", "NUMBERS"}
|
||||
for _, dir := range dirs {
|
||||
err := filepath.WalkDir(fmt.Sprintf(src, dir), walkFn)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
close(userDirs)
|
||||
}()
|
||||
|
||||
totalUserDirs := 0
|
||||
|
||||
for ud := range userDirs {
|
||||
totalUserDirs++
|
||||
wg.Add(1)
|
||||
go processUserDir(&wg, ud)
|
||||
}
|
||||
|
||||
wg.Wait()
|
||||
|
||||
fmt.Printf("processed %d user dirs\n", totalUserDirs)
|
||||
}
|
||||
|
||||
func processUserDir(wg *sync.WaitGroup, ud string) {
|
||||
defer wg.Done()
|
||||
fmt.Printf("GONNA PROCESS %s\n", ud)
|
||||
|
||||
var outFile *os.File
|
||||
walkFn := func(s string, d os.DirEntry, err error) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if d.IsDir() {
|
||||
return nil
|
||||
}
|
||||
if ignoreSuffix(s) {
|
||||
return nil
|
||||
}
|
||||
fmt.Printf("READING: %s\n", s)
|
||||
f, err := os.Open(s)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer f.Close()
|
||||
if !wantSuffix(s) {
|
||||
// if it's not a text NOR an ignore suffix, try and guess if text
|
||||
|
||||
// cribbed from godoc's source
|
||||
var buf [1024]byte
|
||||
n, err := f.Read(buf[0:])
|
||||
if err != nil {
|
||||
fmt.Printf("\terror reading file: %s\n", err.Error())
|
||||
return nil
|
||||
}
|
||||
|
||||
if !IsText(buf[0:n]) {
|
||||
fmt.Printf("NOT TEXT: %s\n", s)
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
if outFile == nil {
|
||||
outFile, err = os.Create(filepath.Join(t, filepath.Base(ud)))
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
all, err := os.ReadFile(s)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
wrote, _ := outFile.Write(all)
|
||||
fmt.Printf("WROTE: %s (%d bytes)\n", s, wrote)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
err := filepath.WalkDir(ud, walkFn)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
if outFile != nil {
|
||||
outFile.Close()
|
||||
}
|
||||
}
|
||||
|
||||
// from godoc source
|
||||
func IsText(s []byte) bool {
|
||||
const max = 1024 // at least utf8.UTFMax
|
||||
if len(s) > max {
|
||||
s = s[0:max]
|
||||
}
|
||||
for i, c := range string(s) {
|
||||
if i+utf8.UTFMax > len(s) {
|
||||
// last char may be incomplete - ignore
|
||||
break
|
||||
}
|
||||
if c == 0xFFFD || c < ' ' && c != '\n' && c != '\t' && c != '\f' {
|
||||
// decoding error or control character - not a text file
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
var ignoreSuffices []string
|
||||
var wantSuffices []string
|
||||
|
||||
func init() {
|
||||
ignoreSuffices = []string{
|
||||
"jpg",
|
||||
"jpeg",
|
||||
"gif",
|
||||
"js",
|
||||
"css",
|
||||
"mp3",
|
||||
"wav",
|
||||
"midi",
|
||||
"JPG",
|
||||
"JPEG",
|
||||
"GIF",
|
||||
"JS",
|
||||
"CSS",
|
||||
"MP3",
|
||||
"WAV",
|
||||
"MIDI",
|
||||
}
|
||||
wantSuffices = []string{
|
||||
"html",
|
||||
"htm",
|
||||
"txt",
|
||||
"HTML",
|
||||
"HTM",
|
||||
"TXT",
|
||||
}
|
||||
}
|
||||
|
||||
func ignoreSuffix(p string) bool {
|
||||
for _, s := range ignoreSuffices {
|
||||
if strings.HasSuffix(p, s) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
func wantSuffix(p string) bool {
|
||||
for _, s := range wantSuffices {
|
||||
if strings.HasSuffix(p, s) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user