202 lines
3.2 KiB
Go
202 lines
3.2 KiB
Go
package main
|
|
|
|
import (
|
|
"fmt"
|
|
"os"
|
|
"path/filepath"
|
|
"strings"
|
|
"sync"
|
|
"unicode/utf8"
|
|
)
|
|
|
|
const (
|
|
// TODO LOWERCASE
|
|
// TODO NUMBERS
|
|
src = "/home/vilmibm/geocities/%s/geocities/YAHOOIDS"
|
|
userDirPattern = "/home/vilmibm/geocities/*/geocities/YAHOOIDS/?/?/*"
|
|
t = "/home/vilmibm/gc"
|
|
)
|
|
|
|
func main() {
|
|
userDirs := make(chan string)
|
|
var wg sync.WaitGroup
|
|
|
|
walkFn := func(s string, d os.DirEntry, err error) error {
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
if !d.IsDir() {
|
|
return nil
|
|
}
|
|
|
|
isUserDir, err := filepath.Match(userDirPattern, s)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
if isUserDir {
|
|
userDirs <- s
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
go func() {
|
|
dirs := []string{"UPPERCASE", "LOWERCASE", "NUMBERS"}
|
|
for _, dir := range dirs {
|
|
err := filepath.WalkDir(fmt.Sprintf(src, dir), walkFn)
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
}
|
|
close(userDirs)
|
|
}()
|
|
|
|
totalUserDirs := 0
|
|
|
|
for ud := range userDirs {
|
|
totalUserDirs++
|
|
wg.Add(1)
|
|
go processUserDir(&wg, ud)
|
|
}
|
|
|
|
wg.Wait()
|
|
|
|
fmt.Printf("processed %d user dirs\n", totalUserDirs)
|
|
}
|
|
|
|
func processUserDir(wg *sync.WaitGroup, ud string) {
|
|
defer wg.Done()
|
|
|
|
var outFile *os.File
|
|
walkFn := func(s string, d os.DirEntry, err error) error {
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if d.IsDir() {
|
|
return nil
|
|
}
|
|
if ignoreSuffix(s) {
|
|
return nil
|
|
}
|
|
fmt.Printf("READING: %s\n", s)
|
|
f, err := os.Open(s)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer f.Close()
|
|
if !wantSuffix(s) {
|
|
// if it's not a text NOR an ignore suffix, try and guess if text
|
|
|
|
// cribbed from godoc's source
|
|
var buf [1024]byte
|
|
n, err := f.Read(buf[0:])
|
|
if err != nil {
|
|
fmt.Printf("\terror reading file: %s\n", err.Error())
|
|
return nil
|
|
}
|
|
|
|
if !IsText(buf[0:n]) {
|
|
fmt.Printf("NOT TEXT: %s\n", s)
|
|
return nil
|
|
}
|
|
}
|
|
|
|
if outFile == nil {
|
|
outFile, err = os.Create(filepath.Join(t, filepath.Base(ud)))
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
}
|
|
all, err := os.ReadFile(s)
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
wrote, _ := outFile.Write(all)
|
|
fmt.Printf("WROTE: %s (%d bytes)\n", s, wrote)
|
|
|
|
return nil
|
|
}
|
|
|
|
err := filepath.WalkDir(ud, walkFn)
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
if outFile != nil {
|
|
outFile.Close()
|
|
}
|
|
}
|
|
|
|
// from godoc source
|
|
func IsText(s []byte) bool {
|
|
const max = 1024 // at least utf8.UTFMax
|
|
if len(s) > max {
|
|
s = s[0:max]
|
|
}
|
|
for i, c := range string(s) {
|
|
if i+utf8.UTFMax > len(s) {
|
|
// last char may be incomplete - ignore
|
|
break
|
|
}
|
|
if c == 0xFFFD || c < ' ' && c != '\n' && c != '\t' && c != '\f' {
|
|
// decoding error or control character - not a text file
|
|
return false
|
|
}
|
|
}
|
|
return true
|
|
}
|
|
|
|
var ignoreSuffices []string
|
|
var wantSuffices []string
|
|
|
|
func init() {
|
|
ignoreSuffices = []string{
|
|
"jpg",
|
|
"jpeg",
|
|
"gif",
|
|
"js",
|
|
"css",
|
|
"mp3",
|
|
"wav",
|
|
"midi",
|
|
"JPG",
|
|
"JPEG",
|
|
"GIF",
|
|
"JS",
|
|
"CSS",
|
|
"MP3",
|
|
"WAV",
|
|
"MIDI",
|
|
}
|
|
wantSuffices = []string{
|
|
"html",
|
|
"htm",
|
|
"txt",
|
|
"HTML",
|
|
"HTM",
|
|
"TXT",
|
|
}
|
|
}
|
|
|
|
func ignoreSuffix(p string) bool {
|
|
for _, s := range ignoreSuffices {
|
|
if strings.HasSuffix(p, s) {
|
|
return true
|
|
}
|
|
}
|
|
|
|
return false
|
|
}
|
|
|
|
func wantSuffix(p string) bool {
|
|
for _, s := range wantSuffices {
|
|
if strings.HasSuffix(p, s) {
|
|
return true
|
|
}
|
|
}
|
|
|
|
return false
|
|
|
|
}
|