feat(scanner): add folder hash for smarter quick scan change detection (#4220)

* Simplify folder hash migration

* fix hashing lint

* refactor

Signed-off-by: Deluan <deluan@navidrome.org>

* Update scanner/folder_entry.go

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

---------

Signed-off-by: Deluan <deluan@navidrome.org>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
This commit is contained in:
Deluan Quintão
2025-06-12 13:17:34 -04:00
committed by GitHub
parent 050aa173cc
commit 0d74d36cec
7 changed files with 559 additions and 70 deletions

View File

@@ -0,0 +1,21 @@
package migrations
import (
"context"
"database/sql"
"github.com/pressly/goose/v3"
)
// init registers this migration's up and down functions with goose.
func init() {
goose.AddMigrationContext(upAddFolderHash, downAddFolderHash)
}
// upAddFolderHash adds a NOT NULL `hash` column (varchar, default empty
// string) to the `folder` table inside the given transaction. Any error from
// executing the statement is returned unchanged.
func upAddFolderHash(ctx context.Context, tx *sql.Tx) error {
	const addHashColumn = `alter table folder add column hash varchar default '' not null;`
	if _, err := tx.ExecContext(ctx, addHashColumn); err != nil {
		return err
	}
	return nil
}
// downAddFolderHash is the down step registered alongside upAddFolderHash.
// It is a no-op: neither argument is used and nil is always returned, so the
// `hash` column is left in place.
func downAddFolderHash(_ context.Context, _ *sql.Tx) error {
	return nil
}

View File

@@ -25,6 +25,7 @@ type Folder struct {
NumPlaylists int `structs:"num_playlists"`
ImageFiles []string `structs:"image_files"`
ImagesUpdatedAt time.Time `structs:"images_updated_at"`
Hash string `structs:"hash"`
Missing bool `structs:"missing"`
UpdateAt time.Time `structs:"updated_at"`
CreatedAt time.Time `structs:"created_at"`
@@ -74,12 +75,17 @@ func NewFolder(lib Library, folderPath string) *Folder {
type FolderCursor iter.Seq2[Folder, error]
// FolderUpdateInfo is the per-folder value of the map returned by
// FolderRepository.GetLastUpdates.
type FolderUpdateInfo struct {
UpdatedAt time.Time // the folder's updated_at value
Hash string // the folder's stored hash value
}
type FolderRepository interface {
Get(id string) (*Folder, error)
GetByPath(lib Library, path string) (*Folder, error)
GetAll(...QueryOptions) ([]Folder, error)
CountAll(...QueryOptions) (int64, error)
GetLastUpdates(lib Library) (map[string]time.Time, error)
GetLastUpdates(lib Library) (map[string]FolderUpdateInfo, error)
Put(*Folder) error
MarkMissing(missing bool, ids ...string) error
GetTouchedWithPlaylists() (FolderCursor, error)

View File

@@ -89,19 +89,20 @@ func (r folderRepository) CountAll(opt ...model.QueryOptions) (int64, error) {
return r.count(sq)
}
func (r folderRepository) GetLastUpdates(lib model.Library) (map[string]time.Time, error) {
sq := r.newSelect().Columns("id", "updated_at").Where(Eq{"library_id": lib.ID, "missing": false})
func (r folderRepository) GetLastUpdates(lib model.Library) (map[string]model.FolderUpdateInfo, error) {
sq := r.newSelect().Columns("id", "updated_at", "hash").Where(Eq{"library_id": lib.ID, "missing": false})
var res []struct {
ID string
UpdatedAt time.Time
Hash string
}
err := r.queryAll(sq, &res)
if err != nil {
return nil, err
}
m := make(map[string]time.Time, len(res))
m := make(map[string]model.FolderUpdateInfo, len(res))
for _, f := range res {
m[f.ID] = f.UpdatedAt
m[f.ID] = model.FolderUpdateInfo{UpdatedAt: f.UpdatedAt, Hash: f.Hash}
}
return m, nil
}

96
scanner/folder_entry.go Normal file
View File

@@ -0,0 +1,96 @@
package scanner
import (
"crypto/md5"
"encoding/hex"
"fmt"
"io"
"io/fs"
"maps"
"slices"
"strings"
"time"
"github.com/navidrome/navidrome/core"
"github.com/navidrome/navidrome/model"
"github.com/navidrome/navidrome/utils/chrono"
)
// newFolderEntry builds the folderEntry for path inside job's library.
//
// The folder ID comes from model.FolderID, and the update info stored for that
// ID is popped from the job (job.popLastUpdate) to seed updTime and prevHash.
// The audio, image and album-ID maps start out empty but non-nil.
func newFolderEntry(job *scanJob, path string) *folderEntry {
	folderID := model.FolderID(job.lib, path)
	last := job.popLastUpdate(folderID)
	return &folderEntry{
		job:        job,
		id:         folderID,
		path:       path,
		updTime:    last.UpdatedAt,
		prevHash:   last.Hash,
		audioFiles: map[string]fs.DirEntry{},
		imageFiles: map[string]fs.DirEntry{},
		albumIDMap: map[string]string{},
	}
}
// folderEntry holds what the scanner knows about a single folder: values read
// from the filesystem, the previously stored update info, and the models
// collected for the folder.
type folderEntry struct {
job *scanJob // scan job this entry belongs to; provides the library and the stored update info
elapsed chrono.Meter
path string // Full path
id string // DB ID (computed with model.FolderID from the library and path)
modTime time.Time // From FS
updTime time.Time // from DB; zero when no previous update info exists (see isNew)
audioFiles map[string]fs.DirEntry // keys feed hash() and NumAudioFiles; presumably file names - confirm against the walker
imageFiles map[string]fs.DirEntry // keys become Folder.ImageFiles and feed hash()
numPlaylists int // copied to Folder.NumPlaylists only when core.InPlaylistsPath is true
numSubFolders int
imagesUpdatedAt time.Time
prevHash string // Previous hash from DB
tracks model.MediaFiles
albums model.Albums
albumIDMap map[string]string
artists model.Artists
tags model.TagList
missingTracks []*model.MediaFile
}
// hasNoFiles reports whether the folder is completely empty from the scanner's
// point of view: no audio files, no image files, no playlists, no subfolders.
func (f *folderEntry) hasNoFiles() bool {
	if len(f.audioFiles) > 0 || len(f.imageFiles) > 0 {
		return false
	}
	return f.numPlaylists == 0 && f.numSubFolders == 0
}
// isNew reports whether the entry has no previous update time, i.e. updTime
// is still the zero time.
func (f *folderEntry) isNew() bool {
	neverUpdated := f.updTime.IsZero()
	return neverUpdated
}
// toFolder converts the entry into a model.Folder for the job's library.
//
// NumAudioFiles is the number of audio files seen; NumPlaylists is only set
// when core.InPlaylistsPath accepts the folder; Hash is the entry's current
// hash(). ImageFiles is returned in sorted order.
func (f *folderEntry) toFolder() *model.Folder {
	folder := model.NewFolder(f.job.lib, f.path)
	folder.NumAudioFiles = len(f.audioFiles)
	if core.InPlaylistsPath(*folder) {
		folder.NumPlaylists = f.numPlaylists
	}
	// Map iteration order is random; sort the names so the resulting list is
	// the same on every run instead of being reshuffled on each scan.
	folder.ImageFiles = slices.Sorted(maps.Keys(f.imageFiles))
	folder.ImagesUpdatedAt = f.imagesUpdatedAt
	folder.Hash = f.hash()
	return folder
}
// hash returns the hex-encoded MD5 digest of the folder's current state.
//
// The digest covers, in this order: modTime (UTC), the sorted audio file
// names joined by ",", the sorted image file names joined by ",", the
// playlist and subfolder counts, and imagesUpdatedAt (UTC). Names are sorted
// so that map iteration order never influences the result.
func (f *folderEntry) hash() string {
	parts := []string{
		f.modTime.UTC().String(),
		strings.Join(slices.Sorted(maps.Keys(f.audioFiles)), ","),
		strings.Join(slices.Sorted(maps.Keys(f.imageFiles)), ","),
		fmt.Sprintf("%d-%d", f.numPlaylists, f.numSubFolders),
		f.imagesUpdatedAt.UTC().String(),
	}

	digest := md5.New()
	for _, part := range parts {
		// Writes to a hash.Hash never return an error.
		_, _ = io.WriteString(digest, part)
	}
	return hex.EncodeToString(digest.Sum(nil))
}
// isOutdated reports whether the folder needs to be processed again.
//
// During a full scan, an entry whose updTime is before the library's
// LastScanStartedAt is always outdated. Otherwise the entry is outdated
// exactly when its current hash() differs from prevHash.
func (f *folderEntry) isOutdated() bool {
	lib := &f.job.lib
	pendingInFullScan := lib.FullScanInProgress && f.updTime.Before(lib.LastScanStartedAt)
	return pendingInFullScan || f.hash() != f.prevHash
}

View File

@@ -0,0 +1,428 @@
package scanner
import (
"io/fs"
"time"
"github.com/navidrome/navidrome/conf"
"github.com/navidrome/navidrome/conf/configtest"
"github.com/navidrome/navidrome/model"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
// Specs for folderEntry: construction through newFolderEntry, the
// hasNoFiles/isNew predicates, conversion with toFolder, hash stability and
// sensitivity, and the isOutdated decision.
var _ = Describe("folder_entry", func() {
var (
lib model.Library
job *scanJob
path string
)
// Every spec starts from a fresh config, a library that is not in a full
// scan, and a job with no stored update info.
BeforeEach(func() {
DeferCleanup(configtest.SetupConfig())
lib = model.Library{
ID: 500,
Path: "/music",
LastScanStartedAt: time.Now().Add(-1 * time.Hour),
FullScanInProgress: false,
}
job = &scanJob{
lib: lib,
lastUpdates: make(map[string]model.FolderUpdateInfo),
}
path = "test/folder"
})
Describe("newFolderEntry", func() {
It("creates a new folder entry with correct initialization", func() {
folderID := model.FolderID(lib, path)
updateInfo := model.FolderUpdateInfo{
UpdatedAt: time.Now().Add(-30 * time.Minute),
Hash: "previous-hash",
}
job.lastUpdates[folderID] = updateInfo
entry := newFolderEntry(job, path)
Expect(entry.id).To(Equal(folderID))
Expect(entry.job).To(Equal(job))
Expect(entry.path).To(Equal(path))
Expect(entry.audioFiles).To(BeEmpty())
Expect(entry.imageFiles).To(BeEmpty())
Expect(entry.albumIDMap).To(BeEmpty())
Expect(entry.updTime).To(Equal(updateInfo.UpdatedAt))
Expect(entry.prevHash).To(Equal(updateInfo.Hash))
})
It("creates a new folder entry with zero time when no previous update exists", func() {
entry := newFolderEntry(job, path)
Expect(entry.updTime).To(BeZero())
Expect(entry.prevHash).To(BeEmpty())
})
It("removes the lastUpdate from the job after popping", func() {
folderID := model.FolderID(lib, path)
updateInfo := model.FolderUpdateInfo{
UpdatedAt: time.Now().Add(-30 * time.Minute),
Hash: "previous-hash",
}
job.lastUpdates[folderID] = updateInfo
newFolderEntry(job, path)
Expect(job.lastUpdates).ToNot(HaveKey(folderID))
})
})
Describe("folderEntry methods", func() {
var entry *folderEntry
// The entry under test has no stored update info, so updTime and
// prevHash start at their zero values.
BeforeEach(func() {
entry = newFolderEntry(job, path)
})
Describe("hasNoFiles", func() {
It("returns true when folder has no files or subfolders", func() {
Expect(entry.hasNoFiles()).To(BeTrue())
})
It("returns false when folder has audio files", func() {
entry.audioFiles["test.mp3"] = &fakeDirEntry{name: "test.mp3"}
Expect(entry.hasNoFiles()).To(BeFalse())
})
It("returns false when folder has image files", func() {
entry.imageFiles["cover.jpg"] = &fakeDirEntry{name: "cover.jpg"}
Expect(entry.hasNoFiles()).To(BeFalse())
})
It("returns false when folder has playlists", func() {
entry.numPlaylists = 1
Expect(entry.hasNoFiles()).To(BeFalse())
})
It("returns false when folder has subfolders", func() {
entry.numSubFolders = 1
Expect(entry.hasNoFiles()).To(BeFalse())
})
It("returns false when folder has multiple types of content", func() {
entry.audioFiles["test.mp3"] = &fakeDirEntry{name: "test.mp3"}
entry.imageFiles["cover.jpg"] = &fakeDirEntry{name: "cover.jpg"}
entry.numPlaylists = 2
entry.numSubFolders = 3
Expect(entry.hasNoFiles()).To(BeFalse())
})
})
Describe("isNew", func() {
It("returns true when updTime is zero", func() {
entry.updTime = time.Time{}
Expect(entry.isNew()).To(BeTrue())
})
It("returns false when updTime is not zero", func() {
entry.updTime = time.Now()
Expect(entry.isNew()).To(BeFalse())
})
})
Describe("toFolder", func() {
BeforeEach(func() {
entry.audioFiles = map[string]fs.DirEntry{
"song1.mp3": &fakeDirEntry{name: "song1.mp3"},
"song2.mp3": &fakeDirEntry{name: "song2.mp3"},
}
entry.imageFiles = map[string]fs.DirEntry{
"cover.jpg": &fakeDirEntry{name: "cover.jpg"},
"folder.png": &fakeDirEntry{name: "folder.png"},
}
entry.numPlaylists = 3
entry.imagesUpdatedAt = time.Now()
})
It("converts folder entry to model.Folder correctly", func() {
folder := entry.toFolder()
Expect(folder.LibraryID).To(Equal(lib.ID))
Expect(folder.ID).To(Equal(entry.id))
Expect(folder.NumAudioFiles).To(Equal(2))
// ConsistOf ignores element order, so this does not depend on map iteration order.
Expect(folder.ImageFiles).To(ConsistOf("cover.jpg", "folder.png"))
Expect(folder.ImagesUpdatedAt).To(Equal(entry.imagesUpdatedAt))
Expect(folder.Hash).To(Equal(entry.hash()))
})
It("sets NumPlaylists when folder is in playlists path", func() {
// Mock InPlaylistsPath to return true by setting empty PlaylistsPath
originalPath := conf.Server.PlaylistsPath
conf.Server.PlaylistsPath = ""
DeferCleanup(func() { conf.Server.PlaylistsPath = originalPath })
folder := entry.toFolder()
Expect(folder.NumPlaylists).To(Equal(3))
})
It("does not set NumPlaylists when folder is not in playlists path", func() {
// Mock InPlaylistsPath to return false by setting a different path
originalPath := conf.Server.PlaylistsPath
conf.Server.PlaylistsPath = "different/path"
DeferCleanup(func() { conf.Server.PlaylistsPath = originalPath })
folder := entry.toFolder()
Expect(folder.NumPlaylists).To(BeZero())
})
})
Describe("hash", func() {
// Fixed timestamps keep the hash inputs identical across runs.
BeforeEach(func() {
entry.modTime = time.Date(2023, 1, 15, 12, 0, 0, 0, time.UTC)
entry.imagesUpdatedAt = time.Date(2023, 1, 16, 14, 30, 0, 0, time.UTC)
})
It("produces deterministic hash for same content", func() {
entry.audioFiles = map[string]fs.DirEntry{
"b.mp3": &fakeDirEntry{name: "b.mp3"},
"a.mp3": &fakeDirEntry{name: "a.mp3"},
}
entry.imageFiles = map[string]fs.DirEntry{
"z.jpg": &fakeDirEntry{name: "z.jpg"},
"x.png": &fakeDirEntry{name: "x.png"},
}
entry.numPlaylists = 2
entry.numSubFolders = 3
hash1 := entry.hash()
// Reverse order of maps
entry.audioFiles = map[string]fs.DirEntry{
"a.mp3": &fakeDirEntry{name: "a.mp3"},
"b.mp3": &fakeDirEntry{name: "b.mp3"},
}
entry.imageFiles = map[string]fs.DirEntry{
"x.png": &fakeDirEntry{name: "x.png"},
"z.jpg": &fakeDirEntry{name: "z.jpg"},
}
hash2 := entry.hash()
Expect(hash1).To(Equal(hash2))
})
It("produces different hash when audio files change", func() {
entry.audioFiles = map[string]fs.DirEntry{
"song1.mp3": &fakeDirEntry{name: "song1.mp3"},
}
hash1 := entry.hash()
entry.audioFiles["song2.mp3"] = &fakeDirEntry{name: "song2.mp3"}
hash2 := entry.hash()
Expect(hash1).ToNot(Equal(hash2))
})
It("produces different hash when image files change", func() {
entry.imageFiles = map[string]fs.DirEntry{
"cover.jpg": &fakeDirEntry{name: "cover.jpg"},
}
hash1 := entry.hash()
entry.imageFiles["folder.png"] = &fakeDirEntry{name: "folder.png"}
hash2 := entry.hash()
Expect(hash1).ToNot(Equal(hash2))
})
It("produces different hash when modification time changes", func() {
hash1 := entry.hash()
entry.modTime = entry.modTime.Add(1 * time.Hour)
hash2 := entry.hash()
Expect(hash1).ToNot(Equal(hash2))
})
It("produces different hash when playlist count changes", func() {
hash1 := entry.hash()
entry.numPlaylists = 5
hash2 := entry.hash()
Expect(hash1).ToNot(Equal(hash2))
})
It("produces different hash when subfolder count changes", func() {
hash1 := entry.hash()
entry.numSubFolders = 3
hash2 := entry.hash()
Expect(hash1).ToNot(Equal(hash2))
})
It("produces different hash when images updated time changes", func() {
hash1 := entry.hash()
entry.imagesUpdatedAt = entry.imagesUpdatedAt.Add(2 * time.Hour)
hash2 := entry.hash()
Expect(hash1).ToNot(Equal(hash2))
})
It("produces valid hex-encoded hash", func() {
hash := entry.hash()
Expect(hash).To(HaveLen(32)) // MD5 hash should be 32 hex characters
Expect(hash).To(MatchRegexp("^[a-f0-9]{32}$"))
})
})
Describe("isOutdated", func() {
// Start with prevHash equal to the current hash, so an entry is only
// reported as outdated because of what the individual spec changes.
BeforeEach(func() {
entry.prevHash = entry.hash()
})
Context("when full scan is in progress", func() {
BeforeEach(func() {
entry.job.lib.FullScanInProgress = true
entry.job.lib.LastScanStartedAt = time.Now()
})
It("returns true when updTime is before LastScanStartedAt", func() {
entry.updTime = entry.job.lib.LastScanStartedAt.Add(-1 * time.Hour)
Expect(entry.isOutdated()).To(BeTrue())
})
It("returns false when updTime is after LastScanStartedAt", func() {
entry.updTime = entry.job.lib.LastScanStartedAt.Add(1 * time.Hour)
Expect(entry.isOutdated()).To(BeFalse())
})
It("returns false when updTime equals LastScanStartedAt", func() {
entry.updTime = entry.job.lib.LastScanStartedAt
Expect(entry.isOutdated()).To(BeFalse())
})
})
Context("when full scan is not in progress", func() {
BeforeEach(func() {
entry.job.lib.FullScanInProgress = false
})
It("returns false when hash hasn't changed", func() {
Expect(entry.isOutdated()).To(BeFalse())
})
It("returns true when hash has changed", func() {
entry.numPlaylists = 10 // Change something to change the hash
Expect(entry.isOutdated()).To(BeTrue())
})
It("returns true when prevHash is empty", func() {
entry.prevHash = ""
Expect(entry.isOutdated()).To(BeTrue())
})
})
Context("priority between conditions", func() {
BeforeEach(func() {
entry.job.lib.FullScanInProgress = true
entry.job.lib.LastScanStartedAt = time.Now()
entry.updTime = entry.job.lib.LastScanStartedAt.Add(-1 * time.Hour)
})
It("returns true for full scan condition even when hash hasn't changed", func() {
// Hash is the same but full scan condition should take priority
Expect(entry.isOutdated()).To(BeTrue())
})
It("returns true when full scan condition is not met but hash changed", func() {
entry.updTime = entry.job.lib.LastScanStartedAt.Add(1 * time.Hour)
entry.numPlaylists = 10 // Change hash
Expect(entry.isOutdated()).To(BeTrue())
})
})
})
})
Describe("integration scenarios", func() {
It("handles complete folder lifecycle", func() {
// Create new folder entry
entry := newFolderEntry(job, "music/rock/album")
// Initially new and has no files
Expect(entry.isNew()).To(BeTrue())
Expect(entry.hasNoFiles()).To(BeTrue())
// Add some files
entry.audioFiles["track1.mp3"] = &fakeDirEntry{name: "track1.mp3"}
entry.audioFiles["track2.mp3"] = &fakeDirEntry{name: "track2.mp3"}
entry.imageFiles["cover.jpg"] = &fakeDirEntry{name: "cover.jpg"}
entry.numSubFolders = 1
entry.modTime = time.Now()
entry.imagesUpdatedAt = time.Now()
// No longer empty
Expect(entry.hasNoFiles()).To(BeFalse())
// Set previous hash to current hash (simulating it's been saved)
entry.prevHash = entry.hash()
entry.updTime = time.Now()
// Should not be new or outdated
Expect(entry.isNew()).To(BeFalse())
Expect(entry.isOutdated()).To(BeFalse())
// Convert to model folder
folder := entry.toFolder()
Expect(folder.NumAudioFiles).To(Equal(2))
Expect(folder.ImageFiles).To(HaveLen(1))
Expect(folder.Hash).To(Equal(entry.hash()))
// Modify folder and verify it becomes outdated
entry.audioFiles["track3.mp3"] = &fakeDirEntry{name: "track3.mp3"}
Expect(entry.isOutdated()).To(BeTrue())
})
})
})
// Compile-time checks that the fakes satisfy the interfaces they stand in for.
var (
	_ fs.DirEntry = (*fakeDirEntry)(nil)
	_ fs.FileInfo = (*fakeFileInfo)(nil)
)

// fakeDirEntry implements fs.DirEntry for testing.
//
// A directory can be described either by setting isDir or by putting
// fs.ModeDir in typ; IsDir, Type and Info agree with each other in both cases.
type fakeDirEntry struct {
	name  string
	isDir bool
	typ   fs.FileMode
}

// Name returns the configured entry name.
func (f *fakeDirEntry) Name() string { return f.name }

// IsDir reports whether the entry is a directory, via isDir or the mode bits.
func (f *fakeDirEntry) IsDir() bool { return f.isDir || f.typ.IsDir() }

// Type returns typ, with fs.ModeDir added when isDir is set so that
// Type().IsDir() never contradicts IsDir().
func (f *fakeDirEntry) Type() fs.FileMode {
	if f.isDir {
		return f.typ | fs.ModeDir
	}
	return f.typ
}

// Info returns a fakeFileInfo mirroring the entry's name, directory flag and
// mode. It never fails.
func (f *fakeDirEntry) Info() (fs.FileInfo, error) {
	return &fakeFileInfo{
		name:  f.name,
		isDir: f.IsDir(),
		mode:  f.Type(),
	}, nil
}

// fakeFileInfo implements fs.FileInfo for testing.
type fakeFileInfo struct {
	name    string
	size    int64
	mode    fs.FileMode
	modTime time.Time
	isDir   bool
}

// Name returns the configured file name.
func (f *fakeFileInfo) Name() string { return f.name }

// Size returns the configured size.
func (f *fakeFileInfo) Size() int64 { return f.size }

// Mode returns mode, with fs.ModeDir added when isDir is set so that
// Mode().IsDir() never contradicts IsDir().
func (f *fakeFileInfo) Mode() fs.FileMode {
	if f.isDir {
		return f.mode | fs.ModeDir
	}
	return f.mode
}

// ModTime returns the configured modification time.
func (f *fakeFileInfo) ModTime() time.Time { return f.modTime }

// IsDir reports whether the file is a directory, via isDir or the mode bits.
func (f *fakeFileInfo) IsDir() bool { return f.isDir || f.mode.IsDir() }

// Sys always returns nil; the fake has no underlying data source.
func (f *fakeFileInfo) Sys() any { return nil }

View File

@@ -62,7 +62,7 @@ type scanJob struct {
lib model.Library
fs storage.MusicFS
cw artwork.CacheWarmer
lastUpdates map[string]time.Time
lastUpdates map[string]model.FolderUpdateInfo
lock sync.Mutex
numFolders atomic.Int64
}
@@ -91,7 +91,7 @@ func newScanJob(ctx context.Context, ds model.DataStore, cw artwork.CacheWarmer,
}, nil
}
func (j *scanJob) popLastUpdate(folderID string) time.Time {
func (j *scanJob) popLastUpdate(folderID string) model.FolderUpdateInfo {
j.lock.Lock()
defer j.lock.Unlock()

View File

@@ -9,78 +9,15 @@ import (
"slices"
"sort"
"strings"
"time"
"github.com/navidrome/navidrome/conf"
"github.com/navidrome/navidrome/consts"
"github.com/navidrome/navidrome/core"
"github.com/navidrome/navidrome/log"
"github.com/navidrome/navidrome/model"
"github.com/navidrome/navidrome/utils"
"github.com/navidrome/navidrome/utils/chrono"
ignore "github.com/sabhiram/go-gitignore"
)
// folderEntry holds what the scanner knows about a single folder: values read
// from the filesystem, the previously stored update time, and the models
// collected for the folder.
type folderEntry struct {
job *scanJob // scan job this entry belongs to; provides the library and the stored update time
elapsed chrono.Meter
path string // Full path
id string // DB ID (computed with model.FolderID from the library and path)
modTime time.Time // From FS
updTime time.Time // from DB; zero when no previous update exists (see isNew)
audioFiles map[string]fs.DirEntry // len() becomes Folder.NumAudioFiles; keys presumably file names - confirm against the walker
imageFiles map[string]fs.DirEntry // keys become Folder.ImageFiles
numPlaylists int // copied to Folder.NumPlaylists only when core.InPlaylistsPath is true
numSubFolders int
imagesUpdatedAt time.Time
tracks model.MediaFiles
albums model.Albums
albumIDMap map[string]string
artists model.Artists
tags model.TagList
missingTracks []*model.MediaFile
}
// hasNoFiles reports whether the folder is completely empty from the scanner's
// point of view: no audio files, no image files, no playlists, no subfolders.
func (f *folderEntry) hasNoFiles() bool {
	if len(f.audioFiles) > 0 || len(f.imageFiles) > 0 {
		return false
	}
	return f.numPlaylists == 0 && f.numSubFolders == 0
}
// isNew reports whether the entry has no previous update time, i.e. updTime
// is still the zero time.
func (f *folderEntry) isNew() bool {
	neverUpdated := f.updTime.IsZero()
	return neverUpdated
}
// toFolder converts the entry into a model.Folder for the job's library.
//
// NumAudioFiles is the number of audio files seen and NumPlaylists is only
// set when core.InPlaylistsPath accepts the folder. ImageFiles is returned in
// sorted order.
func (f *folderEntry) toFolder() *model.Folder {
	folder := model.NewFolder(f.job.lib, f.path)
	folder.NumAudioFiles = len(f.audioFiles)
	if core.InPlaylistsPath(*folder) {
		folder.NumPlaylists = f.numPlaylists
	}
	// Map iteration order is random; sort the names so the resulting list is
	// the same on every run instead of being reshuffled on each scan.
	folder.ImageFiles = slices.Sorted(maps.Keys(f.imageFiles))
	folder.ImagesUpdatedAt = f.imagesUpdatedAt
	return folder
}
// newFolderEntry builds the folderEntry for path inside job's library.
//
// The folder ID comes from model.FolderID, and the update time stored for
// that ID is popped from the job (job.popLastUpdate) to seed updTime. The
// audio, image and album-ID maps start out empty but non-nil.
func newFolderEntry(job *scanJob, path string) *folderEntry {
	folderID := model.FolderID(job.lib, path)
	entry := &folderEntry{
		job:  job,
		id:   folderID,
		path: path,
	}
	entry.audioFiles = map[string]fs.DirEntry{}
	entry.imageFiles = map[string]fs.DirEntry{}
	entry.albumIDMap = map[string]string{}
	entry.updTime = job.popLastUpdate(folderID)
	return entry
}
// isOutdated reports whether the folder needs to be processed again, by
// comparing updTime against a reference time: the library's
// LastScanStartedAt while a full scan is in progress, otherwise modTime.
// The entry is outdated when updTime is strictly before that reference.
func (f *folderEntry) isOutdated() bool {
	reference := f.modTime
	if f.job.lib.FullScanInProgress {
		reference = f.job.lib.LastScanStartedAt
	}
	return f.updTime.Before(reference)
}
func walkDirTree(ctx context.Context, job *scanJob) (<-chan *folderEntry, error) {
results := make(chan *folderEntry)
go func() {