Files
stash/pkg/sqlite/migrations/84_migrate.go
Gykes 57ddec93e0 Fix: Update Postmigration 84 to Handle De-Duplicate of Folders. (#6792)
* update postmigration to handle deduplicate folders.
* Split post-migration to perform some tasks before the schema migration
* Reparent files and delete duplicate folder if possible
---------
Co-authored-by: WithoutPants <53250216+WithoutPants@users.noreply.github.com>
2026-04-07 16:28:01 +10:00

534 lines
13 KiB
Go

package migrations
import (
"context"
"database/sql"
"errors"
"fmt"
"path/filepath"
"slices"
"time"
"github.com/jmoiron/sqlx"
"github.com/stashapp/stash/internal/manager/config"
"github.com/stashapp/stash/pkg/logger"
"github.com/stashapp/stash/pkg/sqlite"
"gopkg.in/guregu/null.v4"
)
// pre84 is the pre-migration hook for schema version 84 (see init).
//
// It repairs the folder tree in three ordered steps: create parent folders
// that are missing, correct folders that point at the wrong parent, and merge
// folders that would end up with the same (parent, basename) pair. The first
// failing step aborts the hook and its error is returned wrapped with a
// description of that step.
func pre84(ctx context.Context, db *sqlx.DB) error {
	logger.Info("Running pre-migration for schema version 84")

	m := schema84Migrator{
		migrator:    migrator{db: db},
		folderCache: make(map[string]folderInfo),
	}

	// the configured library root paths; folders at these paths have no parent
	rootPaths := config.GetInstance().GetStashPaths().Paths()

	// steps run strictly in the order listed here
	steps := []struct {
		wrapFmt string
		run     func() error
	}{
		{
			wrapFmt: "creating missing folder hierarchies: %w",
			run:     func() error { return m.createMissingFolderHierarchies(ctx, rootPaths) },
		},
		{
			wrapFmt: "fixing incorrect parent folders: %w",
			run:     func() error { return m.fixIncorrectParents(ctx, rootPaths) },
		},
		{
			wrapFmt: "deduplicating folders: %w",
			run:     func() error { return m.deduplicateFolders(ctx) },
		},
	}

	for _, step := range steps {
		if err := step.run(); err != nil {
			return fmt.Errorf(step.wrapFmt, err)
		}
	}

	return nil
}
// post84 is the post-migration hook for schema version 84 (see init).
//
// Its only task is to populate the basename of every folder via
// migrateFolders. Any failure is returned wrapped as "migrating folders".
func post84(ctx context.Context, db *sqlx.DB) error {
	logger.Info("Running post-migration for schema version 84")

	m := &schema84Migrator{
		migrator:    migrator{db: db},
		folderCache: map[string]folderInfo{},
	}

	err := m.migrateFolders(ctx)
	if err == nil {
		return nil
	}

	return fmt.Errorf("migrating folders: %w", err)
}
// schema84Migrator holds the state used by the pre- and post-migration steps
// for schema version 84.
type schema84Migrator struct {
// migrator is embedded; the methods in this file use its db handle and its
// withTxn helper.
migrator
// folderCache is created empty by pre84 and post84.
// NOTE(review): no method visible in this file reads or writes it - confirm
// whether it is still required.
folderCache map[string]folderInfo
}
// createMissingFolderHierarchies assigns a parent to folders whose
// parent_folder_id is NULL, creating the ancestor folder rows when they do
// not exist yet. This has to happen before basenames are set.
//
// Folders are visited in ascending ID order, 1000 per transaction. A folder
// keeps its NULL parent when its path is one of rootPaths, when its path is
// its own parent directory (e.g. "C:\"), or when no parent folder could be
// resolved for it. Progress is logged whenever the running total is a
// multiple of 10000 at the end of a batch.
func (m *schema84Migrator) createMissingFolderHierarchies(ctx context.Context, rootPaths []string) error {
	const (
		batchSize     = 1000
		progressEvery = 10000
	)

	var (
		cursor    int  // ID of the last folder read; 0 until a row has been seen
		processed int  // total number of folders read so far
		announced bool // whether the one-off info message was already logged
		sawRows   bool // whether the current batch returned any rows
	)

	// linkParent resolves (or creates) the parent of a single folder and
	// stores its ID on the folder row.
	linkParent := func(tx *sqlx.Tx, id int, p string) error {
		// don't try to create parent folders for root paths
		if slices.Contains(rootPaths, p) {
			return nil
		}

		parentDir := filepath.Dir(p)
		if parentDir == p {
			// happens for paths such as "C:\", whose parent directory is the
			// directory itself
			return nil
		}

		parentID, err := m.getOrCreateFolderHierarchy(tx, parentDir, rootPaths)
		if err != nil {
			return fmt.Errorf("error creating parent folder for folder %d %q: %w", id, p, err)
		}
		if parentID == nil {
			return nil
		}

		logger.Debugf("Migrating folder %d %q: setting parent folder ID to %d", id, p, *parentID)
		if _, err := tx.Exec("UPDATE `folders` SET `parent_folder_id` = ? WHERE `id` = ?", *parentID, id); err != nil {
			return fmt.Errorf("error setting parent folder for folder %d %q: %w", id, p, err)
		}

		return nil
	}

	// processBatch handles the next batch of parentless folders after cursor.
	processBatch := func(tx *sqlx.Tx) error {
		afterCursor := ""
		if cursor != 0 {
			afterCursor = fmt.Sprintf("AND `folders`.`id` > %d ", cursor)
		}
		query := "SELECT `folders`.`id`, `folders`.`path` FROM `folders` WHERE `folders`.`parent_folder_id` IS NULL " +
			afterCursor +
			fmt.Sprintf("ORDER BY `folders`.`id` LIMIT %d", batchSize)

		rows, err := tx.Query(query)
		if err != nil {
			return err
		}
		defer rows.Close()

		for rows.Next() {
			// log once if we find any folders with missing parent folders
			if !announced {
				logger.Info("Migrating folders with missing parents...")
				announced = true
			}

			var (
				id int
				p  string
			)
			if err := rows.Scan(&id, &p); err != nil {
				return err
			}

			cursor = id
			sawRows = true
			processed++

			if err := linkParent(tx, id, p); err != nil {
				return err
			}
		}

		return rows.Err()
	}

	for {
		sawRows = false

		if err := m.withTxn(ctx, processBatch); err != nil {
			return err
		}

		if !sawRows {
			break
		}

		if processed%progressEvery == 0 {
			logger.Infof("Migrated %d folders", processed)
		}
	}

	return nil
}
// findFolderByPath looks up the ID of the folder row whose path equals path.
//
// It returns (nil, nil) when no such row exists; any other query error is
// returned unchanged.
func (m *schema84Migrator) findFolderByPath(tx *sqlx.Tx, path string) (*int, error) {
	var folderID int

	err := tx.Get(&folderID, "SELECT `folders`.`id` FROM `folders` WHERE `folders`.`path` = ?", path)
	switch {
	case err == nil:
		return &folderID, nil
	case errors.Is(err, sql.ErrNoRows):
		// a missing folder is not an error for callers
		return nil, nil
	default:
		return nil, err
	}
}
// getOrCreateFolderHierarchy returns the ID of the folder row for path,
// inserting it - and, recursively, any missing ancestors - when it does not
// exist yet.
//
// Recursion stops at a path contained in rootPaths, which is inserted without
// a parent. If the walk reaches a path that is its own parent directory
// without finding a row or a root path (folders outside the root paths),
// (nil, nil) is returned for that path and the folder below it is inserted
// without a parent.
//
// This is a copy of the GetOrCreateFolderHierarchy function from
// pkg/file/folder.go, modified to use low-level SQL queries instead of the
// models.FolderFinderCreator interface.
// NOTE(review): the original comment was cut off after "to avoid"; presumably
// this avoids depending on model code during the migration - confirm.
func (m *schema84Migrator) getOrCreateFolderHierarchy(tx *sqlx.Tx, path string, rootPaths []string) (*int, error) {
	existing, err := m.findFolderByPath(tx, path)
	if err != nil {
		return nil, err
	}
	if existing != nil {
		return existing, nil
	}

	// the folder is missing: resolve its parent first, unless it is a root path
	var parentID *int
	if !slices.Contains(rootPaths, path) {
		parentPath := filepath.Dir(path)

		// the parent path equals the current path once the top of the
		// filesystem is reached, which happens for folders outside the root
		// paths; there is nothing to create in that case
		if parentPath == path {
			return nil, nil
		}

		parentID, err = m.getOrCreateFolderHierarchy(tx, parentPath, rootPaths)
		if err != nil {
			return nil, err
		}
	}

	logger.Debugf("%s doesn't exist. Creating new folder entry...", path)

	// no basename is written here; migrateFolders derives it from the path later
	const insertSQL = "INSERT INTO `folders` (`path`,`parent_folder_id`,`mod_time`,`created_at`,`updated_at`) VALUES (?,?,?,?,?)"

	// stays invalid (SQL NULL) when no parent was resolved
	var parentFolderID null.Int
	if parentID != nil {
		parentFolderID = null.IntFrom(int64(*parentID))
	}

	now := time.Now()
	result, err := tx.Exec(insertSQL, path, parentFolderID, time.Time{}, now, now)
	if err != nil {
		return nil, fmt.Errorf("creating folder %s: %w", path, err)
	}

	insertedID, err := result.LastInsertId()
	if err != nil {
		return nil, fmt.Errorf("creating folder %s: %w", path, err)
	}

	newID := int(insertedID)
	return &newID, nil
}
// fixIncorrectParents re-points folders whose recorded parent folder does not
// have the path filepath.Dir(folder path).
//
// Every folder that has a parent is visited in ascending ID order, 1000 per
// transaction. When the parent's path differs from the expected one, the
// correct parent is looked up or created with getOrCreateFolderHierarchy and
// stored on the folder; folders for which no parent can be resolved are left
// unchanged. The number of corrected folders is logged at the end if it is
// greater than zero.
func (m *schema84Migrator) fixIncorrectParents(ctx context.Context, rootPaths []string) error {
	const (
		batchSize     = 1000
		progressEvery = 10000
	)

	var (
		cursor    int  // ID of the last folder read; 0 until a row has been seen
		checked   int  // total number of folders inspected
		repaired  int  // number of folders whose parent was changed
		announced bool // whether the one-off info message was already logged
		sawRows   bool // whether the current batch returned any rows
	)

	// repair checks one folder against its recorded parent and fixes the
	// parent reference when the paths do not line up.
	repair := func(tx *sqlx.Tx, id int, p string, currentParentID int, currentParentPath string) error {
		wantParentPath := filepath.Dir(p)
		if wantParentPath == currentParentPath {
			return nil
		}

		wantParentID, err := m.getOrCreateFolderHierarchy(tx, wantParentPath, rootPaths)
		if err != nil {
			return fmt.Errorf("error getting/creating correct parent for folder %d %q: %w", id, p, err)
		}
		if wantParentID == nil {
			return nil
		}

		// log once, and only when something actually needs fixing
		if !announced {
			logger.Info("Fixing folders with incorrect parent folder assignments...")
			announced = true
		}

		logger.Debugf("Fixing folder %d %q: changing parent_folder_id from %d to %d", id, p, currentParentID, *wantParentID)
		if _, err := tx.Exec("UPDATE `folders` SET `parent_folder_id` = ? WHERE `id` = ?", *wantParentID, id); err != nil {
			return fmt.Errorf("error fixing parent folder for folder %d %q: %w", id, p, err)
		}

		repaired++
		return nil
	}

	// processBatch inspects the next batch of folders after cursor.
	processBatch := func(tx *sqlx.Tx) error {
		afterCursor := ""
		if cursor != 0 {
			afterCursor = fmt.Sprintf("WHERE f.id > %d ", cursor)
		}
		query := "SELECT f.id, f.path, f.parent_folder_id, pf.path AS parent_path " +
			"FROM folders f " +
			"JOIN folders pf ON f.parent_folder_id = pf.id " +
			afterCursor +
			fmt.Sprintf("ORDER BY f.id LIMIT %d", batchSize)

		rows, err := tx.Query(query)
		if err != nil {
			return err
		}
		defer rows.Close()

		for rows.Next() {
			var (
				id             int
				p              string
				parentFolderID int
				parentPath     string
			)
			if err := rows.Scan(&id, &p, &parentFolderID, &parentPath); err != nil {
				return err
			}

			cursor = id
			sawRows = true
			checked++

			if err := repair(tx, id, p, parentFolderID, parentPath); err != nil {
				return err
			}
		}

		return rows.Err()
	}

	for {
		sawRows = false

		if err := m.withTxn(ctx, processBatch); err != nil {
			return err
		}

		if !sawRows {
			break
		}

		if checked%progressEvery == 0 {
			logger.Infof("Checked %d folders", checked)
		}
	}

	if repaired > 0 {
		logger.Infof("Fixed %d folders with incorrect parent assignments", repaired)
	}

	return nil
}
// deduplicateFolders merges folders that would share the same
// (parent_folder_id, basename) pair once migrateFolders sets
// basename = filepath.Base(path).
//
// Such duplicates arise when the database holds several entries for the same
// physical folder under different path spellings (e.g. mixed separators like
// "\data/movies" vs "\data\movies" on Windows).
//
// Passes are repeated until one merges nothing: moving the children of a
// duplicate parent under the canonical parent can itself produce new
// conflicts, which only the following pass will see.
func (m *schema84Migrator) deduplicateFolders(ctx context.Context) error {
	for {
		merged, err := m.deduplicateFoldersPass(ctx)
		switch {
		case err != nil:
			return err
		case merged == 0:
			return nil
		}
	}
}
// deduplicateFoldersPass performs a single deduplication sweep and returns
// the number of folders that were merged away.
//
// All folders with a non-NULL parent are loaded in ID order and grouped by
// (parent_folder_id, filepath.Base(path)). In each group with more than one
// member, one folder is kept and every other member is merged into it with
// mergeFolder, each merge in its own transaction. On error the returned count
// is 0.
func (m *schema84Migrator) deduplicateFoldersPass(ctx context.Context) (int, error) {
	type folderRow struct {
		ID             int    `db:"id"`
		Path           string `db:"path"`
		ParentFolderID int    `db:"parent_folder_id"`
	}

	// siblingKey identifies folders that would collide after basenames are set
	type siblingKey struct {
		parentID int
		basename string
	}

	var rows []folderRow
	err := m.db.SelectContext(ctx, &rows,
		"SELECT id, path, parent_folder_id FROM folders WHERE parent_folder_id IS NOT NULL ORDER BY id")
	if err != nil {
		return 0, fmt.Errorf("loading folders: %w", err)
	}

	// rows arrive ordered by id, so each group is ordered by id as well
	bySibling := make(map[siblingKey][]folderRow)
	for _, row := range rows {
		k := siblingKey{
			parentID: row.ParentFolderID,
			basename: filepath.Base(row.Path),
		}
		bySibling[k] = append(bySibling[k], row)
	}

	// a path is considered normalized for the current OS when Clean leaves it unchanged
	isNormalized := func(f folderRow) bool {
		return f.Path == filepath.Clean(f.Path)
	}

	merged := 0
	for _, candidates := range bySibling {
		if len(candidates) < 2 {
			continue
		}

		if merged == 0 {
			logger.Info("Deduplicating folders with conflicting basenames...")
		}

		// keep the first folder whose path is already normalized; otherwise
		// fall back to the newest entry (highest ID), which is the most likely
		// to come from the current filesystem
		survivor := candidates[len(candidates)-1]
		if i := slices.IndexFunc(candidates, isNormalized); i >= 0 {
			survivor = candidates[i]
		}

		for _, dup := range candidates {
			if dup.ID == survivor.ID {
				continue
			}

			logger.Infof("Merging duplicate folder %d %q into folder %d %q", dup.ID, dup.Path, survivor.ID, survivor.Path)

			mergeErr := m.withTxn(ctx, func(tx *sqlx.Tx) error {
				return m.mergeFolder(tx, survivor.ID, dup.ID)
			})
			if mergeErr != nil {
				return 0, fmt.Errorf("merging folder %d into %d: %w", dup.ID, survivor.ID, mergeErr)
			}

			merged++
		}
	}

	if merged > 0 {
		logger.Infof("Deduplicated %d folder entries", merged)
	}

	return merged, nil
}
// mergeFolder folds the duplicate folder dupID into the canonical folder
// keepID within the transaction tx.
//
// Child folders and files of the duplicate are moved under keepID. The
// duplicate row is then deleted, unless galleries still reference it; in that
// case it is kept but detached from its parent (parent_folder_id = NULL) and
// a warning is logged.
func (m *schema84Migrator) mergeFolder(tx *sqlx.Tx, keepID, dupID int) error {
	// Move everything that hangs off the duplicate over to the canonical
	// folder: child folders first, then files.
	// NOTE(review): the original comment states that basenames still hold full
	// (unique) paths at this point, so re-parenting child folders cannot
	// violate the UNIQUE constraint on (parent_folder_id, basename) - this
	// cannot be verified from this file.
	reparent := []struct {
		stmt    string
		wrapFmt string
	}{
		{
			stmt:    "UPDATE folders SET parent_folder_id = ? WHERE parent_folder_id = ?",
			wrapFmt: "re-parenting child folders: %w",
		},
		{
			stmt:    "UPDATE files SET parent_folder_id = ? WHERE parent_folder_id = ?",
			wrapFmt: "re-parenting files: %w",
		},
	}
	for _, r := range reparent {
		if _, err := tx.Exec(r.stmt, keepID, dupID); err != nil {
			return fmt.Errorf(r.wrapFmt, err)
		}
	}

	// the duplicate may only be deleted when no gallery points at it
	var galleryRefs int
	if err := tx.Get(&galleryRefs, "SELECT COUNT(*) FROM galleries WHERE folder_id = ?", dupID); err != nil {
		return fmt.Errorf("checking for gallery references: %w", err)
	}

	if galleryRefs <= 0 {
		if _, err := tx.Exec("DELETE FROM folders WHERE id = ?", dupID); err != nil {
			return fmt.Errorf("deleting duplicate folder: %w", err)
		}
		return nil
	}

	logger.Warnf("Duplicate folder %d is still referenced by %d galleries. Orphaning instead of deleting.", dupID, galleryRefs)

	// Orphan the stale duplicate by clearing its parent, so that it no longer
	// shares a (parent_folder_id, basename) pair with the canonical folder
	// when migrateFolders sets basenames. Its child folders and files were
	// already moved to the canonical folder above.
	if _, err := tx.Exec("UPDATE folders SET parent_folder_id = NULL WHERE id = ?", dupID); err != nil {
		return fmt.Errorf("orphaning duplicate folder: %w", err)
	}

	return nil
}
// migrateFolders sets the basename column of every folder row to
// filepath.Base of the folder's path.
//
// Folders are visited in ascending ID order, 1000 per transaction. Progress
// is logged whenever the running total is a multiple of 10000 at the end of a
// batch.
func (m *schema84Migrator) migrateFolders(ctx context.Context) error {
	const (
		batchSize     = 1000
		progressEvery = 10000
	)

	var (
		cursor    int  // ID of the last folder read; 0 until a row has been seen
		processed int  // total number of folders updated so far
		announced bool // whether the one-off info message was already logged
		sawRows   bool // whether the current batch returned any rows
	)

	// processBatch updates the next batch of folders after cursor.
	processBatch := func(tx *sqlx.Tx) error {
		afterCursor := ""
		if cursor != 0 {
			afterCursor = fmt.Sprintf("WHERE `folders`.`id` > %d ", cursor)
		}
		query := "SELECT `folders`.`id`, `folders`.`path` FROM `folders` " +
			afterCursor +
			fmt.Sprintf("ORDER BY `folders`.`id` LIMIT %d", batchSize)

		rows, err := tx.Query(query)
		if err != nil {
			return err
		}
		defer rows.Close()

		for rows.Next() {
			// log once, and only if there is at least one folder
			if !announced {
				logger.Infof("Migrating folders to set basenames...")
				announced = true
			}

			var (
				id int
				p  string
			)
			if err := rows.Scan(&id, &p); err != nil {
				return err
			}

			cursor = id
			sawRows = true
			processed++

			name := filepath.Base(p)
			logger.Debugf("Migrating folder %d %q: setting basename to %q", id, p, name)

			if _, err := tx.Exec("UPDATE `folders` SET `basename` = ? WHERE `id` = ?", name, id); err != nil {
				return fmt.Errorf("error migrating folder %d %q: %w", id, p, err)
			}
		}

		return rows.Err()
	}

	for {
		sawRows = false

		if err := m.withTxn(ctx, processBatch); err != nil {
			return err
		}

		if !sawRows {
			break
		}

		if processed%progressEvery == 0 {
			logger.Infof("Migrated %d folders", processed)
		}
	}

	return nil
}
// init registers pre84 and post84 with the sqlite package as the pre- and
// post-migration hooks for schema version 84.
func init() {
sqlite.RegisterPreMigration(84, pre84)
sqlite.RegisterPostMigration(84, post84)
}