| 12345678910111213141516171819202122232425262728 |
- -- Migration: add_hash_and_destination_tracking
- -- Created at: 2026-01-06T19:47:58.000Z
- -- Add hash and file_size columns to files table for duplicate detection optimization
- ALTER TABLE files ADD COLUMN hash TEXT;
- ALTER TABLE files ADD COLUMN file_size INTEGER;
- -- Add destination_path column to track files in destination locations (vs source files)
- -- This helps us separate files that are being watched from files in destinations
- ALTER TABLE files ADD COLUMN destination_path TEXT;
- -- Create indexes for fast duplicate lookups
- CREATE INDEX IF NOT EXISTS idx_files_hash ON files(hash) WHERE hash IS NOT NULL;
- CREATE INDEX IF NOT EXISTS idx_files_hash_size ON files(hash, file_size) WHERE hash IS NOT NULL;
- CREATE INDEX IF NOT EXISTS idx_files_destination ON files(destination_path) WHERE destination_path IS NOT NULL;
- -- Create a view for easy duplicate detection
- CREATE VIEW IF NOT EXISTS file_duplicates AS
- SELECT
- hash,
- file_size,
- dataset,
- COUNT(*) as file_count,
- GROUP_CONCAT(CASE WHEN destination_path IS NOT NULL THEN destination_path ELSE input END, '|||') as file_paths
- FROM files
- WHERE hash IS NOT NULL
- GROUP BY hash, file_size, dataset
- HAVING COUNT(*) > 1;
|