
notifications fix and migration helper

Timothy Pomeroy 4 weeks ago
parent
commit
65687ead03

+ 35 - 2
apps/service/src/app.controller.ts

@@ -406,6 +406,38 @@ export class AppController {
     return result;
   }
 
+  // Duplicate file review endpoints
+  @Post('maintenance/duplicates/scan')
+  scanDuplicates(@Body('resetExisting') resetExisting?: boolean) {
+    return this.appService.scanDuplicateFiles(resetExisting);
+  }
+
+  @Get('maintenance/duplicates')
+  listDuplicates(
+    @Query('status') status?: string,
+    @Query('dataset') dataset?: string,
+  ) {
+    return this.appService.listDuplicateGroups(status, dataset);
+  }
+
+  @Post('maintenance/duplicates/:id/mark')
+  markDuplicate(
+    @Param('id') id: string,
+    @Body('status') status: 'pending' | 'reviewed' | 'purged',
+    @Body('note') note?: string,
+  ) {
+    return this.appService.markDuplicateGroup(Number(id), status, note);
+  }
+
+  @Post('maintenance/duplicates/:id/purge')
+  purgeDuplicate(
+    @Param('id') id: string,
+    @Body('files') files: string[],
+    @Body('note') note?: string,
+  ) {
+    return this.appService.purgeDuplicateFiles(Number(id), files || [], note);
+  }
+
   @Get('config/settings')
   getSettings(
     @Query('key') key?: string,
@@ -530,8 +562,9 @@ export class AppController {
   }
 
   @Post('tasks/stop-processing')
-  stopTaskProcessing() {
-    return this.appService.stopTaskProcessing();
+  stopTaskProcessing(@Body('graceful') graceful?: boolean) {
+    const shouldGraceful = graceful !== false;
+    return this.appService.stopTaskProcessing(shouldGraceful);
   }
 
   // Task management endpoints
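For reference, the new review endpoints can be exercised from any HTTP client. A minimal TypeScript sketch, assuming the service is reachable at http://localhost:3001 (the base URL is an assumption, not part of this commit):

// Hypothetical base URL; point this at wherever the Nest service listens.
const BASE = "http://localhost:3001";

async function reviewDuplicates() {
  // Trigger a scan without clearing previously recorded groups.
  await fetch(`${BASE}/maintenance/duplicates/scan`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ resetExisting: false })
  });

  // List groups still awaiting review.
  const pending = await fetch(
    `${BASE}/maintenance/duplicates?status=pending`
  ).then((r) => r.json());

  // Mark the first group as reviewed, i.e. not a real duplicate.
  if (pending.length > 0) {
    await fetch(`${BASE}/maintenance/duplicates/${pending[0].id}/mark`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ status: "reviewed", note: "not_duplicate" })
    });
  }
}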

+ 23 - 2
apps/service/src/app.service.ts

@@ -159,8 +159,8 @@ export class AppService {
     return this.taskQueue.start();
   }
 
-  stopTaskProcessing() {
-    return this.taskQueue.stop();
+  stopTaskProcessing(graceful = true) {
+    return this.taskQueue.stop(graceful);
   }
 
   taskProcessingStatus() {
@@ -189,6 +189,27 @@ export class AppService {
     return this.db.purgeAllTasks();
   }
 
+  // Duplicate management
+  scanDuplicateFiles(resetExisting?: boolean) {
+    return this.maintenance.findDuplicateFiles({ resetExisting });
+  }
+
+  listDuplicateGroups(status?: string, dataset?: string) {
+    return this.db.listDuplicateGroups(status, dataset);
+  }
+
+  markDuplicateGroup(
+    id: number,
+    status: 'pending' | 'reviewed' | 'purged',
+    note?: string,
+  ) {
+    return this.db.markDuplicateGroup(id, status, note);
+  }
+
+  purgeDuplicateFiles(id: number, files: string[], note?: string) {
+    return this.maintenance.purgeDuplicateFiles(id, files, note);
+  }
+
   // Scheduled maintenance
   scheduledTaskCleanup() {
     return this.maintenance.scheduledTaskCleanup();

+ 231 - 86
apps/service/src/db.service.ts

@@ -2,6 +2,7 @@ import { Injectable } from '@nestjs/common';
 import Database from 'better-sqlite3';
 import fs from 'fs';
 import path from 'path';
+import { MigrationRunner } from './migration-runner';
 
 @Injectable()
 export class DbService {
@@ -49,7 +50,11 @@ export class DbService {
 
     try {
       this.db = new Database(rootDataPath);
-      this.migrate();
+
+      // Run migrations
+      const migrationsDir = path.resolve(projectRoot, 'data/migrations');
+      const migrationRunner = new MigrationRunner(this.db, migrationsDir);
+      migrationRunner.applyPendingMigrations();
     } catch (error) {
       console.error('Failed to open database:', error);
       console.error('Database path:', rootDataPath);
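The MigrationRunner itself is not included in this diff. A minimal sketch of what such a helper could look like, assuming *.sql migration files in data/migrations and a schema_migrations bookkeeping table (the table name, file naming, and internals are assumptions, not taken from the commit):

import Database from "better-sqlite3";
import fs from "fs";
import path from "path";

// Sketch only: applies *.sql files in lexical order and records each one,
// so every migration runs exactly once per database.
export class MigrationRunner {
  constructor(
    private readonly db: Database.Database,
    private readonly migrationsDir: string,
  ) {}

  applyPendingMigrations() {
    this.db.exec(
      "CREATE TABLE IF NOT EXISTS schema_migrations (name TEXT PRIMARY KEY, applied_at TEXT DEFAULT CURRENT_TIMESTAMP)",
    );

    const applied = new Set(
      this.db
        .prepare("SELECT name FROM schema_migrations")
        .all()
        .map((row: any) => row.name),
    );

    const files = fs.existsSync(this.migrationsDir)
      ? fs.readdirSync(this.migrationsDir).filter((f) => f.endsWith(".sql")).sort()
      : [];

    for (const file of files) {
      if (applied.has(file)) continue;
      const sql = fs.readFileSync(path.join(this.migrationsDir, file), "utf8");
      // Run the migration and its bookkeeping insert atomically.
      const apply = this.db.transaction(() => {
        this.db.exec(sql);
        this.db.prepare("INSERT INTO schema_migrations (name) VALUES (?)").run(file);
      });
      apply();
    }
  }
}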
@@ -102,6 +107,7 @@ export class DbService {
         input TEXT,
         output TEXT,
         date TEXT,
+        status TEXT DEFAULT 'pending',
         PRIMARY KEY (dataset, input)
       );
     `);
@@ -125,64 +131,197 @@ export class DbService {
     }
   }
 
-  private migrate() {
-    // Migration logic from legacy db.js
-    this.db.exec(`
-      CREATE TABLE IF NOT EXISTS files (
-        dataset TEXT,
-        input TEXT,
-        output TEXT,
-        date TEXT,
-        PRIMARY KEY (dataset, input)
-      );
+  // Duplicate file review helpers
+  clearDuplicateGroups() {
+    this.db.prepare('DELETE FROM duplicate_files').run();
+  }
 
-      CREATE TABLE IF NOT EXISTS tasks (
-        id INTEGER PRIMARY KEY AUTOINCREMENT,
-        type TEXT NOT NULL,
-        status TEXT DEFAULT 'pending',
-        progress INTEGER DEFAULT 0,
-        dataset TEXT,
-        input TEXT,
-        output TEXT,
-        preset TEXT,
-        priority INTEGER DEFAULT 0,
-        retry_count INTEGER DEFAULT 0,
-        max_retries INTEGER,
-        error_message TEXT,
-        created_at TEXT DEFAULT CURRENT_TIMESTAMP,
-        updated_at TEXT DEFAULT CURRENT_TIMESTAMP
-      );
-    `);
+  getDuplicateGroup(id: number) {
+    const row = this.db
+      .prepare('SELECT * FROM duplicate_files WHERE id = ?')
+      .get(id) as
+      | {
+          id: number;
+          dataset: string;
+          destination: string;
+          hash: string;
+          size: number;
+          files: string;
+          status: string;
+          note?: string;
+          created_at: string;
+          reviewed_at?: string;
+        }
+      | undefined;
 
-    // Add missing columns to existing tasks table
-    this.addMissingColumns();
-  }
-
-  private addMissingColumns() {
-    const tableInfo = this.db
-      .prepare('PRAGMA table_info(tasks)')
-      .all() as any[];
-    const existingColumns = tableInfo.map((col) => col.name);
-
-    const columnsToAdd = [
-      { name: 'dataset', type: 'TEXT' },
-      { name: 'input', type: 'TEXT' },
-      { name: 'output', type: 'TEXT' },
-      { name: 'preset', type: 'TEXT' },
-      { name: 'error_message', type: 'TEXT' },
-      { name: 'priority', type: 'INTEGER DEFAULT 0' },
-      { name: 'retry_count', type: 'INTEGER DEFAULT 0' },
-      { name: 'max_retries', type: 'INTEGER' },
-    ];
+    if (!row) return undefined;
+    return { ...row, files: this.safeParseFiles(row.files) };
+  }
 
-    for (const col of columnsToAdd) {
-      if (!existingColumns.includes(col.name)) {
-        try {
-          this.db.exec(`ALTER TABLE tasks ADD COLUMN ${col.name} ${col.type}`);
-        } catch (error) {
-          // Column may already exist or migration not needed
+  getDuplicateGroupByKey(
+    dataset: string,
+    destination: string,
+    hash: string,
+    size: number,
+  ) {
+    const row = this.db
+      .prepare(
+        'SELECT * FROM duplicate_files WHERE dataset = ? AND destination = ? AND hash = ? AND size = ?',
+      )
+      .get(dataset, destination, hash, size) as
+      | {
+          id: number;
+          dataset: string;
+          destination: string;
+          hash: string;
+          size: number;
+          files: string;
+          status: string;
+          note?: string;
+          created_at: string;
+          reviewed_at?: string;
         }
-      }
+      | undefined;
+
+    if (!row) return undefined;
+    return { ...row, files: this.safeParseFiles(row.files) };
+  }
+
+  saveDuplicateGroup(entry: {
+    dataset: string;
+    destination: string;
+    hash: string;
+    size: number;
+    files: string[];
+  }) {
+    const existing = this.getDuplicateGroupByKey(
+      entry.dataset,
+      entry.destination,
+      entry.hash,
+      entry.size,
+    );
+
+    // Do not re-flag entries that were manually reviewed/ignored
+    if (existing && existing.status === 'reviewed') {
+      return existing;
+    }
+
+    const filesJson = JSON.stringify(entry.files);
+
+    if (existing) {
+      this.db
+        .prepare(
+          `UPDATE duplicate_files
+           SET files = ?, size = ?, status = 'pending', note = note, reviewed_at = NULL
+           WHERE id = ?`,
+        )
+        .run(filesJson, entry.size, existing.id);
+      return {
+        ...existing,
+        files: entry.files,
+        size: entry.size,
+        status: 'pending',
+      };
+    }
+
+    const result = this.db
+      .prepare(
+        `INSERT INTO duplicate_files (dataset, destination, hash, size, files)
+         VALUES (?, ?, ?, ?, ?)`,
+      )
+      .run(entry.dataset, entry.destination, entry.hash, entry.size, filesJson);
+
+    return {
+      ...entry,
+      id: result.lastInsertRowid as number,
+      status: 'pending',
+      note: null,
+    };
+  }
+
+  listDuplicateGroups(status?: string, dataset?: string) {
+    let query = 'SELECT * FROM duplicate_files';
+    const params: any[] = [];
+
+    if (status) {
+      query += ' WHERE status = ?';
+      params.push(status);
+    }
+
+    if (dataset) {
+      query += status ? ' AND dataset = ?' : ' WHERE dataset = ?';
+      params.push(dataset);
+    }
+
+    query += ' ORDER BY created_at DESC';
+
+    const rows = this.db.prepare(query).all(...params) as Array<{
+      id: number;
+      dataset: string;
+      destination: string;
+      hash: string;
+      size: number;
+      files: string;
+      status: string;
+      note?: string;
+      created_at: string;
+      reviewed_at?: string;
+    }>;
+
+    return rows.map((row) => ({
+      ...row,
+      files: this.safeParseFiles(row.files),
+    }));
+  }
+
+  markDuplicateGroup(
+    id: number,
+    status: 'pending' | 'reviewed' | 'purged',
+    note?: string,
+  ) {
+    return this.db
+      .prepare(
+        `UPDATE duplicate_files
+         SET status = ?, note = COALESCE(?, note), reviewed_at = CURRENT_TIMESTAMP
+         WHERE id = ?`,
+      )
+      .run(status, note || null, id);
+  }
+
+  updateDuplicateGroupFiles(
+    id: number,
+    files: string[],
+    status?: 'pending' | 'reviewed' | 'purged',
+    note?: string,
+  ) {
+    return this.db
+      .prepare(
+        `UPDATE duplicate_files
+         SET files = ?,
+             status = COALESCE(?, status),
+             note = COALESCE(?, note),
+             reviewed_at = CASE WHEN ? IS NOT NULL THEN CURRENT_TIMESTAMP ELSE reviewed_at END
+         WHERE id = ?`,
+      )
+      .run(
+        JSON.stringify(files),
+        status || null,
+        note || null,
+        status || null,
+        id,
+      );
+  }
+
+  deleteDuplicateGroup(id: number) {
+    return this.db.prepare('DELETE FROM duplicate_files WHERE id = ?').run(id);
+  }
+
+  private safeParseFiles(value: string): string[] {
+    try {
+      const parsed = JSON.parse(value);
+      return Array.isArray(parsed) ? parsed : [];
+    } catch {
+      return [];
     }
   }
 
@@ -192,45 +331,51 @@ export class DbService {
       .get(dataset, file);
   }
 
-  setFile(dataset: string, file: any, payload?: any) {
-    if (!payload && typeof file === 'object') {
-      const rec = file;
-      this.db
-        .prepare(
-          'INSERT INTO files (dataset, input, output, date) VALUES (?, ?, ?, ?)',
-        )
-        .run(
-          dataset,
-          rec.input,
-          rec.output,
-          rec.date ? new Date(rec.date).toISOString() : null,
-        );
-      return;
-    }
-    const found = this.findFile(dataset, file as string);
-    if (found) {
+  setFile(dataset: string, file: string, payload: any) {
+    const existing = this.findFile(dataset, file) as
+      | {
+          dataset: string;
+          input: string;
+          output?: string;
+          date?: string;
+          status?: string;
+        }
+      | undefined;
+
+    const outputValue =
+      payload && payload.output !== undefined
+        ? payload.output
+        : (existing?.output ?? null);
+
+    const statusValue =
+      payload && payload.status !== undefined
+        ? payload.status
+        : (existing?.status ?? 'pending');
+
+    const dateValue =
+      payload && payload.date !== undefined
+        ? new Date(payload.date).toISOString()
+        : existing?.date || new Date().toISOString();
+
+    if (existing) {
       this.db
         .prepare(
-          'UPDATE files SET output = COALESCE(?, output), date = COALESCE(?, date) WHERE dataset = ? AND input = ?',
+          `UPDATE files
+           SET output = COALESCE(?, output),
+               date = COALESCE(?, date),
+               status = COALESCE(?, status)
+           WHERE dataset = ? AND input = ?`,
         )
-        .run(
-          payload.output,
-          payload.date ? new Date(payload.date).toISOString() : null,
-          dataset,
-          file,
-        );
+        .run(outputValue, dateValue, statusValue, dataset, file);
     } else {
       this.db
         .prepare(
-          'INSERT INTO files (dataset, input, output, date) VALUES (?, ?, ?, ?)',
+          'INSERT INTO files (dataset, input, output, date, status) VALUES (?, ?, ?, ?, ?)',
         )
-        .run(
-          dataset,
-          file,
-          payload.output,
-          payload.date ? new Date(payload.date).toISOString() : null,
-        );
+        .run(dataset, file, outputValue, dateValue, statusValue);
     }
+
+    return this.findFile(dataset, file);
   }
 
   removeFile(dataset: string, file: string, soft = true) {

+ 221 - 1
apps/service/src/maintenance.service.ts

@@ -1,13 +1,19 @@
 import { Injectable, Logger } from '@nestjs/common';
 import { Cron, CronExpression } from '@nestjs/schedule';
+import crypto from 'crypto';
 import fs from 'fs';
+import path from 'path';
+import { DatasetsService } from './datasets.service';
 import { DbService } from './db.service';
 
 @Injectable()
 export class MaintenanceService {
   private logger = new Logger('MaintenanceService');
 
-  constructor(private readonly db: DbService) {}
+  constructor(
+    private readonly db: DbService,
+    private readonly datasetsService: DatasetsService,
+  ) {}
 
   cleanup(file: string, dirs: string[]) {
     for (let i = 0, l = dirs.length; i < l; i++) {
@@ -101,4 +107,218 @@ export class MaintenanceService {
       );
     }
   }
+
+  /**
+   * Scan destination folders (as defined in settings.datasets) for duplicate files.
+   * Duplicates are recorded in the duplicate_files table for manual review.
+   */
+  findDuplicateFiles(options: { resetExisting?: boolean } = {}) {
+    const { resetExisting = false } = options;
+
+    if (resetExisting) {
+      this.db.clearDuplicateGroups();
+    }
+
+    const existing = this.db.listDuplicateGroups();
+    const existingMap = new Map<
+      string,
+      { id: number; status: string; files: string[] }
+    >();
+
+    for (const row of existing) {
+      const key = `${row.dataset}|${row.destination}|${row.hash}|${row.size}`;
+      existingMap.set(key, {
+        id: row.id,
+        status: row.status,
+        files: row.files,
+      });
+    }
+
+    const datasetConfig = this.datasetsService.getDatasetConfig();
+    const duplicates: Array<{
+      dataset: string;
+      destination: string;
+      hash: string;
+      size: number;
+      files: string[];
+    }> = [];
+
+    for (const [datasetName, datasetObj] of Object.entries(datasetConfig)) {
+      if (
+        !datasetObj ||
+        datasetObj.enabled === false ||
+        datasetObj.enabled === 'false'
+      ) {
+        continue;
+      }
+
+      const destinations = this.collectDestinations(datasetObj);
+      for (const destination of destinations) {
+        if (!destination || !fs.existsSync(destination)) {
+          this.logger.warn(
+            `Destination not found for dataset ${datasetName}: ${destination}`,
+          );
+          continue;
+        }
+
+        const groups = this.scanDestinationForDuplicates(destination);
+        for (const group of groups) {
+          const entry = {
+            dataset: datasetName,
+            destination,
+            hash: group.hash,
+            size: group.size,
+            files: group.files,
+          };
+
+          const key = `${entry.dataset}|${entry.destination}|${entry.hash}|${entry.size}`;
+          const existingEntry = existingMap.get(key);
+
+          // Skip groups that were marked reviewed/ignored previously
+          if (existingEntry && existingEntry.status === 'reviewed') {
+            continue;
+          }
+
+          duplicates.push(entry);
+
+          if (existingEntry) {
+            this.db.updateDuplicateGroupFiles(
+              existingEntry.id,
+              entry.files,
+              'pending',
+            );
+          } else {
+            this.db.saveDuplicateGroup(entry);
+          }
+        }
+
+        if (groups.length) {
+          this.logger.warn(
+            `Found ${groups.length} duplicate group(s) in destination ${destination} (dataset: ${datasetName})`,
+          );
+        }
+      }
+    }
+
+    return duplicates;
+  }
+
+  private collectDestinations(datasetObj: Record<string, any>): Set<string> {
+    const destinations = new Set<string>();
+
+    if (datasetObj.destination && typeof datasetObj.destination === 'string') {
+      destinations.add(datasetObj.destination);
+    }
+
+    for (const [pathKey, cfg] of Object.entries(datasetObj)) {
+      if (pathKey === 'enabled') continue;
+      if (cfg && typeof cfg === 'object' && cfg.destination) {
+        destinations.add(cfg.destination);
+      }
+    }
+
+    return destinations;
+  }
+
+  private scanDestinationForDuplicates(destination: string) {
+    const files = this.walkFiles(destination);
+    const groups = new Map<string, { size: number; files: string[] }>();
+
+    for (const filePath of files) {
+      try {
+        const stat = fs.statSync(filePath);
+        if (!stat.isFile()) continue;
+
+        const hash = this.hashFile(filePath);
+        if (hash) {
+          const key = `${hash}:${stat.size}`;
+          const group = groups.get(key) || { size: stat.size, files: [] };
+          group.files.push(filePath);
+          groups.set(key, group);
+        }
+      } catch (error) {
+        this.logger.warn(
+          `Failed to process file for duplicate scan: ${filePath} (${error})`,
+        );
+      }
+    }
+
+    return Array.from(groups.entries())
+      .filter(([, group]) => group.files.length > 1)
+      .map(([key, group]) => ({
+        hash: key.split(':')[0],
+        size: group.size,
+        files: group.files,
+      }));
+  }
+
+  private walkFiles(root: string) {
+    const pending = [root];
+    const files: string[] = [];
+
+    while (pending.length) {
+      const current = pending.pop();
+      if (!current) continue;
+
+      let stat: fs.Stats;
+      try {
+        stat = fs.statSync(current);
+      } catch {
+        continue;
+      }
+
+      if (stat.isDirectory()) {
+        const children = fs.readdirSync(current);
+        for (const child of children) {
+          pending.push(path.join(current, child));
+        }
+      } else if (stat.isFile()) {
+        files.push(current);
+      }
+    }
+
+    return files;
+  }
+
+  private hashFile(filePath: string): string | null {
+    try {
+      const hash = crypto.createHash('sha1');
+      const data = fs.readFileSync(filePath);
+      hash.update(data);
+      return hash.digest('hex');
+    } catch (error) {
+      this.logger.warn(`Hashing failed for ${filePath}: ${error}`);
+      return null;
+    }
+  }
+
+  purgeDuplicateFiles(id: number, files: string[], note?: string) {
+    const group = this.db.getDuplicateGroup(id);
+    if (!group) {
+      throw new Error('Duplicate group not found');
+    }
+
+    const toDelete = files && files.length > 0 ? files : [];
+    const deleted: string[] = [];
+    const errors: Array<{ file: string; error: string }> = [];
+
+    for (const filePath of toDelete) {
+      try {
+        if (fs.existsSync(filePath)) {
+          fs.unlinkSync(filePath);
+          deleted.push(filePath);
+        } else {
+          errors.push({ file: filePath, error: 'File not found' });
+        }
+      } catch (error) {
+        errors.push({ file: filePath, error: (error as Error).message });
+      }
+    }
+
+    const remaining = group.files.filter((f) => !toDelete.includes(f));
+    const nextStatus = remaining.length > 1 ? 'pending' : 'purged';
+    this.db.updateDuplicateGroupFiles(id, remaining, nextStatus, note);
+
+    return { deleted, errors, remaining, status: nextStatus };
+  }
 }
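collectDestinations accepts either a top-level destination string or nested per-path objects that each carry their own destination, and findDuplicateFiles skips datasets whose enabled flag is false. A hypothetical settings.datasets entry that the scan would walk (dataset names and paths are illustrative only):

// Illustrative shape only; the real configuration comes from DatasetsService.
const datasetConfig = {
  movies: {
    enabled: true,
    destination: "/mnt/library/movies", // top-level destination is collected directly
    "/watch/movies-4k": { destination: "/mnt/library/movies-4k" } // nested destinations are collected too
  },
  archive: { enabled: false } // disabled datasets are skipped entirely
};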

+ 60 - 5
apps/service/src/task-queue.service.ts

@@ -39,6 +39,8 @@ export class TaskQueueService implements OnModuleInit {
   private isProcessing = false;
   private processingInterval: NodeJS.Timeout | null = null;
   private activeTasks = new Set<number>();
+  private isStopping = false;
+  private gracefulStopResolver: (() => void) | null = null;
   private queueSettings: QueueSettings;
 
   constructor(
@@ -86,6 +88,10 @@ export class TaskQueueService implements OnModuleInit {
   }
 
   startProcessing() {
+    // Reset any pending stop request
+    this.isStopping = false;
+    this.gracefulStopResolver = null;
+
     if (this.processingInterval) {
       return;
     }
@@ -102,22 +108,68 @@ export class TaskQueueService implements OnModuleInit {
     }
   }
 
+  private stopImmediate() {
+    this.isStopping = false;
+    this.gracefulStopResolver = null;
+    this.stopProcessing();
+    return { stopped: true, activeTasks: this.activeTasks.size };
+  }
+
+  private resolveGracefulStopIfDrained() {
+    if (this.isStopping && this.activeTasks.size === 0 && !this.isProcessing) {
+      const resolver = this.gracefulStopResolver;
+      this.isStopping = false;
+      this.gracefulStopResolver = null;
+      if (resolver) {
+        resolver();
+      }
+    }
+  }
+
+  private async stopGracefully() {
+    this.isStopping = true;
+    this.stopProcessing();
+    this.logger.log(
+      `Graceful stop requested; waiting for ${this.activeTasks.size} active task(s)`,
+    );
+
+    if (this.activeTasks.size === 0 && !this.isProcessing) {
+      this.isStopping = false;
+      return { stopped: true, drained: true, activeTasks: 0 };
+    }
+
+    return new Promise((resolve) => {
+      this.gracefulStopResolver = () => {
+        this.logger.log('All active tasks completed; queue stopped.');
+        resolve({ stopped: true, drained: true, activeTasks: 0 });
+      };
+    });
+  }
+
+  async stop(graceful = true) {
+    if (!graceful) {
+      return this.stopImmediate();
+    }
+
+    return this.stopGracefully();
+  }
+
   // Public API methods
   start() {
     this.startProcessing();
     return { started: true };
   }
 
-  stop() {
-    this.stopProcessing();
-    return { stopped: true };
-  }
-
   private async processPendingTasks() {
     if (this.isProcessing) {
       return; // Already processing
     }
 
+    if (this.isStopping) {
+      this.resolveGracefulStopIfDrained();
+      return;
+    }
+
     try {
       this.isProcessing = true;
 
@@ -175,6 +227,7 @@ export class TaskQueueService implements OnModuleInit {
       this.logger.error(`Error in processPendingTasks: ${error.message}`);
     } finally {
       this.isProcessing = false;
+      this.resolveGracefulStopIfDrained();
     }
   }
 
@@ -340,6 +393,7 @@ export class TaskQueueService implements OnModuleInit {
     } finally {
       // Remove from active tasks
       this.activeTasks.delete(task.id);
+      this.resolveGracefulStopIfDrained();
     }
   }
 
@@ -397,6 +451,7 @@ export class TaskQueueService implements OnModuleInit {
     return {
       isProcessing: !!this.processingInterval, // Whether task processing is enabled/running
       isProcessingCycle: this.isProcessing, // Whether currently in a processing cycle
+      isStopping: this.isStopping, // Whether a graceful stop has been requested
       activeTasks: this.activeTasks.size,
       pending,
       processing,
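Both stop modes go through the same endpoint; only the graceful flag differs. A client-side sketch, with the base URL assumed rather than taken from this commit:

// Assumed base URL for the service API.
const BASE = "http://localhost:3001";

async function stopQueue(graceful: boolean) {
  // graceful: true  -> stop scheduling and resolve once active tasks drain
  // graceful: false -> clear the interval immediately; in-flight work is not awaited
  const res = await fetch(`${BASE}/tasks/stop-processing`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ graceful })
  });
  return res.json();
}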

+ 1 - 0
apps/web/src/app/components/Header.tsx

@@ -10,6 +10,7 @@ import ThemeToggle from "./ThemeToggle";
 const nav = [
   { href: "/", label: "Dashboard" },
   { href: "/files", label: "Files" },
+  { href: "/duplicates", label: "Duplicates" },
   { href: "/tasks", label: "Tasks" },
   { href: "/settings", label: "Settings" }
 ];

+ 42 - 11
apps/web/src/app/components/NotificationContext.tsx

@@ -52,15 +52,26 @@ export function NotificationProvider({ children }: { children: ReactNode }) {
   useEffect(() => {
     const handleFileUpdate = (event: CustomEvent) => {
       const data = event.detail;
-      if (data.type === "add") {
-        // New file detected and task created
+      if (data.type === "created") {
+        // New file record created
         const notification = {
           type: "info" as const,
-          title: "New File Detected",
-          message: `File "${data.file}" in dataset "${data.dataset}" has been detected and queued for processing.`
+          title: "File Record Created",
+          message: `File "${data.file}" in dataset "${data.dataset}" has been recorded.`
         };
         addNotification(notification);
-        toast.success(`New file detected: ${data.file}`, {
+        toast.success(`File record created: ${data.file}`, {
+          duration: 5000
+        });
+      } else if (data.type === "requeued") {
+        // File requeued for processing
+        const notification = {
+          type: "info" as const,
+          title: "File Requeued",
+          message: `File "${data.file}" in dataset "${data.dataset}" has been requeued for processing.`
+        };
+        addNotification(notification);
+        toast.success(`File requeued: ${data.file}`, {
           duration: 5000
         });
       }
@@ -68,19 +79,39 @@ export function NotificationProvider({ children }: { children: ReactNode }) {
 
     const handleTaskUpdate = (event: CustomEvent) => {
       const data = event.detail;
-      // For now, we'll focus on file creation notifications
-      // Task status updates can be added later if needed
-      if (data.type === "created" || data.type === "started") {
-        // This might be redundant with fileUpdate, but could be useful for manual task creation
+      if (data.type === "started") {
+        // Task started
         const notification = {
           type: "info" as const,
-          title: "Task Created",
-          message: `New processing task started for "${data.input}".`
+          title: "Task Started",
+          message: `Processing started for "${data.input}".`
         };
         addNotification(notification);
         toast.success(`Task started for: ${data.input}`, {
           duration: 5000
         });
+      } else if (data.type === "completed") {
+        // Task completed
+        const notification = {
+          type: "success" as const,
+          title: "Task Completed",
+          message: `Processing completed for "${data.input}".`
+        };
+        addNotification(notification);
+        toast.success(`Task completed: ${data.input}`, {
+          duration: 5000
+        });
+      } else if (data.type === "failed") {
+        // Task failed
+        const notification = {
+          type: "error" as const,
+          title: "Task Failed",
+          message: `Processing failed for "${data.input}": ${data.error || 'Unknown error'}`
+        };
+        addNotification(notification);
+        toast.error(`Task failed: ${data.input}`, {
+          duration: 5000
+        });
       }
     };
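These handlers assume the websocket layer re-broadcasts server messages as DOM CustomEvents. A hedged sketch of the dispatching side; the event names and payload shapes below are assumptions inferred from the handlers above, not code from this commit:

// Sketch: bridge a websocket message to the CustomEvents the provider listens for.
function broadcast(eventName: string, detail: Record<string, unknown>) {
  window.dispatchEvent(new CustomEvent(eventName, { detail }));
}

// e.g. when a file record is first written:
broadcast("fileUpdate", { type: "created", dataset: "movies", file: "example.mkv" });
// ...or when a task finishes or fails:
broadcast("taskUpdate", { type: "completed", input: "example.mkv" });
broadcast("taskUpdate", { type: "failed", input: "example.mkv", error: "encode error" });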
 

+ 25 - 3
apps/web/src/app/components/NotificationsPanel.tsx

@@ -8,7 +8,8 @@ import {
   XMarkIcon
 } from "@heroicons/react/24/outline";
 import { CheckCircleIcon as CheckCircleIconSolid } from "@heroicons/react/24/solid";
-import { useState } from "react";
+import { useEffect, useState } from "react";
+import { createPortal } from "react-dom";
 import { useNotifications } from "./NotificationContext";
 
 interface NotificationsPanelProps {
@@ -32,6 +33,20 @@ export default function NotificationsPanel({
   const filteredNotifications =
     filter === "unread" ? notifications.filter((n) => !n.read) : notifications;
 
+  useEffect(() => {
+    const handleEscape = (e: KeyboardEvent) => {
+      if (e.key === "Escape") onClose();
+    };
+    if (isOpen) {
+      document.addEventListener("keydown", handleEscape);
+      document.body.style.overflow = "hidden";
+    }
+    return () => {
+      document.removeEventListener("keydown", handleEscape);
+      document.body.style.overflow = "unset";
+    };
+  }, [isOpen, onClose]);
+
   const getIcon = (type: string) => {
     switch (type) {
       case "success":
@@ -75,7 +90,7 @@ export default function NotificationsPanel({
 
   if (!isOpen) return null;
 
-  return (
+  const modalContent = (
     <div className="fixed inset-0 z-[9999]">
       {/* Backdrop */}
       <div
@@ -85,7 +100,7 @@ export default function NotificationsPanel({
 
       {/* Slide-in panel */}
       <div className="absolute top-0 right-0 h-full w-full max-w-md bg-white dark:bg-gray-900 shadow-xl transform translate-x-0 transition-transform duration-300 ease-in-out z-[10000]">
-        <div className="flex h-full flex-col overflow-hidden">
+        <div className="flex h-full flex-col">
           {/* Header */}
           <div className="flex items-center justify-between p-4 border-b border-gray-200 dark:border-gray-700">
             <div className="flex items-center gap-2">
@@ -220,4 +235,11 @@ export default function NotificationsPanel({
       </div>
     </div>
   );
+
+  // Use portal to render at document body level to avoid stacking context issues
+  if (typeof document !== "undefined") {
+    return createPortal(modalContent, document.body);
+  }
+
+  return modalContent;
 }

+ 44 - 7
apps/web/src/app/components/TaskProcessingControls.tsx

@@ -30,17 +30,33 @@ export default function TaskProcessingControls() {
   });
 
   const stopMutation = useMutation({
-    mutationFn: () => post("/tasks/stop-processing"),
+    mutationFn: () => post("/tasks/stop-processing", { graceful: true }),
     onSuccess: () => {
       queryClient.invalidateQueries({
         queryKey: ["tasks", "processing-status"]
       });
       queryClient.invalidateQueries({ queryKey: ["tasks", "queue", "status"] });
-      toast.success("Task processing stopped successfully");
+      toast.success("Graceful stop initiated – finishing active tasks");
       addNotification({
-        type: "success",
-        title: "Task Processing Stopped",
-        message: "The task processing queue has been stopped successfully."
+        type: "info",
+        title: "Graceful Stop Requested",
+        message: "The queue will finish active tasks before stopping."
+      });
+    }
+  });
+
+  const hardStopMutation = useMutation({
+    mutationFn: () => post("/tasks/stop-processing", { graceful: false }),
+    onSuccess: () => {
+      queryClient.invalidateQueries({
+        queryKey: ["tasks", "processing-status"]
+      });
+      queryClient.invalidateQueries({ queryKey: ["tasks", "queue", "status"] });
+      toast.success("Task processing stopped immediately");
+      addNotification({
+        type: "warning",
+        title: "Immediate Stop",
+        message: "Queue scheduling and active work halted immediately."
       });
     }
   });
@@ -101,7 +117,7 @@ export default function TaskProcessingControls() {
         className="inline-flex items-center rounded-md bg-red-600 px-3 py-2 text-sm font-medium text-white shadow-sm hover:bg-red-700 focus:outline-none focus:ring-2 focus:ring-red-500 focus:ring-offset-2 disabled:opacity-50"
         onClick={() => stopMutation.mutate()}
         disabled={!data?.isProcessing || stopMutation.isPending}
-        title="Stop task processing"
+        title="Graceful stop (finish active tasks)"
       >
         <svg
           className="h-4 w-4"
@@ -122,7 +138,28 @@ export default function TaskProcessingControls() {
             d="M9 10a1 1 0 011-1h4a1 1 0 011 1v4a1 1 0 01-1 1h-4a1 1 0 01-1-1v-4z"
           />
         </svg>
-        {stopMutation.isPending ? "Stopping..." : "Stop"}
+        {stopMutation.isPending ? "Stopping..." : "Graceful Stop"}
+      </button>
+      <button
+        className="inline-flex items-center rounded-md bg-orange-600 px-3 py-2 text-sm font-medium text-white shadow-sm hover:bg-orange-700 focus:outline-none focus:ring-2 focus:ring-orange-500 focus:ring-offset-2 disabled:opacity-50"
+        onClick={() => hardStopMutation.mutate()}
+        disabled={!data?.isProcessing || hardStopMutation.isPending}
+        title="Immediate stop (cancel scheduling)"
+      >
+        <svg
+          className="h-4 w-4"
+          fill="none"
+          viewBox="0 0 24 24"
+          stroke="currentColor"
+        >
+          <path
+            strokeLinecap="round"
+            strokeLinejoin="round"
+            strokeWidth={2}
+            d="M6 18L18 6M6 6l12 12"
+          />
+        </svg>
+        {hardStopMutation.isPending ? "Stopping..." : "Hard Stop"}
       </button>
     </div>
   );

+ 576 - 0
apps/web/src/app/duplicates/DuplicateList.tsx

@@ -0,0 +1,576 @@
+"use client";
+
+import {
+  ArrowPathIcon,
+  CheckCircleIcon,
+  EyeSlashIcon,
+  Squares2X2Icon,
+  TrashIcon
+} from "@heroicons/react/24/outline";
+import { useMutation, useQuery, useQueryClient } from "@tanstack/react-query";
+import { useEffect, useMemo, useState } from "react";
+import toast from "react-hot-toast";
+import { get, post } from "../../lib/api";
+import ConfirmationDialog from "../components/ConfirmationDialog";
+import LoadingCard from "../components/Loading";
+import { useNotifications } from "../components/NotificationContext";
+import { useAppContext } from "../providers/AppContext";
+
+interface DuplicateGroup {
+  id: number;
+  dataset: string;
+  destination: string;
+  hash: string;
+  size: number;
+  files: string[];
+  status: string;
+  created_at?: string;
+  reviewed_at?: string;
+  note?: string;
+}
+
+type SortField = "dataset" | "count" | "size" | "created_at";
+
+type DeleteSelection = {
+  isOpen: boolean;
+  groups: Array<{ id: number; files: string[] }>;
+};
+
+function formatBytes(bytes: number) {
+  if (!bytes) return "0 B";
+  const sizes = ["B", "KB", "MB", "GB", "TB"];
+  const i = Math.floor(Math.log(bytes) / Math.log(1024));
+  return `${(bytes / Math.pow(1024, i)).toFixed(1)} ${sizes[i]}`;
+}
+
+function makeFileKey(groupId: number, path: string) {
+  return `${groupId}::${encodeURIComponent(path)}`;
+}
+
+function parseFileKey(key: string) {
+  const [idStr, encodedPath] = key.split("::");
+  return { groupId: Number(idStr), path: decodeURIComponent(encodedPath) };
+}
+
+export default function DuplicateList() {
+  const queryClient = useQueryClient();
+  const { datasets } = useAppContext();
+  const { addNotification } = useNotifications();
+
+  const {
+    data: duplicateGroups,
+    isLoading,
+    error,
+    refetch
+  } = useQuery<DuplicateGroup[]>({
+    queryKey: ["duplicate-files"],
+    queryFn: async () => get("/maintenance/duplicates?status=pending")
+  });
+
+  const [enabledDatasets, setEnabledDatasets] = useState<Set<string>>(
+    new Set()
+  );
+  const [searchTerm, setSearchTerm] = useState("");
+  const [sortField, setSortField] = useState<SortField>("count");
+  const [sortDirection, setSortDirection] = useState<"asc" | "desc">("desc");
+  const [expandedRows, setExpandedRows] = useState<Set<number>>(new Set());
+  const [selectedGroups, setSelectedGroups] = useState<Set<number>>(new Set());
+  const [selectedFiles, setSelectedFiles] = useState<Set<string>>(new Set());
+  const [deleteSelection, setDeleteSelection] = useState<DeleteSelection>({
+    isOpen: false,
+    groups: []
+  });
+
+  // Initialize enabled datasets on first load
+  useEffect(() => {
+    if (datasets && datasets.length > 0 && enabledDatasets.size === 0) {
+      const names = datasets
+        .map((p: string) => p.split("/").pop())
+        .filter(Boolean) as string[];
+      setEnabledDatasets(new Set(names));
+    }
+  }, [datasets]);
+
+  const scanMutation = useMutation({
+    mutationFn: () =>
+      post("/maintenance/duplicates/scan", { resetExisting: false }),
+    onSuccess: () => {
+      toast.success("Duplicate scan completed");
+      addNotification({
+        type: "info",
+        title: "Scan Finished",
+        message: "Duplicate scan completed. Refreshing results."
+      });
+      refetch();
+    },
+    onError: (err: any) => {
+      console.error(err);
+      toast.error("Failed to start duplicate scan");
+    }
+  });
+
+  const markNotDuplicateMutation = useMutation({
+    mutationFn: async (ids: number[]) => {
+      await Promise.all(
+        ids.map((id) =>
+          post(`/maintenance/duplicates/${id}/mark`, {
+            status: "reviewed",
+            note: "not_duplicate"
+          })
+        )
+      );
+    },
+    onSuccess: () => {
+      toast.success("Marked as not duplicate");
+      setSelectedGroups(new Set());
+      queryClient.invalidateQueries({ queryKey: ["duplicate-files"] });
+    },
+    onError: (err: any) => {
+      console.error(err);
+      toast.error("Failed to update duplicates");
+    }
+  });
+
+  const deleteMutation = useMutation({
+    mutationFn: async (groups: Array<{ id: number; files: string[] }>) => {
+      if (!groups.length) return;
+      await Promise.all(
+        groups.map((group) =>
+          post(`/maintenance/duplicates/${group.id}/purge`, {
+            files: group.files
+          })
+        )
+      );
+    },
+    onSuccess: () => {
+      toast.success("Selected files deleted");
+      setSelectedFiles(new Set());
+      setSelectedGroups(new Set());
+      setDeleteSelection({ isOpen: false, groups: [] });
+      queryClient.invalidateQueries({ queryKey: ["duplicate-files"] });
+    },
+    onError: (err: any) => {
+      console.error(err);
+      toast.error("Failed to delete selected files");
+    }
+  });
+
+  const filteredData = useMemo(() => {
+    if (!duplicateGroups) return [] as DuplicateGroup[];
+
+    return duplicateGroups
+      .filter((group) => enabledDatasets.has(group.dataset))
+      .filter((group) => {
+        if (!searchTerm) return true;
+        const term = searchTerm.toLowerCase();
+        const inFiles = group.files.some((f) => f.toLowerCase().includes(term));
+        const inDest = group.destination?.toLowerCase().includes(term);
+        return inFiles || inDest || group.hash.toLowerCase().includes(term);
+      })
+      .sort((a, b) => {
+        let aVal: number | string = 0;
+        let bVal: number | string = 0;
+
+        switch (sortField) {
+          case "dataset":
+            aVal = a.dataset;
+            bVal = b.dataset;
+            break;
+          case "count":
+            aVal = a.files.length;
+            bVal = b.files.length;
+            break;
+          case "size":
+            aVal = a.size;
+            bVal = b.size;
+            break;
+          case "created_at":
+            aVal = a.created_at || "";
+            bVal = b.created_at || "";
+            break;
+        }
+
+        if (aVal < bVal) return sortDirection === "asc" ? -1 : 1;
+        if (aVal > bVal) return sortDirection === "asc" ? 1 : -1;
+        return 0;
+      });
+  }, [duplicateGroups, enabledDatasets, searchTerm, sortField, sortDirection]);
+
+  const toggleDataset = (dataset: string) => {
+    const next = new Set(enabledDatasets);
+    if (next.has(dataset)) next.delete(dataset);
+    else next.add(dataset);
+    setEnabledDatasets(next);
+  };
+
+  const toggleExpanded = (id: number) => {
+    const next = new Set(expandedRows);
+    if (next.has(id)) next.delete(id);
+    else next.add(id);
+    setExpandedRows(next);
+  };
+
+  const toggleGroupSelection = (id: number) => {
+    const next = new Set(selectedGroups);
+    if (next.has(id)) next.delete(id);
+    else next.add(id);
+    setSelectedGroups(next);
+  };
+
+  const toggleFileSelection = (groupId: number, filePath: string) => {
+    const key = makeFileKey(groupId, filePath);
+    const next = new Set(selectedFiles);
+    if (next.has(key)) next.delete(key);
+    else next.add(key);
+    setSelectedFiles(next);
+  };
+
+  const selectGroupFiles = (group: DuplicateGroup, checked: boolean) => {
+    const next = new Set(selectedFiles);
+    for (const file of group.files) {
+      const key = makeFileKey(group.id, file);
+      if (checked) next.add(key);
+      else next.delete(key);
+    }
+    setSelectedFiles(next);
+  };
+
+  const handleBatchDelete = () => {
+    if (!duplicateGroups) return;
+    const grouped = new Map<number, string[]>();
+
+    selectedFiles.forEach((key) => {
+      const { groupId, path } = parseFileKey(key);
+      const arr = grouped.get(groupId) || [];
+      arr.push(path);
+      grouped.set(groupId, arr);
+    });
+
+    if (grouped.size === 0) return;
+
+    const groups = Array.from(grouped.entries()).map(([id, files]) => ({
+      id,
+      files
+    }));
+
+    setDeleteSelection({ isOpen: true, groups });
+  };
+
+  const confirmDelete = () => {
+    deleteMutation.mutate(deleteSelection.groups);
+  };
+
+  const markSelectedNotDuplicate = () => {
+    if (selectedGroups.size === 0) return;
+    markNotDuplicateMutation.mutate(Array.from(selectedGroups));
+  };
+
+  if (isLoading && !duplicateGroups) {
+    return <LoadingCard message="Loading duplicate files..." />;
+  }
+
+  if (error) {
+    return (
+      <div className="text-center p-8 bg-red-50 dark:bg-red-900/20 border border-red-200 dark:border-red-800 rounded-lg">
+        <h2 className="text-lg font-semibold text-red-800 dark:text-red-200 mb-2">
+          Failed to load duplicates
+        </h2>
+        <p className="text-gray-600 dark:text-gray-400 mb-4">
+          There was an error loading duplicate file data.
+        </p>
+        <button
+          onClick={() =>
+            queryClient.refetchQueries({ queryKey: ["duplicate-files"] })
+          }
+          className="inline-flex items-center px-4 py-2 text-sm font-medium rounded-md text-white bg-red-600 hover:bg-red-700"
+        >
+          Retry
+        </button>
+      </div>
+    );
+  }
+
+  return (
+    <div className="space-y-6">
+      <div className="flex flex-col gap-4 lg:flex-row lg:items-center lg:justify-between">
+        <div>
+          <h3 className="text-lg font-semibold text-gray-900 dark:text-white">
+            Duplicate Files
+          </h3>
+          <p className="text-sm text-gray-600 dark:text-gray-400">
+            Review detected duplicates, delete unwanted files, or mark them as
+            safe.
+          </p>
+        </div>
+        <div className="flex flex-wrap gap-2">
+          <button
+            onClick={() => scanMutation.mutate()}
+            className="inline-flex items-center gap-2 rounded-md bg-blue-600 px-3 py-2 text-sm font-medium text-white shadow-sm hover:bg-blue-700 focus:outline-none focus:ring-2 focus:ring-blue-500 focus:ring-offset-2"
+          >
+            <ArrowPathIcon className="h-4 w-4" />
+            Rescan
+          </button>
+          <button
+            onClick={markSelectedNotDuplicate}
+            disabled={selectedGroups.size === 0}
+            className="inline-flex items-center gap-2 rounded-md bg-emerald-600 px-3 py-2 text-sm font-medium text-white shadow-sm disabled:opacity-60 hover:bg-emerald-700 focus:outline-none focus:ring-2 focus:ring-emerald-500 focus:ring-offset-2"
+          >
+            <CheckCircleIcon className="h-4 w-4" />
+            Mark Not Duplicate
+          </button>
+          <button
+            onClick={handleBatchDelete}
+            disabled={selectedFiles.size === 0}
+            className="inline-flex items-center gap-2 rounded-md bg-red-600 px-3 py-2 text-sm font-medium text-white shadow-sm disabled:opacity-60 hover:bg-red-700 focus:outline-none focus:ring-2 focus:ring-red-500 focus:ring-offset-2"
+          >
+            <TrashIcon className="h-4 w-4" />
+            Delete Selected Files
+          </button>
+        </div>
+      </div>
+
+      <div className="rounded-lg border border-gray-200 dark:border-gray-800 bg-white dark:bg-gray-900 p-4 shadow-sm space-y-4">
+        <div className="flex flex-col gap-4 lg:flex-row lg:items-center lg:justify-between">
+          <div className="flex items-center gap-3">
+            <Squares2X2Icon className="h-5 w-5 text-gray-500" />
+            <div>
+              <div className="text-sm text-gray-700 dark:text-gray-300">
+                Showing {filteredData.length} group
+                {filteredData.length === 1 ? "" : "s"}
+              </div>
+              <div className="text-xs text-gray-500 dark:text-gray-400">
+                {selectedFiles.size} file{selectedFiles.size === 1 ? "" : "s"}{" "}
+                selected
+              </div>
+            </div>
+          </div>
+          <div className="flex flex-wrap gap-3">
+            <input
+              type="text"
+              placeholder="Search destination or file path..."
+              value={searchTerm}
+              onChange={(e) => setSearchTerm(e.target.value)}
+              className="w-64 min-w-[200px] rounded-md border border-gray-300 dark:border-gray-700 bg-white dark:bg-gray-800 px-3 py-2 text-sm text-gray-900 dark:text-gray-100 shadow-sm focus:border-blue-500 focus:outline-none focus:ring-2 focus:ring-blue-500"
+            />
+            <select
+              className="rounded-md border border-gray-300 dark:border-gray-700 bg-white dark:bg-gray-800 px-3 py-2 text-sm text-gray-900 dark:text-gray-100 shadow-sm focus:border-blue-500 focus:outline-none focus:ring-2 focus:ring-blue-500"
+              value={sortField}
+              onChange={(e) => setSortField(e.target.value as SortField)}
+            >
+              <option value="count">Most files</option>
+              <option value="size">Largest size</option>
+              <option value="dataset">Dataset</option>
+              <option value="created_at">Newest</option>
+            </select>
+            <button
+              onClick={() =>
+                setSortDirection((prev) => (prev === "asc" ? "desc" : "asc"))
+              }
+              className="rounded-md border border-gray-300 dark:border-gray-700 bg-white dark:bg-gray-800 px-3 py-2 text-sm text-gray-900 dark:text-gray-100 shadow-sm hover:bg-gray-50 dark:hover:bg-gray-700"
+            >
+              {sortDirection === "asc" ? "Asc" : "Desc"}
+            </button>
+          </div>
+        </div>
+
+        <div className="flex flex-wrap gap-2 items-center">
+          <span className="text-sm text-gray-600 dark:text-gray-400">
+            Datasets:
+          </span>
+          {datasets?.map((ds: string) => {
+            const name = ds.split("/").pop();
+            if (!name) return null;
+            return (
+              <label
+                key={name}
+                className="flex items-center gap-2 text-sm text-gray-700 dark:text-gray-300"
+              >
+                <input
+                  type="checkbox"
+                  checked={enabledDatasets.has(name)}
+                  onChange={() => toggleDataset(name)}
+                  className="rounded border-gray-300 dark:border-gray-600 text-blue-600 focus:ring-blue-500"
+                />
+                {name}
+              </label>
+            );
+          })}
+        </div>
+
+        <div className="overflow-x-auto rounded-lg border border-gray-200 dark:border-gray-800 bg-white dark:bg-gray-900">
+          <table className="min-w-full divide-y divide-gray-200 dark:divide-gray-800">
+            <thead className="bg-gray-50 dark:bg-gray-800">
+              <tr>
+                <th className="px-3 py-3 text-left text-xs font-semibold text-gray-700 dark:text-gray-200">
+                  Group
+                </th>
+                <th className="px-3 py-3 text-left text-xs font-semibold text-gray-700 dark:text-gray-200">
+                  Dataset
+                </th>
+                <th className="px-3 py-3 text-left text-xs font-semibold text-gray-700 dark:text-gray-200">
+                  Destination
+                </th>
+                <th className="px-3 py-3 text-left text-xs font-semibold text-gray-700 dark:text-gray-200">
+                  Files
+                </th>
+                <th className="px-3 py-3 text-left text-xs font-semibold text-gray-700 dark:text-gray-200">
+                  Size
+                </th>
+                <th className="px-3 py-3 text-left text-xs font-semibold text-gray-700 dark:text-gray-200">
+                  Hash
+                </th>
+                <th className="px-3 py-3 text-right text-xs font-semibold text-gray-700 dark:text-gray-200">
+                  Actions
+                </th>
+              </tr>
+            </thead>
+            <tbody className="divide-y divide-gray-200 dark:divide-gray-800">
+              {filteredData.length === 0 && (
+                <tr>
+                  <td
+                    colSpan={7}
+                    className="px-3 py-4 text-center text-sm text-gray-500 dark:text-gray-400"
+                  >
+                    No duplicate groups match the current filters.
+                  </td>
+                </tr>
+              )}
+              {filteredData.map((group) => {
+                const isExpanded = expandedRows.has(group.id);
+                const allSelected = group.files.every((f) =>
+                  selectedFiles.has(makeFileKey(group.id, f))
+                );
+                return (
+                  <>
+                    <tr
+                      key={group.id}
+                      className="hover:bg-gray-50 dark:hover:bg-gray-800"
+                    >
+                      <td className="px-3 py-3 text-sm text-gray-700 dark:text-gray-200 whitespace-nowrap">
+                        <button
+                          onClick={() => toggleExpanded(group.id)}
+                          className="mr-2 text-gray-500 hover:text-gray-700 dark:hover:text-gray-300"
+                        >
+                          {isExpanded ? "−" : "+"}
+                        </button>
+                        <input
+                          type="checkbox"
+                          checked={selectedGroups.has(group.id)}
+                          onChange={() => toggleGroupSelection(group.id)}
+                          className="rounded border-gray-300 dark:border-gray-600 text-blue-600 focus:ring-blue-500"
+                        />
+                      </td>
+                      <td className="px-3 py-3 text-sm text-gray-900 dark:text-gray-100 whitespace-nowrap">
+                        <span className="px-2 py-1 bg-blue-100 dark:bg-blue-900 text-blue-800 dark:text-blue-200 rounded text-xs font-medium">
+                          {group.dataset}
+                        </span>
+                      </td>
+                      <td className="px-3 py-3 text-sm text-gray-700 dark:text-gray-300 max-w-xs">
+                        <div className="truncate" title={group.destination}>
+                          {group.destination}
+                        </div>
+                      </td>
+                      <td className="px-3 py-3 text-sm text-gray-700 dark:text-gray-300">
+                        {group.files.length}
+                      </td>
+                      <td className="px-3 py-3 text-sm text-gray-700 dark:text-gray-300">
+                        {formatBytes(group.size)}
+                      </td>
+                      <td className="px-3 py-3 text-sm text-gray-500 dark:text-gray-400">
+                        {group.hash.slice(0, 10)}…
+                      </td>
+                      <td className="px-3 py-3 text-sm text-right whitespace-nowrap">
+                        <div className="inline-flex items-center gap-1">
+                          <button
+                            onClick={() =>
+                              markNotDuplicateMutation.mutate([group.id])
+                            }
+                            className="inline-flex items-center gap-1 rounded-md border border-gray-300 dark:border-gray-700 px-3 py-1 text-xs font-medium text-gray-700 dark:text-gray-200 hover:bg-gray-50 dark:hover:bg-gray-800"
+                          >
+                            <EyeSlashIcon className="h-4 w-4" /> Ignore
+                          </button>
+                          <button
+                            onClick={() =>
+                              selectGroupFiles(group, !allSelected)
+                            }
+                            className="inline-flex items-center gap-1 rounded-md border border-gray-300 dark:border-gray-700 px-3 py-1 text-xs font-medium text-gray-700 dark:text-gray-200 hover:bg-gray-50 dark:hover:bg-gray-800"
+                          >
+                            {allSelected ? "Unselect files" : "Select files"}
+                          </button>
+                        </div>
+                      </td>
+                    </tr>
+                    {isExpanded && (
+                      <tr
+                        key={`${group.id}-expanded`}
+                        className="bg-gray-50 dark:bg-gray-800/60"
+                      >
+                        <td colSpan={7} className="px-3 py-3">
+                          <div className="flex items-center justify-between mb-2">
+                            <div className="text-sm font-medium text-gray-800 dark:text-gray-100">
+                              Files in group
+                            </div>
+                            <div className="flex items-center gap-2 text-xs text-gray-600 dark:text-gray-400">
+                              <span>{selectedFiles.size} selected</span>
+                              <button
+                                onClick={() => selectGroupFiles(group, true)}
+                                className="rounded-md border border-gray-300 dark:border-gray-700 px-2 py-1 hover:bg-gray-100 dark:hover:bg-gray-700"
+                              >
+                                Select all
+                              </button>
+                              <button
+                                onClick={() => selectGroupFiles(group, false)}
+                                className="rounded-md border border-gray-300 dark:border-gray-700 px-2 py-1 hover:bg-gray-100 dark:hover:bg-gray-700"
+                              >
+                                Clear
+                              </button>
+                            </div>
+                          </div>
+                          <div className="space-y-2 max-h-64 overflow-y-auto pr-1">
+                            {group.files.map((file) => {
+                              const key = makeFileKey(group.id, file);
+                              return (
+                                <label
+                                  key={key}
+                                  className="flex items-center gap-3 rounded-md border border-gray-200 dark:border-gray-700 bg-white dark:bg-gray-900 px-3 py-2 text-sm text-gray-800 dark:text-gray-200"
+                                >
+                                  <input
+                                    type="checkbox"
+                                    checked={selectedFiles.has(key)}
+                                    onChange={() =>
+                                      toggleFileSelection(group.id, file)
+                                    }
+                                    className="rounded border-gray-300 dark:border-gray-600 text-blue-600 focus:ring-blue-500"
+                                  />
+                                  <span className="truncate" title={file}>
+                                    {file}
+                                  </span>
+                                </label>
+                              );
+                            })}
+                          </div>
+                        </td>
+                      </tr>
+                    )}
+                  </>
+                );
+              })}
+            </tbody>
+          </table>
+        </div>
+      </div>
+
+      <ConfirmationDialog
+        isOpen={deleteSelection.isOpen}
+        title="Delete selected files"
+        message="These files will be removed from disk. This action cannot be undone."
+        confirmText="Delete"
+        cancelText="Cancel"
+        type="danger"
+        isLoading={deleteMutation.isPending}
+        onConfirm={confirmDelete}
+        onClose={() => setDeleteSelection({ isOpen: false, groups: [] })}
+      />
+    </div>
+  );
+}

+ 9 - 0
apps/web/src/app/duplicates/page.tsx

@@ -0,0 +1,9 @@
+import DuplicateList from "./DuplicateList";
+
+export default function DuplicatesPage() {
+  return (
+    <div className="max-w-7xl mx-auto px-6 py-8 lg:px-8">
+      <DuplicateList />
+    </div>
+  );
+}

+ 31 - 31
apps/web/src/app/providers/AppContext.tsx

@@ -110,20 +110,47 @@ export function AppProvider({ children }: { children: ReactNode }) {
     initializeData();
   }, [initializeData]);
 
+  const refreshSettings = useCallback(async () => {
+    try {
+      const settingsData = await get("/config/settings");
+      setSettings(settingsData || {});
+      if (settingsData) {
+        setQueueConfig(settingsData.queue || {});
+        setWatcherConfig(settingsData.watcher || {});
+        setDatasetsConfig(settingsData.datasets || {});
+      }
+    } catch (err) {
+      const error = err instanceof Error ? err : new Error(String(err));
+      setError(error);
+      throw error;
+    }
+  }, []);
+
+  const refreshDatasets = useCallback(async () => {
+    try {
+      const datasetsList = await get("/files/all-datasets");
+      setDatasets(datasetsList || []);
+    } catch (err) {
+      const error = err instanceof Error ? err : new Error(String(err));
+      setError(error);
+      throw error;
+    }
+  }, []);
+
   // Listen for WebSocket events
   useEffect(() => {
     const handleSettingsUpdate = () => {
-      initializeData();
+      refreshSettings();
     };
 
     const handleTaskUpdate = () => {
       // Refetch datasets in case new ones were created
-      initializeData();
+      refreshDatasets();
     };
 
     const handleFileUpdate = () => {
       // Refetch datasets in case new ones were created
-      initializeData();
+      refreshDatasets();
     };
 
     window.addEventListener(
@@ -147,7 +174,7 @@ export function AppProvider({ children }: { children: ReactNode }) {
         handleFileUpdate as EventListener
       );
     };
-  }, [initializeData]);
+  }, [refreshSettings, refreshDatasets]);
 
   // Mutation functions
   const updateSetting = useCallback(
@@ -229,33 +256,6 @@ export function AppProvider({ children }: { children: ReactNode }) {
     [updateSetting]
   );
 
-  const refreshSettings = useCallback(async () => {
-    try {
-      const settingsData = await get("/config/settings");
-      setSettings(settingsData || {});
-      if (settingsData) {
-        setQueueConfig(settingsData.queue || {});
-        setWatcherConfig(settingsData.watcher || {});
-        setDatasetsConfig(settingsData.datasets || {});
-      }
-    } catch (err) {
-      const error = err instanceof Error ? err : new Error(String(err));
-      setError(error);
-      throw error;
-    }
-  }, []);
-
-  const refreshDatasets = useCallback(async () => {
-    try {
-      const datasetsList = await get("/files/all-datasets");
-      setDatasets(datasetsList || []);
-    } catch (err) {
-      const error = err instanceof Error ? err : new Error(String(err));
-      setError(error);
-      throw error;
-    }
-  }, []);
-
   const refreshAll = useCallback(async () => {
     await initializeData();
   }, [initializeData]);
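
Note that `refreshSettings` and `refreshDatasets` are not merely reordered for readability: a `useEffect` dependency array is evaluated during render, so any `const` callback it lists must be declared above the effect or the reference hits the temporal dead zone. A generic sketch of the required ordering (the endpoint and event name below are hypothetical, not the provider's actual ones):

```ts
import { useCallback, useEffect } from "react";

// The callback has to come first: the `[refresh]` array below is built during
// render, and a `const` declared later in the body would still be in its
// temporal dead zone (TypeScript reports use-before-declaration; at runtime it
// would throw a ReferenceError).
export function useExampleRefresh() {
  const refresh = useCallback(async () => {
    const res = await fetch("/api/example"); // hypothetical endpoint
    console.log(await res.json());
  }, []);

  useEffect(() => {
    const handler = () => void refresh();
    window.addEventListener("example-refresh", handler); // hypothetical event name
    return () => window.removeEventListener("example-refresh", handler);
  }, [refresh]);
}
```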

+ 30 - 7
apps/web/src/lib/websocket.ts

@@ -5,22 +5,45 @@ class WebSocketService {
   private reconnectAttempts = 0;
   private maxReconnectAttempts = 5;
 
+  private toWsUrl(base: string) {
+    if (!base) return base;
+    if (base.startsWith("ws://") || base.startsWith("wss://")) return base;
+
+    try {
+      const url = new URL(
+        base,
+        typeof window !== "undefined"
+          ? window.location.origin
+          : "http://localhost:3000"
+      );
+      url.protocol = url.protocol === "https:" ? "wss:" : "ws:";
+      return url.toString();
+    } catch (err) {
+      console.warn("Failed to build WS URL, falling back to base", err);
+      return base;
+    }
+  }
+
   connect(url?: string) {
-    // Use relative path for proxy support, or explicit URL if provided
+    const envWs = process.env.NEXT_PUBLIC_WATCH_FINISHED_WS;
     const envApi = process.env.NEXT_PUBLIC_WATCH_FINISHED_API;
-    const isLocalEnv =
-      envApi?.includes("localhost") || envApi?.includes("127.0.0.1");
     const isBrowser = typeof window !== "undefined";
     const hostIsLocal =
       !isBrowser ||
       window.location.hostname === "localhost" ||
       window.location.hostname === "127.0.0.1";
+    const envApiIsLocal =
+      envApi?.includes("localhost") || envApi?.includes("127.0.0.1");
 
-    // Use env API when provided and not pointing at localhost from a remote page; otherwise, stick to current origin.
-    const defaultUrl =
-      envApi && (!isLocalEnv || hostIsLocal) ? envApi : window.location.origin;
+    // Prefer explicit WS host; otherwise use API host (converted to ws/wss).
+    // If API host is localhost but the page is remote, fall back to current origin (allows reverse proxy handling).
+    const baseUrl =
+      url ||
+      envWs ||
+      (envApi && !(envApiIsLocal && !hostIsLocal) ? envApi : undefined) ||
+      (isBrowser ? window.location.origin : "http://localhost:3000");
 
-    const wsUrl = url || defaultUrl;
+    const wsUrl = this.toWsUrl(baseUrl);
 
     if (this.socket?.connected) {
       return this.socket;
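
For reference, the resolution order introduced above is: explicit `NEXT_PUBLIC_WATCH_FINISHED_WS`, then `NEXT_PUBLIC_WATCH_FINISHED_API` converted to `ws`/`wss` (unless it points at localhost while the page is served from a remote host), then the current origin. A small standalone sketch of the protocol conversion with worked examples; the hostnames are hypothetical, and the real `toWsUrl` additionally wraps the parse in a try/catch:

```ts
// Mirrors the protocol-swapping behaviour of toWsUrl above, minus the try/catch.
function toWsUrl(base: string, origin = "https://nas.example.com"): string {
  if (base.startsWith("ws://") || base.startsWith("wss://")) return base;
  const url = new URL(base, origin); // origin only matters for relative paths
  url.protocol = url.protocol === "https:" ? "wss:" : "ws:";
  return url.toString();
}

console.log(toWsUrl("http://192.168.1.10:3100"));       // ws://192.168.1.10:3100/
console.log(toWsUrl("https://media.example.com"));      // wss://media.example.com/
console.log(toWsUrl("/socket.io"));                     // wss://nas.example.com/socket.io
console.log(toWsUrl("wss://media.example.com/socket")); // passed through unchanged
```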

+ 2 - 0
data/README.md

@@ -145,6 +145,7 @@ pnpm run migrate:create <migration_name>
 When you need to make schema changes:
 
 1. Create a new migration file:
+
    ```bash
    pnpm run migrate:create add_new_table
    ```
@@ -152,6 +153,7 @@ When you need to make schema changes:
 2. Edit the generated SQL file in `data/migrations/` with your schema changes.
 
 3. Test the migration by running it:
+
    ```bash
    pnpm run migrate:up
    ```

+ 23 - 23
scripts/migrate.ts

@@ -1,31 +1,31 @@
 #!/usr/bin/env tsx
 
-import Database from 'better-sqlite3';
-import fs from 'fs';
-import path from 'path';
-import { MigrationRunner } from '../apps/service/src/migration-runner';
+import Database from "better-sqlite3";
+import fs from "fs";
+import path from "path";
+import { MigrationRunner } from "../apps/service/src/migration-runner";
 
 const args = process.argv.slice(2);
 const command = args[0];
 
 if (!command) {
-  console.log('Usage: migrate <command>');
-  console.log('Commands:');
-  console.log('  up          - Apply all pending migrations');
-  console.log('  create <name> - Create a new migration file');
-  console.log('  status      - Show migration status');
+  console.log("Usage: migrate <command>");
+  console.log("Commands:");
+  console.log("  up          - Apply all pending migrations");
+  console.log("  create <name> - Create a new migration file");
+  console.log("  status      - Show migration status");
   process.exit(1);
 }
 
 // Find project root
 let projectRoot = process.cwd();
 while (projectRoot !== path.dirname(projectRoot)) {
-  if (fs.existsSync(path.join(projectRoot, 'package.json'))) {
+  if (fs.existsSync(path.join(projectRoot, "package.json"))) {
     try {
       const pkg = JSON.parse(
-        fs.readFileSync(path.join(projectRoot, 'package.json'), 'utf-8'),
+        fs.readFileSync(path.join(projectRoot, "package.json"), "utf-8")
       );
-      if (pkg.name === 'watch-finished-turbo') {
+      if (pkg.name === "watch-finished-turbo") {
         break;
       }
     } catch (e) {
@@ -35,8 +35,8 @@ while (projectRoot !== path.dirname(projectRoot)) {
   projectRoot = path.dirname(projectRoot);
 }
 
-const dbPath = path.resolve(projectRoot, 'data/database.db');
-const migrationsDir = path.resolve(projectRoot, 'data/migrations');
+const dbPath = path.resolve(projectRoot, "data/database.db");
+const migrationsDir = path.resolve(projectRoot, "data/migrations");
 
 // Ensure directories exist
 if (!fs.existsSync(path.dirname(dbPath))) {
@@ -51,33 +51,33 @@ const runner = new MigrationRunner(db, migrationsDir);
 
 try {
   switch (command) {
-    case 'up':
+    case "up":
       runner.applyPendingMigrations();
       break;
 
-    case 'create':
+    case "create":
       const name = args[1];
       if (!name) {
-        console.error('Migration name is required');
+        console.error("Migration name is required");
         process.exit(1);
       }
       runner.createMigration(name);
       break;
 
-    case 'status':
+    case "status":
       runner.init();
       const applied = runner.getAppliedMigrations();
       const available = runner.getAvailableMigrations();
-      const pending = available.filter(m => !applied.includes(m));
+      const pending = available.filter((m) => !applied.includes(m));
 
-      console.log('Migration Status:');
+      console.log("Migration Status:");
       console.log(`Applied: ${applied.length}`);
       console.log(`Available: ${available.length}`);
       console.log(`Pending: ${pending.length}`);
 
       if (pending.length > 0) {
-        console.log('\nPending migrations:');
-        pending.forEach(m => console.log(`  - ${m}`));
+        console.log("\nPending migrations:");
+        pending.forEach((m) => console.log(`  - ${m}`));
       }
       break;
 
@@ -87,4 +87,4 @@ try {
   }
 } finally {
   db.close();
-}
+}