#!/usr/bin/env node

/**
 * Example script demonstrating the new duplicate detection optimization
 *
 * This shows how to:
 * 1. Index destination files for fast duplicate detection
 * 2. Query duplicate statistics
 * 3. Run duplicate scans using the database
 */

const API_BASE = process.env.API_BASE || "http://localhost:3000";

/**
 * Perform an HTTP request and parse the JSON response.
 *
 * `fetch` does NOT reject on HTTP 4xx/5xx, so without this check every
 * caller would silently log `undefined` fields from an error body; fail
 * loudly instead.
 *
 * @param {string} url - Fully-qualified request URL.
 * @param {object} [options] - Options passed through to `fetch`.
 * @returns {Promise<object>} Parsed JSON response body.
 * @throws {Error} When the server responds with a non-2xx status.
 */
async function fetchJson(url, options = {}) {
  const response = await fetch(url, options);
  if (!response.ok) {
    throw new Error(
      `${options.method ?? "GET"} ${url} failed: ${response.status} ${response.statusText}`
    );
  }
  return response.json();
}

/**
 * Shared POST to the destination-index endpoint; `indexDestination` and
 * `reindexDestination` differ only in the `reindex` flag.
 *
 * @param {string} dataset - Dataset name (e.g. 'movies').
 * @param {string} destination - Destination directory path.
 * @param {boolean} reindex - true forces a full rebuild of the index.
 * @returns {Promise<object>} API result with indexed/skipped/errors counts.
 */
async function postDestinationIndex(dataset, destination, reindex) {
  return fetchJson(`${API_BASE}/maintenance/index/destination`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({
      dataset,
      destination,
      reindex,
      batchSize: 100,
    }),
  });
}

/**
 * Index destination files for fast duplicate detection (incremental:
 * already-indexed files are skipped).
 *
 * @param {string} dataset - Dataset name.
 * @param {string} destination - Destination directory path.
 * @returns {Promise<object>} API result with indexed/skipped/errors counts.
 */
async function indexDestination(dataset, destination) {
  console.log(`\nšŸ“ Indexing ${dataset} destination: ${destination}`);
  const result = await postDestinationIndex(dataset, destination, false);
  console.log(
    `āœ… Indexed: ${result.indexed}, Skipped: ${result.skipped}, Errors: ${result.errors}`
  );
  return result;
}

/**
 * Query how many files are currently indexed for a dataset.
 *
 * @param {string} dataset - Dataset name.
 * @param {string} [destination] - Optional destination path filter.
 * @returns {Promise<object>} API result with a `count` field.
 */
async function getIndexCount(dataset, destination) {
  console.log(`\nšŸ“Š Getting index count for ${dataset}`);
  const params = new URLSearchParams({ dataset });
  if (destination) params.append("destination", destination);

  const result = await fetchJson(
    `${API_BASE}/maintenance/index/count?${params}`
  );
  console.log(`šŸ“ˆ Indexed files: ${result.count}`);
  return result;
}

/**
 * Fetch duplicate statistics and print a summary of the first few groups.
 *
 * @param {string} [dataset] - Optional dataset filter; omit for all datasets.
 * @returns {Promise<object>} API result with `totalDuplicates` and
 *   `duplicatesByDataset` (array of duplicate groups).
 */
async function getDuplicateStats(dataset) {
  console.log(
    `\nšŸ” Getting duplicate statistics for ${dataset || "all datasets"}`
  );
  // URL-encode the query value instead of raw interpolation.
  const query = dataset ? `?${new URLSearchParams({ dataset })}` : "";

  const result = await fetchJson(
    `${API_BASE}/maintenance/index/stats${query}`
  );
  console.log(`šŸ”„ Total duplicate groups: ${result.totalDuplicates}`);

  const groups = result.duplicatesByDataset ?? [];
  if (groups.length > 0) {
    console.log("\nDuplicate groups:");
    // Only preview the first 5 groups to keep output readable.
    groups.slice(0, 5).forEach((dup, idx) => {
      console.log(`\n  Group ${idx + 1}:`);
      console.log(`    Hash: ${dup.hash.substring(0, 16)}...`);
      console.log(`    Size: ${(dup.file_size / 1024 / 1024).toFixed(2)} MB`);
      console.log(`    Count: ${dup.file_count} files`);
      console.log(`    Files:`);
      dup.files.forEach((file) => {
        console.log(`      - ${file}`);
      });
    });

    if (groups.length > 5) {
      console.log(`\n  ... and ${groups.length - 5} more groups`);
    }
  }

  return result;
}

/**
 * Trigger a duplicate scan on the server (uses the database index).
 *
 * @param {boolean} [resetExisting=false] - true clears previous scan results.
 * @returns {Promise<object>} API scan result.
 */
async function scanDuplicates(resetExisting = false) {
  console.log(`\nšŸ”Ž Scanning for duplicates (reset: ${resetExisting})`);
  const result = await fetchJson(`${API_BASE}/maintenance/duplicates/scan`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ resetExisting }),
  });
  console.log("āœ… Duplicate scan completed");
  return result;
}

/**
 * Clear the index for a dataset (optionally scoped to one destination).
 *
 * @param {string} dataset - Dataset name (becomes a URL path segment).
 * @param {string} [destination] - Optional destination path filter.
 * @returns {Promise<object>} API result with a `cleared` count.
 */
async function clearIndex(dataset, destination) {
  console.log(`\nšŸ—‘ļø  Clearing index for ${dataset}`);
  // destination is a filesystem path — it must be URL-encoded, and the
  // dataset path segment encoded too, or characters like spaces/&/# break
  // the request.
  const query = destination ? `?${new URLSearchParams({ destination })}` : "";

  const result = await fetchJson(
    `${API_BASE}/maintenance/index/${encodeURIComponent(dataset)}${query}`,
    { method: "DELETE" }
  );
  console.log(`šŸ—‘ļø  Cleared ${result.cleared} entries`);
  return result;
}

/**
 * Force a full rebuild of a destination's index (reindex: true).
 *
 * @param {string} dataset - Dataset name.
 * @param {string} destination - Destination directory path.
 * @returns {Promise<object>} API result with indexed/skipped/errors counts.
 */
async function reindexDestination(dataset, destination) {
  console.log(`\nšŸ”„ Re-indexing ${dataset} destination: ${destination}`);
  const result = await postDestinationIndex(dataset, destination, true);
  console.log(
    `āœ… Re-indexed: ${result.indexed}, Skipped: ${result.skipped}, Errors: ${result.errors}`
  );
  return result;
}

/** Print a section divider with a title. */
function printSection(title) {
  console.log("\n" + "═".repeat(60));
  console.log(title);
  console.log("═".repeat(60));
}

/**
 * Example workflow walking through the five examples. All API calls are
 * commented out so the script is safe to run without a server; uncomment
 * and adjust paths to exercise a live instance.
 */
async function exampleWorkflow() {
  console.log("šŸš€ Duplicate Detection Optimization - Example Workflow\n");
  console.log(`Using API: ${API_BASE}\n`);

  try {
    // Example 1: Index a destination directory
    console.log("═".repeat(60));
    console.log("Example 1: Index destination files");
    console.log("═".repeat(60));
    // Uncomment and modify these lines with your actual paths:
    // await indexDestination('movies', '/path/to/movies/destination');
    // await indexDestination('tvshows', '/path/to/tvshows/destination');
    console.log(
      "\nā„¹ļø  Uncomment the indexDestination calls in the script to run this example"
    );

    // Example 2: Check index count
    printSection("Example 2: Check index count");
    // await getIndexCount('movies');
    console.log(
      "\nā„¹ļø  Uncomment the getIndexCount call in the script to run this example"
    );

    // Example 3: Get duplicate statistics
    printSection("Example 3: Get duplicate statistics");
    // await getDuplicateStats('movies');
    console.log(
      "\nā„¹ļø  Uncomment the getDuplicateStats call in the script to run this example"
    );

    // Example 4: Run duplicate scan (uses database)
    printSection("Example 4: Run duplicate scan");
    // await scanDuplicates(false);
    console.log(
      "\nā„¹ļø  Uncomment the scanDuplicates call in the script to run this example"
    );

    // Example 5: Re-index (clear and rebuild)
    printSection("Example 5: Re-index destination");
    // await clearIndex('movies');
    // await indexDestination('movies', '/path/to/movies/destination');
    console.log(
      "\nā„¹ļø  Uncomment the clearIndex and indexDestination calls in the script to run this example"
    );

    printSection("✨ Workflow complete!");
  } catch (error) {
    console.error("\nāŒ Error:", error.message);
    if (error.cause) {
      console.error("Cause:", error.cause);
    }
  }
}

// Run the workflow
exampleWorkflow().catch(console.error);