| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205 |
- #!/usr/bin/env node
- /**
- * Example script demonstrating the new duplicate detection optimization
- *
- * This shows how to:
- * 1. Index destination files for fast duplicate detection
- * 2. Query duplicate statistics
- * 3. Run duplicate scans using the database
- */
/**
 * Base URL of the maintenance API, without a trailing slash.
 *
 * Taken from the `API_BASE` environment variable, falling back to the local
 * default when it is unset or empty. Trailing slashes are removed because
 * every request path in this script is appended as `${API_BASE}/...`; keeping
 * one would produce URLs such as `http://host//maintenance/...`.
 */
const API_BASE = (process.env.API_BASE || "http://localhost:3000").replace(
  /\/+$/,
  ""
);
/**
 * Ask the API to index a destination directory for duplicate detection.
 *
 * Sends `POST ${API_BASE}/maintenance/index/destination` with
 * `reindex: false` and a batch size of 100, then prints the indexed / skipped /
 * error counts found in the response.
 *
 * @param {string} dataset - Dataset name forwarded in the request body (e.g. "movies").
 * @param {string} destination - Destination path forwarded in the request body.
 * @returns {Promise<object>} The parsed JSON response body.
 * @throws {Error} When the server answers with a non-2xx status.
 */
async function indexDestination(dataset, destination) {
  console.log(`\n📁 Indexing ${dataset} destination: ${destination}`);

  const response = await fetch(`${API_BASE}/maintenance/index/destination`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({
      dataset,
      destination,
      reindex: false, // Set to true to rebuild index
      batchSize: 100,
    }),
  });

  // fetch() only rejects on network failures; an HTTP error status still
  // resolves, so it has to be checked before the body is trusted.
  if (!response.ok) {
    throw new Error(
      `Indexing request failed: HTTP ${response.status} ${response.statusText}`
    );
  }

  const result = await response.json();
  console.log(
    `✅ Indexed: ${result.indexed}, Skipped: ${result.skipped}, Errors: ${result.errors}`
  );
  return result;
}
/**
 * Fetch and print how many files are currently indexed for a dataset.
 *
 * Sends `GET ${API_BASE}/maintenance/index/count` with `dataset` and, when
 * given, `destination` as query parameters.
 *
 * @param {string} dataset - Dataset name sent as the `dataset` query parameter.
 * @param {string} [destination] - Optional destination path; omitted from the
 *   query when falsy.
 * @returns {Promise<object>} The parsed JSON response body (its `count` field
 *   is printed).
 * @throws {Error} When the server answers with a non-2xx status.
 */
async function getIndexCount(dataset, destination) {
  console.log(`\n📊 Getting index count for ${dataset}`);

  const params = new URLSearchParams({ dataset });
  if (destination) {
    params.append("destination", destination);
  }

  const response = await fetch(`${API_BASE}/maintenance/index/count?${params}`);

  // An HTTP error status does not reject fetch(), so check it explicitly
  // instead of printing "undefined" or failing on a non-JSON error page.
  if (!response.ok) {
    throw new Error(
      `Index count request failed: HTTP ${response.status} ${response.statusText}`
    );
  }

  const result = await response.json();
  console.log(`📈 Indexed files: ${result.count}`);
  return result;
}
/**
 * Fetch duplicate statistics and print a summary of the first few groups.
 *
 * Sends `GET ${API_BASE}/maintenance/index/stats`, adding a `dataset` query
 * parameter when one is given. For each printed group the hash prefix, size in
 * MB, file count and file list are shown; at most five groups are printed and
 * the remainder is summarised in one line.
 *
 * @param {string} [dataset] - Dataset to restrict the statistics to; when
 *   falsy no filter is sent and the log reads "all datasets".
 * @returns {Promise<object>} The parsed JSON response body.
 * @throws {Error} When the server answers with a non-2xx status.
 */
async function getDuplicateStats(dataset) {
  console.log(
    `\n🔍 Getting duplicate statistics for ${dataset || "all datasets"}`
  );

  // URLSearchParams percent-encodes the value, so dataset names containing
  // "&", "#", "=" or spaces cannot corrupt the query string.
  const query = dataset ? `?${new URLSearchParams({ dataset })}` : "";
  const response = await fetch(`${API_BASE}/maintenance/index/stats${query}`);

  // fetch() resolves on HTTP errors too; fail loudly rather than parse an
  // error payload as if it were statistics.
  if (!response.ok) {
    throw new Error(
      `Duplicate stats request failed: HTTP ${response.status} ${response.statusText}`
    );
  }

  const result = await response.json();
  console.log(`🔄 Total duplicate groups: ${result.totalDuplicates}`);

  const maxGroupsShown = 5;
  // Tolerate a response that omits the group list instead of crashing on
  // `.length` of undefined.
  const groups = result.duplicatesByDataset ?? [];

  if (groups.length > 0) {
    console.log("\nDuplicate groups:");

    for (const [idx, dup] of groups.slice(0, maxGroupsShown).entries()) {
      console.log(`\n Group ${idx + 1}:`);
      console.log(` Hash: ${dup.hash.substring(0, 16)}...`);
      // file_size is divided by 1024 twice, i.e. treated as a byte count.
      console.log(` Size: ${(dup.file_size / 1024 / 1024).toFixed(2)} MB`);
      console.log(` Count: ${dup.file_count} files`);
      console.log(` Files:`);
      for (const file of dup.files) {
        console.log(` - ${file}`);
      }
    }

    if (groups.length > maxGroupsShown) {
      console.log(`\n ... and ${groups.length - maxGroupsShown} more groups`);
    }
  }

  return result;
}
/**
 * Trigger a duplicate scan on the server.
 *
 * Sends `POST ${API_BASE}/maintenance/duplicates/scan` with the
 * `resetExisting` flag in the JSON body.
 *
 * @param {boolean} [resetExisting=false] - Forwarded unchanged as
 *   `resetExisting` in the request body.
 * @returns {Promise<object>} The parsed JSON response body.
 * @throws {Error} When the server answers with a non-2xx status.
 */
async function scanDuplicates(resetExisting = false) {
  console.log(`\n🔎 Scanning for duplicates (reset: ${resetExisting})`);

  const response = await fetch(`${API_BASE}/maintenance/duplicates/scan`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ resetExisting }),
  });

  // Without this check a 4xx/5xx reply would still be reported as a
  // completed scan, because fetch() does not reject on HTTP error statuses.
  if (!response.ok) {
    throw new Error(
      `Duplicate scan request failed: HTTP ${response.status} ${response.statusText}`
    );
  }

  const result = await response.json();
  console.log("✅ Duplicate scan completed");
  return result;
}
/**
 * Delete index entries for a dataset, optionally limited to one destination.
 *
 * Sends `DELETE ${API_BASE}/maintenance/index/<dataset>` and, when a
 * destination is given, adds it as the `destination` query parameter.
 *
 * @param {string} dataset - Dataset name placed in the URL path.
 * @param {string} [destination] - Optional destination path; omitted from the
 *   query when falsy.
 * @returns {Promise<object>} The parsed JSON response body (its `cleared`
 *   field is printed).
 * @throws {Error} When the server answers with a non-2xx status.
 */
async function clearIndex(dataset, destination) {
  console.log(`\n🗑️ Clearing index for ${dataset}`);

  // Both values are caller-supplied text: encode the path segment and the
  // query value so "/", "&", "#", "?" or spaces (common in filesystem paths)
  // cannot change the shape of the URL.
  const query = destination
    ? `?${new URLSearchParams({ destination })}`
    : "";
  const url = `${API_BASE}/maintenance/index/${encodeURIComponent(dataset)}${query}`;

  const response = await fetch(url, { method: "DELETE" });

  // fetch() resolves on HTTP errors; surface them instead of printing
  // "Cleared undefined entries".
  if (!response.ok) {
    throw new Error(
      `Clear index request failed: HTTP ${response.status} ${response.statusText}`
    );
  }

  const result = await response.json();
  console.log(`🗑️ Cleared ${result.cleared} entries`);
  return result;
}
/**
 * Ask the API to rebuild the index for a destination directory.
 *
 * Sends `POST ${API_BASE}/maintenance/index/destination` with
 * `reindex: true` and a batch size of 100, then prints the indexed / skipped /
 * error counts found in the response.
 *
 * @param {string} dataset - Dataset name forwarded in the request body.
 * @param {string} destination - Destination path forwarded in the request body.
 * @returns {Promise<object>} The parsed JSON response body.
 * @throws {Error} When the server answers with a non-2xx status.
 */
async function reindexDestination(dataset, destination) {
  console.log(`\n🔄 Re-indexing ${dataset} destination: ${destination}`);

  const response = await fetch(`${API_BASE}/maintenance/index/destination`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({
      dataset,
      destination,
      reindex: true, // Force rebuild
      batchSize: 100,
    }),
  });

  // fetch() only rejects on network failures; check the HTTP status so a
  // failed rebuild is not reported with "undefined" counts.
  if (!response.ok) {
    throw new Error(
      `Re-index request failed: HTTP ${response.status} ${response.statusText}`
    );
  }

  const result = await response.json();
  console.log(
    `✅ Re-indexed: ${result.indexed}, Skipped: ${result.skipped}, Errors: ${result.errors}`
  );
  return result;
}
/**
 * Walk through the five example steps of the duplicate-detection workflow.
 *
 * As shipped, every API call is commented out: the function only prints a
 * banner and a hint for each step. Uncomment the calls (and adjust the paths)
 * to actually hit the API. Errors raised inside the steps are reported on
 * stderr, together with `error.cause` when present, and are not rethrown.
 *
 * @returns {Promise<void>}
 */
async function exampleWorkflow() {
  const rule = "═".repeat(60);

  // Three-line banner; every banner after the first is preceded by a blank
  // line, which is folded into the first rule.
  const banner = (title, leadingBlank = true) => {
    console.log(leadingBlank ? "\n" + rule : rule);
    console.log(title);
    console.log(rule);
  };

  // One-line reminder naming the call(s) to uncomment for a step.
  const hint = (what) => {
    console.log(`\nℹ️ Uncomment the ${what} in the script to run this example`);
  };

  console.log("🚀 Duplicate Detection Optimization - Example Workflow\n");
  console.log(`Using API: ${API_BASE}\n`);

  try {
    // Example 1: Index a destination directory
    banner("Example 1: Index destination files", false);
    // Uncomment and modify these lines with your actual paths:
    // await indexDestination('movies', '/path/to/movies/destination');
    // await indexDestination('tvshows', '/path/to/tvshows/destination');
    hint("indexDestination calls");

    // Example 2: Check index count
    banner("Example 2: Check index count");
    // await getIndexCount('movies');
    hint("getIndexCount call");

    // Example 3: Get duplicate statistics
    banner("Example 3: Get duplicate statistics");
    // await getDuplicateStats('movies');
    hint("getDuplicateStats call");

    // Example 4: Run duplicate scan (uses database)
    banner("Example 4: Run duplicate scan");
    // await scanDuplicates(false);
    hint("scanDuplicates call");

    // Example 5: Re-index (clear and rebuild)
    banner("Example 5: Re-index destination");
    // await clearIndex('movies');
    // await indexDestination('movies', '/path/to/movies/destination');
    hint("clearIndex and indexDestination calls");

    banner("✨ Workflow complete!");
  } catch (error) {
    console.error("\n❌ Error:", error.message);
    if (error.cause) {
      console.error("Cause:", error.cause);
    }
  }
}
// Run the workflow. Any rejection that escapes exampleWorkflow() is printed
// and turned into a non-zero exit status so shells and CI can detect it;
// process.exitCode is used instead of process.exit() so pending output can
// still be flushed.
exampleWorkflow().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
|