"use client"; import { ArrowPathIcon, ChartBarIcon, FolderIcon, TrashIcon, } from "@heroicons/react/24/outline"; import { useMutation, useQuery, useQueryClient } from "@tanstack/react-query"; import { useEffect, useState } from "react"; import toast from "react-hot-toast"; import { del, get, post } from "../../lib/api"; import LoadingCard from "../components/Loading"; import { useAppContext } from "../providers/AppContext"; interface IndexStats { totalDuplicates: number; duplicatesByDataset: Array<{ dataset: string; hash: string; file_size: number; file_count: number; files: string[]; }>; } interface IndexCount { count: number; } export default function IndexManagementPage() { const queryClient = useQueryClient(); const { datasets, datasetsConfig } = useAppContext(); const [selectedDataset, setSelectedDataset] = useState(""); const [destinationPath, setDestinationPath] = useState(""); const [batchSize, setBatchSize] = useState(100); const datasetNames = datasets ? datasets.map((p: string) => p.split("/").pop()).filter(Boolean) : []; // Auto-populate destination path from dataset configuration when a dataset is selected // We mimic the backend collector: prefer top-level destination, otherwise any nested config with a destination key useEffect(() => { if (!selectedDataset || !datasetsConfig) return; const cfg = datasetsConfig[selectedDataset]; if (!cfg) return; const tryFindDestination = (obj: any): string | undefined => { if (!obj || typeof obj !== "object") return undefined; if (typeof obj.destination === "string" && obj.destination.trim()) { return obj.destination as string; } for (const value of Object.values(obj)) { if ( value && typeof value === "object" && typeof value.destination === "string" ) { if (value.destination.trim()) return value.destination as string; } } return undefined; }; const destination = tryFindDestination(cfg); if (destination && destination !== destinationPath) { setDestinationPath(destination); } }, [selectedDataset, datasetsConfig, destinationPath]); // Get index count for selected dataset const { data: indexCount, isLoading: isLoadingCount, refetch: refetchCount, } = useQuery({ queryKey: ["index-count", selectedDataset], queryFn: async () => selectedDataset ? get("/maintenance/index/count", { dataset: selectedDataset }) : { count: 0 }, enabled: !!selectedDataset, }); // Get duplicate stats const { data: stats, isLoading: isLoadingStats, refetch: refetchStats, } = useQuery({ queryKey: ["index-stats", selectedDataset], queryFn: async () => { const params = selectedDataset ? { dataset: selectedDataset } : undefined; return get("/maintenance/index/stats", params); }, }); // Index destination mutation const indexMutation = useMutation({ mutationFn: async ({ dataset, destination, reindex, }: { dataset: string; destination: string; reindex: boolean; }) => post("/maintenance/index/destination", { dataset, destination, reindex, batchSize, }), onSuccess: (data) => { toast.success( `✅ Indexed: ${data.indexed}, Skipped: ${data.skipped}, Errors: ${data.errors}` ); refetchCount(); refetchStats(); }, onError: (err: any) => { console.error(err); toast.error("Failed to index destination"); }, }); // Clear index mutation const clearMutation = useMutation({ mutationFn: async (dataset: string) => del(`/maintenance/index/${dataset}`), onSuccess: (data) => { toast.success(`🗑️ Cleared ${data.cleared} index entries`); refetchCount(); refetchStats(); }, onError: (err: any) => { console.error(err); toast.error("Failed to clear index"); }, }); const handleIndex = (reindex: boolean) => { if (!selectedDataset) { toast.error("Please select a dataset"); return; } if (!destinationPath) { toast.error("Please enter a destination path"); return; } indexMutation.mutate({ dataset: selectedDataset, destination: destinationPath, reindex, }); }; const handleClear = () => { if (!selectedDataset) { toast.error("Please select a dataset"); return; } if (confirm(`Clear all index entries for ${selectedDataset}?`)) { clearMutation.mutate(selectedDataset); } }; const formatBytes = (bytes: number) => { if (!bytes) return "0 B"; const sizes = ["B", "KB", "MB", "GB", "TB"]; const i = Math.floor(Math.log(bytes) / Math.log(1024)); return `${(bytes / Math.pow(1024, i)).toFixed(1)} ${sizes[i]}`; }; return (

Index Management

Index destination files for fast duplicate detection

{/* Index Controls */}

Index Destination

setDestinationPath(e.target.value)} placeholder="/path/to/destination" className="w-full px-3 py-2 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:ring-blue-500 focus:border-blue-500 dark:bg-gray-700 dark:text-gray-100" />
setBatchSize(parseInt(e.target.value))} min="10" max="1000" className="w-full px-3 py-2 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:ring-blue-500 focus:border-blue-500 dark:bg-gray-700 dark:text-gray-100" />

Number of files to process at once

{/* Index Stats */} {selectedDataset && (

Index Statistics

{isLoadingCount ? ( ) : (
Indexed Files {indexCount?.count || 0}
)}
)} {/* Duplicate Stats */}

Duplicate Statistics

{isLoadingStats ? ( ) : stats && stats.totalDuplicates > 0 ? (
Total Duplicate Groups {stats.totalDuplicates}
{stats.duplicatesByDataset.slice(0, 10).map((dup, idx) => (
[{dup.dataset}] {dup.file_count} files {formatBytes(dup.file_size)}
Hash: {dup.hash.substring(0, 32)}...
{dup.files.map((file, fileIdx) => (
• {file}
))}
))}
{stats.duplicatesByDataset.length > 10 && (

... and {stats.duplicatesByDataset.length - 10} more duplicate groups

)}
) : (

No duplicates found in indexed files

)}
); }