|
|
@@ -2,6 +2,7 @@ import { Injectable, Logger } from '@nestjs/common';
|
|
|
import { Cron, CronExpression } from '@nestjs/schedule';
|
|
|
import crypto from 'crypto';
|
|
|
import fs from 'fs';
|
|
|
+import fsPromises from 'fs/promises';
|
|
|
import path from 'path';
|
|
|
import { DatasetsService } from './datasets.service';
|
|
|
import { DbService } from './db.service';
|
|
|
@@ -113,7 +114,7 @@ export class MaintenanceService {
|
|
|
* Scan destination folders (as defined in settings.datasets) for duplicate files.
|
|
|
* Duplicates are recorded in the duplicate_files table for manual review.
|
|
|
*/
|
|
|
- findDuplicateFiles(options: { resetExisting?: boolean } = {}) {
|
|
|
+ async findDuplicateFiles(options: { resetExisting?: boolean } = {}) {
|
|
|
const { resetExisting = false } = options;
|
|
|
|
|
|
this.logger.log('Starting duplicate file scan');
|
|
|
@@ -168,8 +169,8 @@ export class MaintenanceService {
|
|
|
}
|
|
|
|
|
|
this.logger.log(`Scanning destination: ${destination}`);
|
|
|
- const groups = this.scanDestinationForDuplicates(destination);
|
|
|
- this.scanForSimilarNames(destination);
|
|
|
+ const groups = await this.scanDestinationForDuplicates(destination);
|
|
|
+ await this.scanForSimilarNames(destination);
|
|
|
for (const group of groups) {
|
|
|
const entry = {
|
|
|
dataset: datasetName,
|
|
|
@@ -232,7 +233,7 @@ export class MaintenanceService {
|
|
|
return destinations;
|
|
|
}
|
|
|
|
|
|
- private scanDestinationForDuplicates(destination: string) {
|
|
|
+ private async scanDestinationForDuplicates(destination: string) {
|
|
|
const files = this.walkFiles(destination);
|
|
|
this.logger.log(`Found ${files.length} files to scan in ${destination}`);
|
|
|
const groups = new Map<string, { size: number; files: string[] }>();
|
|
|
@@ -240,10 +241,10 @@ export class MaintenanceService {
|
|
|
|
|
|
for (const filePath of files) {
|
|
|
try {
|
|
|
- const stat = fs.statSync(filePath);
|
|
|
+ const stat = await fsPromises.stat(filePath);
|
|
|
if (!stat.isFile()) continue;
|
|
|
|
|
|
- const hash = this.hashFile(filePath);
|
|
|
+ const hash = await this.hashFileAsync(filePath);
|
|
|
if (hash) {
|
|
|
const key = `${hash}:${stat.size}`;
|
|
|
const group = groups.get(key) || { size: stat.size, files: [] };
|
|
|
@@ -274,7 +275,7 @@ export class MaintenanceService {
|
|
|
}));
|
|
|
}
|
|
|
|
|
|
- private scanForSimilarNames(destination: string) {
|
|
|
+ private async scanForSimilarNames(destination: string) {
|
|
|
const files = this.walkFiles(destination);
|
|
|
this.logger.log(
|
|
|
`Checking ${files.length} files for similar names in ${destination}`,
|
|
|
@@ -284,7 +285,7 @@ export class MaintenanceService {
|
|
|
|
|
|
for (const filePath of files) {
|
|
|
try {
|
|
|
- const stat = fs.statSync(filePath);
|
|
|
+ const stat = await fsPromises.stat(filePath);
|
|
|
if (!stat.isFile()) continue;
|
|
|
|
|
|
const baseName = path
|
|
|
@@ -356,10 +357,10 @@ export class MaintenanceService {
|
|
|
return files;
|
|
|
}
|
|
|
|
|
|
- private hashFile(filePath: string): string | null {
|
|
|
+ private async hashFileAsync(filePath: string): Promise<string | null> {
|
|
|
try {
|
|
|
+ const data = await fsPromises.readFile(filePath);
|
|
|
const hash = crypto.createHash('sha1');
|
|
|
- const data = fs.readFileSync(filePath);
|
|
|
hash.update(data);
|
|
|
return hash.digest('hex');
|
|
|
} catch (error) {
|