103 lines
3.0 KiB
JavaScript

#!/usr/bin/env node
/**
* Deduplicate files in uploads/ by content hash.
* Keeps the first file per hash, removes later duplicates, and deletes matching Photo docs.
* Dry run by default; set APPLY=1 to actually delete.
*/
const fs = require('fs');
const path = require('path');
const crypto = require('crypto');
const { pipeline } = require('stream/promises');
const mongoose = require('mongoose');
const Photo = require('../models/photo');
// MongoDB connection string; taken from the MONGO_URI environment variable,
// falling back to a local "photogallery" database when it is unset or empty.
const MONGO_URI = process.env.MONGO_URI || 'mongodb://localhost:27017/photogallery';
// Destructive mode switch: true only when APPLY is exactly the string "1".
// Any other value (or no value) leaves the script in dry-run mode.
const APPLY = process.env.APPLY === '1';
// Directory that is scanned for duplicates: "uploads" next to this script's parent directory.
const UPLOAD_DIR = path.join(__dirname, '..', 'uploads');
/**
 * Compute the SHA-256 digest of a file's contents.
 *
 * The file is read as a stream, so memory use does not grow with file size.
 * Chunks are fed through `hash.update()` and the result is read once with
 * `hash.digest()`; the Hash object is never also used as a stream, so the
 * digest is only finalized in one place.
 *
 * @param {string} filePath - Path of the file to hash.
 * @returns {Promise<string>} Hex-encoded SHA-256 digest of the file contents.
 * @throws Rejects with the underlying read error (e.g. ENOENT) if the file
 *   cannot be opened or read.
 */
async function hashFile(filePath) {
  const hash = crypto.createHash('sha256');
  // Async iteration propagates read errors as rejections and destroys the
  // stream if the loop exits early.
  for await (const chunk of fs.createReadStream(filePath)) {
    hash.update(chunk);
  }
  return hash.digest('hex');
}
/**
 * Recursively list every non-directory entry under `dir`.
 *
 * Entries of each directory are visited in sorted name order, so the returned
 * list (and therefore which duplicate a caller treats as "first") does not
 * depend on the order the filesystem happens to return entries in.
 * `fs.statSync` is used, so symbolic links are followed.
 *
 * @param {string} dir - Directory to walk.
 * @returns {string[]} Paths of all files found, each built by joining `dir`
 *   with the entry names leading to the file; a directory's contents appear
 *   at that directory's position in the sorted order.
 * @throws Propagates any error from `fs.readdirSync` / `fs.statSync`
 *   (e.g. a missing directory or a dangling symlink).
 */
function walkFiles(dir) {
  const results = [];
  for (const entry of fs.readdirSync(dir).sort()) {
    const full = path.join(dir, entry);
    if (fs.statSync(full).isDirectory()) {
      // Append one by one rather than push(...array): spreading a very large
      // subtree as call arguments can exceed the engine's argument limit.
      for (const nested of walkFiles(full)) {
        results.push(nested);
      }
    } else {
      results.push(full);
    }
  }
  return results;
}
/**
 * Script entry point: find .webp files under UPLOAD_DIR with identical
 * content and, when APPLY is set, delete the later copies and their Photo docs.
 *
 * Steps:
 *  1. Hash every .webp file; the first file seen for a hash is kept, every
 *     later file with the same hash is recorded as a duplicate.
 *  2. Print a summary (at most the first 10 duplicates are listed).
 *  3. In dry-run mode (APPLY not set) stop here without touching anything.
 *  4. Otherwise connect to Mongo, delete each duplicate file, and delete the
 *     Photo docs whose `path` or `filename` matches the removed file.
 *
 * A file that cannot be deleted is reported and its Photo docs are left
 * alone, so a record is never removed while its file is still on disk.
 *
 * @returns {Promise<void>}
 * @throws Rejects on scan/hash errors and on Mongo connection or query errors;
 *   the Mongo connection is closed before the error propagates.
 */
async function main() {
  const files = walkFiles(UPLOAD_DIR).filter((f) => f.toLowerCase().endsWith('.webp'));
  console.log(`Scanning ${files.length} webp files...`);

  // hash -> first file seen with that content (the copy that is kept).
  const firstByHash = new Map();
  const dupes = [];
  for (const file of files) {
    const h = await hashFile(file);
    const keep = firstByHash.get(h);
    if (keep === undefined) {
      firstByHash.set(h, file);
    } else {
      dupes.push({ hash: h, keep, remove: file });
    }
  }

  if (dupes.length === 0) {
    console.log('No duplicate content found.');
    return;
  }

  console.log(`Found ${dupes.length} duplicate files (by content).`);
  for (const d of dupes.slice(0, 10)) {
    console.log(`Hash ${d.hash.slice(0, 12)} keep=${path.basename(d.keep)} remove=${path.basename(d.remove)}`);
  }

  if (!APPLY) {
    console.log('Dry run. Set APPLY=1 to delete duplicates and matching Photo docs.');
    return;
  }

  await mongoose.connect(MONGO_URI);
  try {
    // Never print credentials embedded in the connection string (user:pass@host).
    console.log(`Connected to Mongo: ${MONGO_URI.replace(/\/\/[^/@]*@/, '//***@')}`);

    const projectRoot = path.join(__dirname, '..');
    let removedFiles = 0;
    let removedDocs = 0;

    for (const d of dupes) {
      try {
        fs.unlinkSync(d.remove);
        removedFiles++;
      } catch (err) {
        console.error('Failed to delete file', d.remove, err.message);
        // The file is still on disk: keep its Photo docs so no record is
        // dropped for a file that still exists.
        continue;
      }

      // Photo paths are matched relative to the project root, with forward slashes.
      const relPath = path.relative(projectRoot, d.remove).replace(/\\/g, '/');
      const filename = path.basename(d.remove);
      // NOTE(review): the `filename` clause matches on basename only. If
      // Photo.filename is not unique across upload subdirectories this could
      // also delete the doc of the kept copy — confirm against the Photo schema.
      const res = await Photo.deleteMany({
        $or: [
          { path: relPath },
          { filename },
        ],
      });
      removedDocs += res.deletedCount ?? 0;
    }

    console.log(`Deleted ${removedFiles} duplicate files.`);
    console.log(`Deleted ${removedDocs} Photo docs matching removed files.`);
  } finally {
    // Close the connection even when a query fails part-way through.
    await mongoose.disconnect();
  }
  console.log('Done.');
}
// Run the script. Any rejection from main() is printed and the process
// exits with status 1.
function reportFatalAndExit(error) {
  console.error(error);
  process.exit(1);
}

main().catch(reportFatalAndExit);