103 lines
3.0 KiB
JavaScript
103 lines
3.0 KiB
JavaScript
#!/usr/bin/env node
|
|
/**
|
|
* Deduplicate files in uploads/ by content hash.
|
|
* Keeps the first file per hash, removes later duplicates, and deletes matching Photo docs.
|
|
* Dry run by default; set APPLY=1 to actually delete.
|
|
*/
|
|
const fs = require('fs');
|
|
const path = require('path');
|
|
const crypto = require('crypto');
|
|
const { pipeline } = require('stream/promises');
|
|
const mongoose = require('mongoose');
|
|
const Photo = require('../models/photo');
|
|
|
|
// Mongo connection string; override with the MONGO_URI env var.
const MONGO_URI = process.env.MONGO_URI || 'mongodb://localhost:27017/photogallery';
// Safety switch: destructive deletes only run when APPLY=1 (dry run otherwise).
const APPLY = process.env.APPLY === '1';
// Uploads directory, resolved relative to this script's parent directory.
const UPLOAD_DIR = path.join(__dirname, '..', 'uploads');
|
|
|
|
/**
 * Compute the sha256 content hash of a file.
 *
 * Streams the file through the hash so arbitrarily large files are
 * handled without loading them into memory.
 *
 * @param {string} filePath - Path of the file to hash.
 * @returns {Promise<string>} Lowercase hex sha256 digest.
 */
async function hashFile(filePath) {
  const digest = crypto.createHash('sha256');
  await pipeline(fs.createReadStream(filePath), digest);
  return digest.digest('hex');
}
|
|
|
|
/**
 * Recursively collect the paths of all files under `dir`.
 *
 * Uses `readdirSync(..., { withFileTypes: true })` so the directory-vs-file
 * distinction comes from the readdir result itself, avoiding one stat()
 * syscall per entry. Symlinks are still resolved with statSync (matching
 * the original follow-symlink behavior), but a dangling symlink is now
 * skipped instead of throwing and aborting the entire scan.
 *
 * @param {string} dir - Directory to walk.
 * @returns {string[]} File paths, depth-first in readdir order.
 */
function walkFiles(dir) {
  const results = [];
  for (const entry of fs.readdirSync(dir, { withFileTypes: true })) {
    const full = path.join(dir, entry.name);
    let isDir;
    if (entry.isSymbolicLink()) {
      try {
        isDir = fs.statSync(full).isDirectory();
      } catch {
        // Broken symlink: nothing to hash or delete here; skip it.
        continue;
      }
    } else {
      isDir = entry.isDirectory();
    }
    if (isDir) {
      results.push(...walkFiles(full));
    } else {
      results.push(full);
    }
  }
  return results;
}
|
|
|
|
/**
 * Scan UPLOAD_DIR for .webp files, group them by sha256 content hash,
 * report duplicates, and — only when APPLY=1 — delete each duplicate
 * file plus any Photo documents that reference it.
 */
async function main() {
  const files = walkFiles(UPLOAD_DIR).filter(f => f.toLowerCase().endsWith('.webp'));
  console.log(`Scanning ${files.length} webp files...`);

  // First file seen per hash is the keeper; every later match is a duplicate.
  const hashMap = new Map();
  const dupes = [];

  for (const file of files) {
    const h = await hashFile(file);
    if (!hashMap.has(h)) {
      hashMap.set(h, file);
    } else {
      dupes.push({ hash: h, keep: hashMap.get(h), remove: file });
    }
  }

  if (!dupes.length) {
    console.log('No duplicate content found.');
    return;
  }

  console.log(`Found ${dupes.length} duplicate files (by content).`);
  // Preview at most 10 pairs so the log stays readable on large galleries.
  dupes.slice(0, 10).forEach(d =>
    console.log(`Hash ${d.hash.slice(0, 12)} keep=${path.basename(d.keep)} remove=${path.basename(d.remove)}`)
  );

  if (!APPLY) {
    console.log('Dry run. Set APPLY=1 to delete duplicates and matching Photo docs.');
    return;
  }

  await mongoose.connect(MONGO_URI);
  console.log(`Connected to Mongo: ${MONGO_URI}`);

  let removedFiles = 0;
  let removedDocs = 0;
  for (const d of dupes) {
    try {
      fs.unlinkSync(d.remove);
      removedFiles++;
    } catch (err) {
      console.error('Failed to delete file', d.remove, err.message);
      // BUG FIX: the original fell through and deleted the Photo docs even
      // though the file is still on disk, orphaning the file and inflating
      // removedDocs. Skip the DB cleanup when the unlink failed.
      continue;
    }
    // Photo docs may store a repo-relative path or just the basename;
    // match on either. Normalize Windows separators for the path form.
    const relPath = path.relative(path.join(__dirname, '..'), d.remove).replace(/\\/g, '/');
    const filename = path.basename(d.remove);
    const res = await Photo.deleteMany({
      $or: [
        { path: relPath },
        { filename }
      ]
    });
    removedDocs += res.deletedCount || 0;
  }

  console.log(`Deleted ${removedFiles} duplicate files.`);
  console.log(`Deleted ${removedDocs} Photo docs matching removed files.`);
  await mongoose.disconnect();
  console.log('Done.');
}
|
|
|
|
// Script entry point: run main() and exit non-zero on any failure.
(async () => {
  try {
    await main();
  } catch (err) {
    console.error(err);
    process.exit(1);
  }
})();
|