#!/usr/bin/env node
/**
 * Deduplicate .webp files under uploads/ by content hash (SHA-256).
 * Keeps the first file encountered per hash, removes later duplicates, and
 * deletes the Photo docs that match the removed files.
 * Dry run by default; set APPLY=1 to actually delete.
 */
const fs = require('fs');
const path = require('path');
const crypto = require('crypto');
const { pipeline } = require('stream/promises');
const mongoose = require('mongoose');
const Photo = require('../models/photo');

const MONGO_URI = process.env.MONGO_URI || 'mongodb://localhost:27017/photogallery';
const APPLY = process.env.APPLY === '1';
const UPLOAD_DIR = path.join(__dirname, '..', 'uploads');

/**
 * Compute the SHA-256 digest of a file by streaming its contents.
 *
 * @param {string} filePath - Path of the file to hash.
 * @returns {Promise<string>} Hex-encoded SHA-256 digest of the file contents.
 */
async function hashFile(filePath) {
  const hash = crypto.createHash('sha256');
  const stream = fs.createReadStream(filePath);
  // Stream the file into the hash so large files are never fully buffered.
  await pipeline(stream, hash);
  return hash.digest('hex');
}

/**
 * Recursively collect the paths of all non-directory entries under `dir`.
 *
 * @param {string} dir - Directory to walk.
 * @returns {string[]} Paths built by joining `dir` with each entry name, in
 *   the order the directory listing yields them.
 */
function walkFiles(dir) {
  const results = [];
  for (const entry of fs.readdirSync(dir)) {
    const full = path.join(dir, entry);
    const stat = fs.statSync(full);
    if (stat.isDirectory()) {
      results.push(...walkFiles(full));
    } else {
      results.push(full);
    }
  }
  return results;
}

/**
 * Scan UPLOAD_DIR for .webp files with identical content and report them.
 * With APPLY=1, also delete every duplicate after the first one seen and the
 * Photo docs matching each removed file.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const files = walkFiles(UPLOAD_DIR).filter((f) => f.toLowerCase().endsWith('.webp'));
  console.log(`Scanning ${files.length} webp files...`);

  // Maps content hash -> first file seen with that content (the one we keep).
  const hashMap = new Map();
  const dupes = [];
  for (const file of files) {
    const h = await hashFile(file);
    if (!hashMap.has(h)) {
      hashMap.set(h, file);
    } else {
      dupes.push({ hash: h, keep: hashMap.get(h), remove: file });
    }
  }

  if (!dupes.length) {
    console.log('No duplicate content found.');
    return;
  }

  console.log(`Found ${dupes.length} duplicate files (by content).`);
  // Only preview the first few pairs to keep the output readable.
  dupes.slice(0, 10).forEach((d) =>
    console.log(`Hash ${d.hash.slice(0, 12)} keep=${path.basename(d.keep)} remove=${path.basename(d.remove)}`)
  );

  if (!APPLY) {
    console.log('Dry run. Set APPLY=1 to delete duplicates and matching Photo docs.');
    return;
  }

  await mongoose.connect(MONGO_URI);
  try {
    console.log(`Connected to Mongo: ${MONGO_URI}`);

    let removedFiles = 0;
    let removedDocs = 0;
    for (const d of dupes) {
      let fileGone = true;
      try {
        fs.unlinkSync(d.remove);
        removedFiles++;
      } catch (err) {
        console.error('Failed to delete file', d.remove, err.message);
        // A file that has already vanished still warrants doc cleanup; any
        // other failure means the file is still on disk, so keep its docs.
        fileGone = err.code === 'ENOENT';
      }
      if (!fileGone) {
        continue;
      }

      // Path relative to the project root, normalised to forward slashes.
      const relPath = path.relative(path.join(__dirname, '..'), d.remove).replace(/\\/g, '/');
      const filename = path.basename(d.remove);
      // NOTE(review): the `filename` clause matches on basename only, so it
      // could also hit docs of files kept in other subdirectories if
      // basenames are not unique — confirm against the Photo schema.
      const res = await Photo.deleteMany({
        $or: [
          { path: relPath },
          { filename },
        ],
      });
      removedDocs += res.deletedCount || 0;
    }

    console.log(`Deleted ${removedFiles} duplicate files.`);
    console.log(`Deleted ${removedDocs} Photo docs matching removed files.`);
  } finally {
    // Always release the connection, even if a delete query throws mid-loop.
    await mongoose.disconnect();
  }
  console.log('Done.');
}

main().catch((err) => {
  console.error(err);
  process.exit(1);
});