#!/usr/bin/env node
/**
 * Report duplicate photos by filename/path and oversized base-name clusters.
 * Run: node scripts/find_duplicates.js
 */
const mongoose = require('mongoose');
const Photo = require('../models/photo');

const MONGO_URI = process.env.MONGO_URI || 'mongodb://localhost:27017/photogallery';

// Matches "<base>.webp", "<base>-md.webp" and "<base>-sm.webp";
// the first capture group is <base>.
const VARIANT_PATTERN = /^(.*?)(-md|-sm)?\.webp$/;

// A base name owning more than this many photo documents is reported as a cluster.
const MAX_VARIANTS = 3;

/**
 * Find values of `field` that are shared by more than one photo document.
 *
 * @param {string} field - Name of the Photo field to group on (e.g. 'filename').
 * @returns {Promise<Array<object>>} One entry per duplicated value, shaped as
 *   `{ [field]: value, count, ids }`.
 */
function duplicatesBy(field) {
  return Photo.aggregate([
    { $group: { _id: `$${field}`, ids: { $addToSet: '$_id' }, count: { $sum: 1 } } },
    { $match: { count: { $gt: 1 } } },
    { $project: { _id: 0, [field]: '$_id', count: 1, ids: 1 } },
  ]).exec();
}

/**
 * Group photos by the base name extracted from their filename and return the
 * groups that contain more than MAX_VARIANTS documents.
 *
 * @returns {Promise<Array<object>>} Entries shaped as `{ base, count, files, ids }`.
 */
function variantClusters() {
  return Photo.aggregate([
    { $project: { base: { $regexFind: { input: '$filename', regex: VARIANT_PATTERN } }, filename: 1 } },
    // $regexFind yields null when the filename does not match; without this
    // stage every non-matching photo would be lumped into a single null group.
    { $match: { base: { $ne: null } } },
    {
      $group: {
        // Aggregation field paths cannot index into arrays ('$base.captures.0'
        // treats "0" as a field name), so the first capture is taken explicitly.
        _id: { $arrayElemAt: ['$base.captures', 0] },
        files: { $addToSet: '$filename' },
        ids: { $addToSet: '$_id' },
        count: { $sum: 1 },
      },
    },
    { $match: { count: { $gt: MAX_VARIANTS } } },
    { $project: { _id: 0, base: '$_id', count: 1, files: 1, ids: 1 } },
  ]).exec();
}

/**
 * Connect to MongoDB, run the three reports and print them as pretty JSON.
 * The connection is closed even when one of the aggregations fails.
 *
 * @returns {Promise<void>}
 */
async function main() {
  await mongoose.connect(MONGO_URI);
  try {
    // The three reports are independent of each other, so run them concurrently.
    const [byFilename, byPath, cluster] = await Promise.all([
      duplicatesBy('filename'),
      duplicatesBy('path'),
      variantClusters(),
    ]);

    console.log('Duplicates by filename:', JSON.stringify(byFilename, null, 2));
    console.log('Duplicates by path:', JSON.stringify(byPath, null, 2));
    console.log(`Clusters (>${MAX_VARIANTS} variants):`, JSON.stringify(cluster, null, 2));
  } finally {
    await mongoose.disconnect();
  }
}

main().catch((err) => {
  console.error(err);
  process.exit(1);
});