55 lines
1.8 KiB
JavaScript

#!/usr/bin/env node
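/**
 * Report duplicate photos by filename and by path, plus "oversized" base-name
 * clusters: groups of more than three .webp files that share a base name once
 * an optional -md / -sm suffix is stripped.
 *
 * Run: node scripts/find_duplicates.js
 * The connection string is read from the MONGO_URI environment variable and
 * falls back to mongodb://localhost:27017/photogallery.
 */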
const mongoose = require('mongoose');
const Photo = require('../models/photo');
const MONGO_URI = process.env.MONGO_URI || 'mongodb://localhost:27017/photogallery';
/**
 * Connects to MongoDB, runs three aggregations over the Photo collection and
 * prints each result as pretty-printed JSON:
 *
 *  1. documents sharing the same `filename`,
 *  2. documents sharing the same `path`,
 *  3. clusters of more than three `.webp` files that share a base name once an
 *     optional `-md` / `-sm` suffix is stripped.
 *
 * The connection is always closed, even when an aggregation fails.
 *
 * @returns {Promise<void>} Resolves once the report is printed and the
 *   connection is closed; rejects with the underlying error otherwise.
 */
async function main() {
  await mongoose.connect(MONGO_URI);
  try {
    const byFilename = await Photo.aggregate([
      { $group: { _id: '$filename', ids: { $addToSet: '$_id' }, count: { $sum: 1 } } },
      { $match: { count: { $gt: 1 } } },
      { $project: { _id: 0, filename: '$_id', count: 1, ids: 1 } }
    ]);

    const byPath = await Photo.aggregate([
      { $group: { _id: '$path', ids: { $addToSet: '$_id' }, count: { $sum: 1 } } },
      { $match: { count: { $gt: 1 } } },
      { $project: { _id: 0, path: '$_id', count: 1, ids: 1 } }
    ]);

    const cluster = await Photo.aggregate([
      {
        $project: {
          base: { $regexFind: { input: '$filename', regex: /^(.*?)(-md|-sm)?\.webp$/ } },
          filename: 1
        }
      },
      // $regexFind yields null when the filename does not match the pattern;
      // without this filter every such file would pile up in one null-keyed
      // group and be reported as a bogus cluster.
      { $match: { base: { $ne: null } } },
      {
        $group: {
          // Aggregation field paths treat numeric segments as field names, not
          // array positions, so '$base.captures.0' never yields the first
          // capture. $arrayElemAt is the supported way to pick it.
          _id: { $arrayElemAt: ['$base.captures', 0] },
          files: { $addToSet: '$filename' },
          ids: { $addToSet: '$_id' },
          count: { $sum: 1 }
        }
      },
      { $match: { count: { $gt: 3 } } },
      { $project: { _id: 0, base: '$_id', count: 1, files: 1, ids: 1 } }
    ]);

    console.log('Duplicates by filename:', JSON.stringify(byFilename, null, 2));
    console.log('Duplicates by path:', JSON.stringify(byPath, null, 2));
    console.log('Clusters (>3 variants):', JSON.stringify(cluster, null, 2));
  } finally {
    // Release the connection on both success and failure.
    await mongoose.disconnect();
  }
}
/**
 * Last-resort failure handler: writes the error to stderr and terminates the
 * process with exit code 1.
 *
 * @param {unknown} failure - The rejection reason produced by main().
 */
function reportFatalError(failure) {
  console.error(failure);
  process.exit(1);
}

// Start the report; any rejection from main() goes to the handler above.
main().catch(reportFatalError);