chris 746868d720 Add 'main-site/' from commit '5cefb4d1618bc54ae0e86830421a8c911900302c'
git-subtree-dir: main-site
git-subtree-mainline: 4d1daa39101c0a85ca6d916f1c31139faf39632a
git-subtree-split: 5cefb4d1618bc54ae0e86830421a8c911900302c
2026-04-13 19:22:17 -04:00

55 lines
1.8 KiB
JavaScript

#!/usr/bin/env node
/**
* Report duplicate photos by filename/path and oversized base-name clusters.
* Run: node scripts/find_duplicates.js
*/
const mongoose = require('mongoose');
const Photo = require('../models/photo');
const MONGO_URI = process.env.MONGO_URI || 'mongodb://localhost:27017/photogallery';
/**
 * Build an aggregation pipeline that reports values of `field` shared by
 * more than one Photo document.
 *
 * @param {string} field - Name of the document field to group on (e.g. 'filename').
 * @returns {object[]} Pipeline whose results look like `{ <field>, count, ids }`.
 */
function duplicatesByFieldPipeline(field) {
  return [
    { $group: { _id: `$${field}`, ids: { $addToSet: '$_id' }, count: { $sum: 1 } } },
    { $match: { count: { $gt: 1 } } },
    { $project: { _id: 0, [field]: '$_id', count: 1, ids: 1 } },
  ];
}

/**
 * Build an aggregation pipeline that groups `.webp` photos by base name
 * (filename without an optional `-md`/`-sm` suffix and the extension) and
 * keeps only the groups containing more than 3 documents.
 *
 * @returns {object[]} Pipeline whose results look like `{ base, count, files, ids }`.
 */
function oversizedClusterPipeline() {
  return [
    {
      $project: {
        base: { $regexFind: { input: '$filename', regex: /^(.*?)(-md|-sm)?\.webp$/ } },
        filename: 1,
      },
    },
    // $regexFind yields null when the filename does not match; drop those
    // documents so they are not lumped together into one bogus cluster.
    { $match: { base: { $ne: null } } },
    {
      $group: {
        // A numeric component in an aggregation field path ('$base.captures.0')
        // is treated as a field name, not an array index, so the first capture
        // has to be extracted with $arrayElemAt.
        _id: { $arrayElemAt: ['$base.captures', 0] },
        files: { $addToSet: '$filename' },
        ids: { $addToSet: '$_id' },
        count: { $sum: 1 },
      },
    },
    { $match: { count: { $gt: 3 } } },
    { $project: { _id: 0, base: '$_id', count: 1, files: 1, ids: 1 } },
  ];
}

/**
 * Connect to MongoDB, run the duplicate-detection aggregations on Photo and
 * print each result set as pretty-printed JSON:
 *  - filenames used by more than one document,
 *  - paths used by more than one document,
 *  - base names with more than 3 documents.
 *
 * The connection is closed even when an aggregation fails; the error is
 * still propagated to the caller.
 *
 * @returns {Promise<void>}
 */
async function main() {
  await mongoose.connect(MONGO_URI);
  try {
    const byFilename = await Photo.aggregate(duplicatesByFieldPipeline('filename'));
    const byPath = await Photo.aggregate(duplicatesByFieldPipeline('path'));
    const cluster = await Photo.aggregate(oversizedClusterPipeline());

    console.log('Duplicates by filename:', JSON.stringify(byFilename, null, 2));
    console.log('Duplicates by path:', JSON.stringify(byPath, null, 2));
    console.log('Clusters (>3 variants):', JSON.stringify(cluster, null, 2));
  } finally {
    await mongoose.disconnect();
  }
}
// Script entry point: run the report and, if it fails for any reason,
// print the error and terminate with a non-zero exit status.
(async () => {
  try {
    await main();
  } catch (failure) {
    console.error(failure);
    process.exit(1);
  }
})();