55 lines
1.8 KiB
JavaScript
55 lines
1.8 KiB
JavaScript
#!/usr/bin/env node
|
|
/**
 * Report duplicate photos by filename/path and oversized base-name clusters.
 * Run: node scripts/find_duplicates.js
 */
|
|
const mongoose = require('mongoose');
|
|
const Photo = require('../models/photo');
|
|
|
|
const MONGO_URI = process.env.MONGO_URI || 'mongodb://localhost:27017/photogallery';
|
|
|
|
/**
 * Build an aggregation pipeline that reports every value of `field` that is
 * shared by more than one Photo document.
 *
 * @param {string} field - Name of the Photo field to group on ('filename' or 'path').
 * @returns {object[]} Pipeline whose results look like `{ [field], count, ids }`.
 */
function buildDuplicatePipeline(field) {
  return [
    { $group: { _id: `$${field}`, ids: { $addToSet: '$_id' }, count: { $sum: 1 } } },
    { $match: { count: { $gt: 1 } } },
    { $project: { _id: 0, [field]: '$_id', count: 1, ids: 1 } },
  ];
}

/**
 * Build an aggregation pipeline that groups `.webp` photos by base name (the
 * filename with an optional `-md` / `-sm` suffix and the extension removed)
 * and keeps only the groups holding more than three documents.
 *
 * @returns {object[]} Pipeline whose results look like `{ base, count, files, ids }`.
 */
function buildClusterPipeline() {
  return [
    {
      $project: {
        base: { $regexFind: { input: '$filename', regex: /^(.*?)(-md|-sm)?\.webp$/ } },
        filename: 1,
      },
    },
    // $regexFind yields null when the filename does not match; without this
    // stage every non-.webp photo would be reported as one bogus "null" cluster.
    { $match: { base: { $ne: null } } },
    {
      $group: {
        // A numeric component in an aggregation field path ('$base.captures.0')
        // is treated as a field name, not an array index, so the first capture
        // has to be read with $arrayElemAt.
        _id: { $arrayElemAt: ['$base.captures', 0] },
        files: { $addToSet: '$filename' },
        ids: { $addToSet: '$_id' },
        count: { $sum: 1 },
      },
    },
    { $match: { count: { $gt: 3 } } },
    { $project: { _id: 0, base: '$_id', count: 1, files: 1, ids: 1 } },
  ];
}

/**
 * Connect to MongoDB, run the three duplicate reports and print each one to
 * stdout as pretty-printed JSON.
 *
 * The connection is closed even when an aggregation fails, so the rejection
 * reaches the caller without leaving an open connection behind.
 *
 * @returns {Promise<void>} Resolves once the reports are printed and the
 *   connection is closed; rejects with the first connection/aggregation error.
 */
async function main() {
  await mongoose.connect(MONGO_URI);

  try {
    // The three reports are independent of each other, so run them concurrently.
    const [byFilename, byPath, cluster] = await Promise.all([
      Photo.aggregate(buildDuplicatePipeline('filename')),
      Photo.aggregate(buildDuplicatePipeline('path')),
      Photo.aggregate(buildClusterPipeline()),
    ]);

    console.log('Duplicates by filename:', JSON.stringify(byFilename, null, 2));
    console.log('Duplicates by path:', JSON.stringify(byPath, null, 2));
    console.log('Clusters (>3 variants):', JSON.stringify(cluster, null, 2));
  } finally {
    await mongoose.disconnect();
  }
}
|
|
|
|
/**
 * Last-resort failure handler for the script: print the error to stderr and
 * terminate the process with exit status 1.
 *
 * @param {unknown} failure - The rejection reason produced by main().
 */
function exitWithError(failure) {
  console.error(failure);
  process.exit(1);
}

// Script entry point: run the report and route any rejection to the handler.
main().catch(exitWithError);
|