#!/usr/bin/perl
# Filename:	find_dup
# Author:	David Ljung Madison
# See License:	http://MarginalHacks.com/License
# Description:	Find duplicates amongst a list of files
# Version:	1.0

use strict;
use warnings;

use Digest::MD5;
use File::Compare qw(compare);

##################################################
# Setup the variables
##################################################
my $PROGNAME = $0;
$PROGNAME =~ s|.*/||;

##################################################
# Usage
##################################################

# Print any error messages followed by the usage text, then exit with
# status -1.  Never returns.
sub usage {
    foreach my $msg (@_) {
        print "ERROR: $msg\n";
    }
    print "\n";
    print "Usage:\t$PROGNAME [-f] [-rm] [-d] <file> <file> ...\n";
    print "\tList all duplicates in a set of files\n";
    print "\n";
    print "\t-f\tDon't list the first duplicate on each line\n";
    print "\t-rm\tRemove all duplicate files\n";
    print "\t-d\tSet debug mode\n";
    print "\n";
    exit -1;
}

# Parse @ARGV (consuming it).
# Returns a list: a reference to the option hash (keys: first, debug, rm)
# followed by the file names.  Calls usage() - which exits - on -h, on an
# unknown option, or when fewer than two files were given.
sub parse_args {
    my @files;
    my %opt = (first => 1);

    while (@ARGV) {
        my $arg = shift(@ARGV);
        if ($arg eq '-h')  { usage(); }
        if ($arg eq '-d')  { $opt{debug} = 1; next; }
        if ($arg eq '-f')  { $opt{first} = 0; next; }
        if ($arg eq '-rm') { $opt{rm}    = 1; next; }
        if ($arg =~ /^-/)  { usage("Unknown option: $arg"); }
        push(@files, $arg);
    }

    usage("Need at least two files") unless (@files > 1);

    return (\%opt, @files);
}

##################################################
# Helpers
##################################################

# Return a grouping key ("<size>:<md5 hex>") for a regular, readable file,
# or nothing if the path is not a regular file or cannot be read.
# The key is only a pre-filter: candidates sharing a key are still compared
# byte-for-byte before being reported.  It is computed in-process so that no
# shell quoting of the file name is involved.
sub file_signature {
    my ($file) = @_;

    return unless -f $file;
    open(my $fh, '<', $file) or return;
    binmode($fh);
    my $size = -s $fh;
    # addfile() croaks on a read error; treat that as "unreadable"
    my $digest = eval { Digest::MD5->new->addfile($fh)->hexdigest };
    close($fh);

    return unless defined $digest;
    return "$size:$digest";
}

# True if both paths resolve to the same device and inode, i.e. they are the
# same underlying file (same path given twice, hard link, or symlink).
# Returns false when either stat fails or no inode number is available.
sub same_file {
    my ($one, $two) = @_;

    my @st1 = stat($one);
    my @st2 = stat($two);
    return 0 unless @st1 && @st2;
    return 0 unless $st1[1];
    return ($st1[0] == $st2[0] && $st1[1] == $st2[1]) ? 1 : 0;
}

# Remove every file in @dups, keeping $keep.
# A name that refers to the same underlying file as $keep is never removed:
# unlinking it could destroy the only remaining copy (e.g. when the same
# path was listed twice, or one name is a symlink to the other).
sub remove_duplicates {
    my ($keep, @dups) = @_;

    my @failed;
    foreach my $dup (@dups) {
        # Already gone (e.g. the same path was listed more than once)
        next unless (-e $dup || -l $dup);
        if (same_file($keep, $dup)) {
            print STDERR "[$PROGNAME] WARNING: Not removing [$dup]: same file as [$keep]\n";
            next;
        }
        push(@failed, $dup) unless unlink($dup);
    }

    print STDERR "[$PROGNAME] ERROR: Couldn't remove all files [@failed]\n"
        if @failed;
}

##################################################
# Main code
##################################################

# Group the files given on the command line by signature, confirm each
# candidate group byte-for-byte, print one line per set of identical files
# and, with -rm, remove all but the first file of each set.
sub main {
    my ($opt, @files) = parse_args();

    # Get sums
    my %sums;
    foreach my $f (@files) {
        my $sum = file_signature($f);
        unless (defined $sum) {
            print STDERR "[$PROGNAME] WARNING: Skipping unreadable or non-regular file [$f]\n";
            next;
        }
        print "$f: $sum\n" if ($opt->{debug});
        push(@{$sums{$sum}}, $f);
    }

    # Find matching sums; sort files within a group and the groups
    # themselves so the output order is deterministic
    my @match =
        sort { $a->[0] cmp $b->[0] }
        map  { [ sort @$_ ] }
        grep { @$_ > 1 } values %sums;

    # Double-check matches with a byte-for-byte comparison
    # It is unlikely, though possible, that they have the same sum but are different
    foreach my $m (@match) {
        my @check = @$m;
        while (@check) {
            my $first = shift(@check);
            my (@same, @diff);
            foreach my $other (@check) {
                # compare() returns 0 only for identical contents; a
                # comparison error (-1) is treated as "different"
                if (compare($first, $other) == 0) {
                    push(@same, $other);
                }
                else {
                    push(@diff, $other);
                }
            }
            print "Same sum but different! [$first @same] vs [@diff]\n"
                if ($opt->{debug} && @diff);
            @check = @diff;    # Go through any remainders that didn't match
            next unless @same;

            print $first, " " if $opt->{first};
            print join(" ", @same), "\n";

            remove_duplicates($first, @same) if ($opt->{rm});
        }
    }
}

main();