Skip to content

Commit

Permalink
- alert message if order of options is wrong
Browse files Browse the repository at this point in the history
- support for ignoring files by patterns
- fixing file statistic calculation
  • Loading branch information
thomas694 committed Oct 18, 2020
1 parent 50de637 commit 9287f7b
Show file tree
Hide file tree
Showing 4 changed files with 116 additions and 42 deletions.
15 changes: 11 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,21 @@ However, Matthias' current version 1.23 is not supporting my requirements. And i
I added the following features to finddupe:
- multiple reference directories that shall not be touched (v1.24)
- unicode support (v1.25)
- alert message if order of options is wrong (v1.26)
- support for ignoring files by patterns (v1.26)

It works for me, but some more testing is desirable.

I've updated the project to use Visual Studio 2019.

## Usage
```
finddupe v1.25 compiled Jun 4 2017
Usage: finddupe [options] [-ref] <filepat> [filepat]...
finddupe v1.26 compiled Oct 18 2020
an enhanced version by thomas694 (@GH), originally by Matthias Wandel
This program comes with ABSOLUTELY NO WARRANTY. This is free software, and you
are welcome to redistribute it under certain conditions; view GNU GPLv3 for more.
Usage: finddupe [options] [-ign <substr> ...] [-ref <filepat> ...] <filepat>...
Options:
-bat <file.bat> Create batch file with commands to do the hard
linking. run batch file afterwards to do it
Expand All @@ -34,15 +40,16 @@ Options:
-v Verbose
-sigs Show signatures calculated based on first 32k for each file
-rdonly Apply to readonly files also (as opposed to skipping them)
-ref <filepat> Following file pattern are files that are for reference, NOT
to be eliminated, only used to check duplicates against
-z Do not skip zero length files (zero length files are ignored
by default)
-u Do not print a warning for files that cannot be read
-p Hide progress indicator (useful when redirecting to a file)
-j Follow NTFS junctions and reparse points (off by default)
-listlink hardlink list mode. Not valid with -del, -bat, -hardlink,
or -rdonly, options
-ign <substr> Ignore file pattern, eg. .bak or .tmp (repeatable)
-ref <filepat> Following file pattern are files that are for reference, NOT to
be eliminated, only used to check duplicates against (repeatable)
filepat Pattern for files. Examples:
c:\** Match everything on drive C
c:\**\*.jpg Match only .jpg files on drive C
Expand Down
131 changes: 99 additions & 32 deletions finddupe.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,11 @@
// Version 1.25
// Copyright (C) Jun 2017 thomas694
// added unicode support
// Version 1.26
// Copyright (C) Oct 2020 thomas694
// added support for ignore filename patterns
//
// This program is free software: you can redistribute it and/or modify
// finddupe is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
Expand All @@ -29,7 +32,7 @@
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//--------------------------------------------------------------------------

#define VERSION "1.25"
#define VERSION "1.26"

#define REF_CODE

Expand All @@ -43,6 +46,9 @@
#include <errno.h>
#include <ctype.h>

#include <shlwapi.h> /* StrStrI */
#pragma comment(lib, "shlwapi.lib") /* unresolved external symbol __imp__StrStrIW@8 */

#include <process.h>
#include <io.h>
#include <sys/utime.h>
Expand Down Expand Up @@ -92,6 +98,7 @@ struct {
int HardlinkGroups;
int CantReadFiles;
int ZeroLengthFiles;
int IgnoredFiles;
__int64 TotalBytes;
__int64 DuplicateBytes;
}DupeStats;
Expand All @@ -116,7 +123,11 @@ int ShowProgress = 1; // Show progressing file count...
int HideCantReadMessage= 0;// Hide the can't read file error
int SkipZeroLength = 1; // Ignore zero length files.
int ProgressIndicatorVisible = 0; // Whether a progress indicator needs to be overwritten.
int FollowReparse = 0; // Wether to follow reparse points (like unix softlinks for NTFS)
int FollowReparse = 0; // Whether to follow reparse points (like unix softlinks for NTFS)

TCHAR* * IgnorePatterns; // Patterns of filename to ignore (can be repeated, eg. .bak, .tmp)
int IgnorePatternsAlloc; // Number of allocated ignore patterns
int IgnorePatternsCount; // Number of specified ignore patterns

int MyGlob(const TCHAR * Pattern, int FollowReparse, void (*FileFuncParm)(const TCHAR * FileName));

Expand Down Expand Up @@ -187,7 +198,7 @@ static int EliminateDuplicate(FileData_t ThisFile, FileData_t DupeOf)
if (ThisFile.FileSize != DupeOf.FileSize) return 0;

Hardlinked = 0;
if (DupeOf.NumLinks && memcmp(&ThisFile.FileIndex, &DupeOf.FileIndex, 8) == 0){
if (DupeOf.NumLinks && memcmp(&ThisFile.FileIndex, &DupeOf.FileIndex, sizeof(DupeOf.FileIndex)) == 0){
Hardlinked = 1;
goto dont_read;
}
Expand Down Expand Up @@ -368,35 +379,52 @@ static int IsNonRefPath(TCHAR * filename)
}
#endif

//--------------------------------------------------------------------------
// Append a file record to the global FileData array, growing it when full.
//   ThisFile - record to store; copied by value into FileData[NumUnique].
// Exits the program if the array cannot be enlarged.
//--------------------------------------------------------------------------
static void StoreFileData(FileData_t ThisFile)
{
    if (NumUnique >= NumAllocated) {
        // Array is full. Make it bigger (by half), but always by at least one
        // slot: NumAllocated / 2 is 0 for a capacity of 0 or 1, which would
        // otherwise leave the array the same size and make the store below
        // write past its end.
        int NewAllocated = NumAllocated + NumAllocated / 2;
        if (NewAllocated <= NumUnique) NewAllocated = NumUnique + 1;

        // Go through a temporary so FileData is only replaced on success.
        FileData_t* Grown = (FileData_t*)realloc(FileData, sizeof(FileData_t) * NewAllocated);
        if (Grown == NULL) {
            fprintf(stderr, "Malloc failure");
            exit(EXIT_FAILURE);
        }
        FileData = Grown;
        NumAllocated = NewAllocated;
    }
    FileData[NumUnique] = ThisFile;
    NumUnique += 1;
}

//--------------------------------------------------------------------------
// Check for duplicates.
//--------------------------------------------------------------------------
static void CheckDuplicate(FileData_t ThisFile)
{
int Ptr;
int * Link;
// Find where in the trie structure it belongs.
// Find where in the tree structure it belongs.
Ptr = 0;

DupeStats.TotalFiles += 1;
DupeStats.TotalBytes += (__int64) ThisFile.FileSize;

if (NumUnique == 0) goto store_it;

for(;;){
int comp;
comp = memcmp(&ThisFile.Checksum, &FileData[Ptr].Checksum, sizeof(Checksum_t));
if (comp == 0){
if (comp == 0) {
// the same file
if (_tcscmp(ThisFile.FileName, FileData[Ptr].FileName) == 0) {
return;
}
// Check for true duplicate.
#ifdef REF_CODE
if (!ReferenceFiles && !HardlinkSearchMode && IsNonRefPath(ThisFile.FileName)){
if (!ReferenceFiles && !HardlinkSearchMode && IsNonRefPath(ThisFile.FileName)) {
#else
if (!ReferenceFiles && !HardlinkSearchMode){
if (!ReferenceFiles && !HardlinkSearchMode) {
#endif
int r = EliminateDuplicate(ThisFile, FileData[Ptr]);
if (r){
if (r) {
if (r == 2) FileData[Ptr].NumLinks += 1; // Update link count.
// Its a duplicate for elimination. Do not store info on it.
// Its a duplicate for elimination. Do not store info on it. New: store info for correct statistic calculation
StoreFileData(ThisFile);
return;
}
}
Expand All @@ -421,19 +449,12 @@ static void CheckDuplicate(FileData_t ThisFile)
}
}

DupeStats.TotalFiles += 1;
DupeStats.TotalBytes += (__int64) ThisFile.FileSize;

store_it:

if (NumUnique >= NumAllocated){
// Array is full. Make it bigger
NumAllocated = NumAllocated + NumAllocated/2;
FileData = (FileData_t*) realloc(FileData, sizeof(FileData_t) * NumAllocated);
if (FileData == NULL){
fprintf(stderr, "Malloc failure");
exit(EXIT_FAILURE);
}
}
FileData[NumUnique] = ThisFile;
NumUnique += 1;
StoreFileData(ThisFile);
}

//--------------------------------------------------------------------------
Expand Down Expand Up @@ -479,9 +500,14 @@ static void ProcessFile(const TCHAR * FileName)
Checksum_t CheckSum;
struct _stat FileStat;

for (int i = 0; i < NumUnique; i++)
{
if (_tcscmp(FileName, FileData[i].FileName) == 0)
return;
}

FileData_t ThisFile;
memset(&ThisFile, 0, sizeof(ThisFile));

{
static int LastPrint, Now;
Now = GetTickCount();
Expand Down Expand Up @@ -625,6 +651,18 @@ static void ProcessFile(const TCHAR * FileName)

ThisFile.FileName = _tcsdup(FileName); // allocate the string last, so
// we don't waste memory on errors.

// skip if filename contains a ignore pattern
for (int i = 0; i < IgnorePatternsCount; i++)
{
if (StrStrI(FileName, IgnorePatterns[i]))
{
DupeStats.IgnoredFiles++;
StoreFileData(ThisFile);
return;
}
}

CheckDuplicate(ThisFile);
}

Expand All @@ -634,10 +672,10 @@ static void ProcessFile(const TCHAR * FileName)
static void Usage (void)
{
_tprintf(TEXT("finddupe v%s compiled %s\n"), TEXT(VERSION), TEXT(__DATE__));
_tprintf(TEXT("an enhanced version by thomas694 (@GH)\n"));
_tprintf(TEXT("an enhanced version by thomas694 (@GH), originally by Matthias Wandel\n"));
_tprintf(TEXT("This program comes with ABSOLUTELY NO WARRANTY. This is free software, and you\n"));
_tprintf(TEXT("are welcome to redistribute it under certain conditions; view GNU GPLv3 for more.\n"));
_tprintf(TEXT("Usage: finddupe [options] [-ref] <filepat> [filepat]...\n"));
_tprintf(TEXT("are welcome to redistribute it under certain conditions; view GNU GPLv3 for more.\n\n"));
_tprintf(TEXT("Usage: finddupe [options] [-ign <substr> ...] [-ref <filepat> ...] <filepat>...\n"));
_tprintf(TEXT("Options:\n")
TEXT(" -bat <file.bat> Create batch file with commands to do the hard\n")
TEXT(" linking. run batch file afterwards to do it\n")
Expand All @@ -647,15 +685,16 @@ static void Usage (void)
TEXT(" -v Verbose\n")
TEXT(" -sigs Show signatures calculated based on first 32k for each file\n")
TEXT(" -rdonly Apply to readonly files also (as opposed to skipping them)\n")
TEXT(" -ref <filepat> Following file pattern are files that are for reference, NOT\n")
TEXT(" to be eliminated, only used to check duplicates against\n")
TEXT(" -z Do not skip zero length files (zero length files are ignored\n")
TEXT(" by default)\n")
TEXT(" -u Do not print a warning for files that cannot be read\n")
TEXT(" -p Hide progress indicator (useful when redirecting to a file)\n")
TEXT(" -j Follow NTFS junctions and reparse points (off by default)\n")
TEXT(" -listlink hardlink list mode. Not valid with -del, -bat, -hardlink,\n")
TEXT(" or -rdonly, options\n")
TEXT(" -ign <substr> Ignore file pattern, eg. .bak or .tmp (repeatable)\n")
TEXT(" -ref <filepat> Following file pattern are files that are for reference, NOT to\n")
TEXT(" be eliminated, only used to check duplicates against (repeatable)\n")
TEXT(" filepat Pattern for files. Examples:\n")
TEXT(" c:\\** Match everything on drive C\n")
TEXT(" c:\\**\\*.jpg Match only .jpg files on drive C\n")
Expand All @@ -675,12 +714,24 @@ int _tmain (int argc, TCHAR **argv)
TCHAR * arg;
TCHAR DefaultDrive;
TCHAR DriveUsed = '\0';
int indexFirstRef = 0;

PrintDuplicates = 0;
PrintFileSigs = 0;
HardlinkSearchMode = 0;
Verbose = 0;

// Pre-scan the command line before the real option parsing: remember the
// index of the first "-ref" and reject any known option switch that appears
// after it, because options must precede the -ref / file pattern arguments.
for (argn = 1; argn < argc; argn++) {
arg = argv[argn];
// Record only the first occurrence of -ref (index 0 means "not seen yet").
if (indexFirstRef == 0 && !_tcscmp(arg, TEXT("-ref"))) indexFirstRef = argn;
// Any of the listed option switches found at a position after the first -ref is an ordering error.
if (indexFirstRef > 0 && (!_tcscmp(arg, TEXT("-bat")) || !_tcscmp(arg, TEXT("-v")) || !_tcscmp(arg, TEXT("-sigs")) || !_tcscmp(arg, TEXT("-hardlink")) ||
!_tcscmp(arg, TEXT("-del")) || !_tcscmp(arg, TEXT("-rdonly")) || !_tcscmp(arg, TEXT("-listlink")) || !_tcscmp(arg, TEXT("-z")) ||
!_tcscmp(arg, TEXT("-u")) || !_tcscmp(arg, TEXT("-p")) || !_tcscmp(arg, TEXT("-j")) || !_tcscmp(arg, TEXT("-ign"))) && argn > indexFirstRef) {
fprintf(stderr, "Wrong order of options! Use -h for help\n");
exit(EXIT_FAILURE);
}
}

for (argn=1;argn<argc;argn++){
arg = argv[argn];
if (arg[0] != '-') break; // Filenames from here on.
Expand Down Expand Up @@ -716,6 +767,19 @@ int _tmain (int argc, TCHAR **argv)
ShowProgress = 0;
}else if (!_tcscmp(arg,TEXT("-j"))){
FollowReparse = 1;
}
else if (!_tcscmp(arg, TEXT("-ign"))) {
    // -ign <substr>: remember a substring; it is appended to IgnorePatterns.
    // The option needs a value. Without this check, argv[++argn] would be the
    // terminating NULL entry of argv when -ign is the last argument, and
    // duplicating it would crash.
    if (argn + 1 >= argc) {
        _ftprintf(stderr, TEXT("Option -ign requires an argument. Use -h for help.\n"));
        exit(EXIT_FAILURE);
    }
    if (IgnorePatternsCount >= IgnorePatternsAlloc) {
        // Array is full. Make it bigger (4 entries at a time), going through a
        // temporary so the existing array is only replaced on success.
        TCHAR** grown = realloc(IgnorePatterns, sizeof(TCHAR*) * (IgnorePatternsAlloc + 4));
        if (grown == NULL) {
            _ftprintf(stderr, TEXT("Malloc failure"));
            exit(EXIT_FAILURE);
        }
        IgnorePatterns = grown;
        IgnorePatternsAlloc = IgnorePatternsAlloc + 4;
    }
    // Keep a private copy of the pattern; consumes the option's value argument.
    TCHAR* substr = _tcsdup(argv[++argn]);
    if (substr == NULL) {
        _ftprintf(stderr, TEXT("Malloc failure"));
        exit(EXIT_FAILURE);
    }
    IgnorePatterns[IgnorePatternsCount++] = substr;
}else{
_tprintf(TEXT("Argument '%s' not understood. Use -h for help.\n"), arg);
exit(-1);
Expand Down Expand Up @@ -837,13 +901,16 @@ int _tmain (int argc, TCHAR **argv)
ClearProgressInd();
_tprintf(TEXT("\n"));
_tprintf(TEXT("Files: %8u kBytes in %5d files\n"),
(unsigned)(DupeStats.TotalBytes/1000), DupeStats.TotalFiles);
(unsigned)(DupeStats.TotalBytes/1024), DupeStats.TotalFiles);
_tprintf(TEXT("Dupes: %8u kBytes in %5d files\n"),
(unsigned)(DupeStats.DuplicateBytes/1000), DupeStats.DuplicateFiles);
(unsigned)(DupeStats.DuplicateBytes/1024), DupeStats.DuplicateFiles);
}
if (DupeStats.ZeroLengthFiles){
_tprintf(TEXT(" %d files of zero length were skipped\n"), DupeStats.ZeroLengthFiles);
}
if (DupeStats.IgnoredFiles) {
_tprintf(TEXT(" %d files were ignored\n"), DupeStats.IgnoredFiles);
}
if (DupeStats.CantReadFiles){
_tprintf(TEXT(" %d files could not be opened\n"), DupeStats.CantReadFiles);
}
Expand Down
8 changes: 4 additions & 4 deletions myglob.c
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
//--------------------------------------------------------------------------------
// This file is part of finddupe.
//
// Module to do recursive directory file matching under windows.
//
// Tries to do pattern matching to produce similar results as Unix, but using
// the Windows _findfirst to do all the pattern matching.
//
// Also hadles recursive directories - "**" path component expands into
// Also handles recursive directories - "**" path component expands into
// any levels of subdirectories (ie c:\**\*.c matches ALL .c files on drive c:)
//
// Matthias Wandel Nov 5 2000 - March 2009
Expand All @@ -18,7 +16,9 @@
// Copyright (C) Jun 2017 thomas694
// added unicode support
//
// This program is free software: you can redistribute it and/or modify
// This file is part of finddupe.
//
// finddupe is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
Expand Down
4 changes: 2 additions & 2 deletions version.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#define STRINGIZE(s) STRINGIZE2(s)

#define VERSION_MAJOR 1
#define VERSION_MINOR 25
#define VERSION_MINOR 26
#define VERSION_REVISION 0
#define VERSION_BUILD 0

Expand All @@ -17,7 +17,7 @@
#define VER_PRODUCT_VERSION_STR VER_FILE_VERSION_STR
#define VER_ORIGINAL_FILENAME_STR VER_PRODUCTNAME_STR ".exe"
#define VER_INTERNAL_NAME_STR VER_ORIGINAL_FILENAME_STR
#define VER_COPYRIGHT_STR "(C) 2017 modifications by thomas694 (@GH)"
#define VER_COPYRIGHT_STR "(C) 2017, 2020 modifications by thomas694 (@GH)"

#ifdef _DEBUG
#define VER_VER_DEBUG VS_FF_DEBUG
Expand Down

0 comments on commit 9287f7b

Please sign in to comment.