I stole this from the internet. This problem screams for a shell script, not an interface...
find . -type f -exec stat --printf='%s/%n\0' {} + |
awk '
BEGIN {
    FS = "/"           #stat prints "size/path", so $1 is the size (the path may contain more slashes)
    RS = ORS = "\0"    #NUL-delimited records, so file names containing newlines survive
    q = "\047"         #a literal single quote, used to shell-quote file names
    md5_cmd = "md5sum" #external checksum command
}
{
    #recover the file path: everything after the first slash in the
    #"size/path" record printed by stat.
    filePath = substr($0, index($0, "/") + 1)
    #group paths that share a file size, NUL-delimited, keyed on the size in $1
    sizes[$1] = ($1 in sizes ? sizes[$1] : "") filePath ORS
}
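#at this point sizes[] maps each file size to a NUL-joined list of paths,
#e.g. (hypothetical data): sizes["4096"] = "./a/notes.txt\0./b/notes.txt\0"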
END {
    for (size in sizes) {
        #split each size group back into individual paths so we can
        #checksum them and confirm whether any are true duplicates.
        filesNr = split(sizes[size], filesName, ORS)
        #each list ends with ORS, so split() returns one extra empty
        #element; filesNr > 2 therefore means at least two files share
        #this size, and only then is md5sum worth running.
        if (filesNr > 2) {
            for (i = 1; i < filesNr; i++) {
                #shell-quote the path; note this breaks on file names
                #that themselves contain a single quote.
                cmd = md5_cmd " " q filesName[i] q
                if ((cmd | getline md5) > 0) {
                    #the first whitespace-separated field is the hash
                    split(md5, hash, " ")
                    #md5sum prefixes its output with a backslash when the
                    #file name contains backslash or newline chars; strip it.
                    #see https://unix.stackexchange.com/q/424628/72456
                    sub(/\\/, "", hash[1])
                    #group the same-sized paths by their hash, again NUL-delimited
                    hashes[hash[1]] = (hash[1] in hashes ? hashes[hash[1]] : "") filesName[i] ORS
                    #also remember the file size for each hash, for the report
                    fileSize[hash[1]] = size
                }
                #close the command so we do not leak file descriptors
                #when scanning many files
                close(cmd)
            }
        }
    }
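    #hashes[] now maps each MD5 digest to the NUL-joined list of paths that
    #produced it, e.g. (hypothetical):
    #hashes["9e107d9d372bb6826bd81d3542a419d6"] = "./a/notes.txt\0./b/notes.txt\0"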
    for (digest in hashes) {
        #here the key is the MD5 digest and the value is the list of
        #same-sized paths that hashed to it.
        filesNr = split(hashes[digest], filesName, ORS)
        #again filesNr includes a trailing empty element, so filesNr > 2
        #means at least two files share this digest: duplicates found.
        if (filesNr > 2) {
            #ORS is still NUL here, so the report itself is NUL-delimited;
            #pipe through tr "\0" "\n" to read it on a terminal.
            print fileSize[digest] " bytes, MD5: " digest
            for (i = 1; i < filesNr; i++)
                print filesName[i]
        }
    }
}'
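
A couple of notes on using it. The --printf format is GNU stat; on BSD or macOS you would need a different stat invocation, and the checksum tool there is typically md5 rather than md5sum. To sanity-check the stream the awk script consumes, you can make the NUL delimiters visible first (paths below are hypothetical):

find . -type f -exec stat --printf='%s/%n\0' {} + | tr '\0' '\n' | head

Because ORS is still NUL when the report is printed, the output itself is NUL-delimited too (that is deliberate: file names may contain newlines). Piped through tr '\0' '\n', a duplicate group looks something like this (made-up values):

1048576 bytes, MD5: 9e107d9d372bb6826bd81d3542a419d6
./photos/holiday.jpg
./backup/photos/holiday.jpg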