dups (1987B)
#!/usr/bin/env sh

# Description: List non-empty duplicates in the current dir (based on size followed by MD5)
#
# Source: https://www.commandlinefu.com/commands/view/3555/find-duplicate-files-based-on-size-first-then-md5-hash
#
# Dependencies: find md5sum sort uniq xargs gsed
#
# Notes:
#   1. If the file size exceeds $size_digits digits the file will be misplaced
#      12 digits fit files up to 931GiB
#   2. Bash compatible required for mktemp
#   3. The pipeline is line-oriented, so file names containing a newline
#      are not handled.
#
# Flow:
#   1. Collect regular, non-empty files below "." whose size is shared with
#      at least one other file, then hash only those candidates with md5sum.
#   2. Write the groups of identical hashes to a scratch file and open it in
#      $EDITOR; the user prefixes the files to delete with a single '#'.
#   3. After confirmation, delete the marked files with `rm -f` or `rm -i`.
#
# Shell: Bash
# Authors: syssyphus, KlzXS

EDITOR="${EDITOR:-vi}"
TMPDIR="${TMPDIR:-/tmp}"

# Width of the right-aligned size column printed by find and compared by uniq.
size_digits=12

tmpfile=$(mktemp "$TMPDIR/.nnnXXXXXX") || {
    printf "dups: could not create a temporary file in %s\n" "$TMPDIR" >&2
    exit 1
}

# Remove the scratch list on every exit path, including the cancel branch.
cleanup() {
    rm -f -- "$tmpfile"
}
trap cleanup EXIT
trap 'exit 1' HUP INT TERM

# Instruction banner shown at the top of the editable list.
cat > "$tmpfile" <<'EOF'
## This is an overview of all duplicate files found.
## Comment out the files you wish to remove. You will be given an option to cancel.
## Lines with double comments (##) are ignored.
## You will have the option to remove the files with force or interactively.

EOF

# Pipeline stages:
#   find | sort | uniq -D   keep only files whose size occurs more than once
#   sed | tr | xargs        turn "SIZE PATH" into NUL-separated SIZE, PATH pairs
#                           and hash each PATH; the path reaches md5sum as a
#                           positional parameter, never as generated shell text,
#                           so quotes, $(...) or backticks in a name are inert.
#                           Files md5sum cannot read are skipped.
#   sort | uniq -w32        group identical hashes, blank line between groups;
#                           the extra echo terminates the last group
#   sed -n                  print a "## md5sum: ... size: ..." header per group,
#                           then the group itself up to its blank line
#   sed                     strip the hash prefix (32 hex digits, a space and
#                           md5sum's mode character) and the " dSIZE" suffix
# shellcheck disable=SC2016
find . -size +0 -type f -printf "%${size_digits}s %p\n" |
    sort -rn |
    uniq -w"${size_digits}" -D |
    sed -E 's/^ *([0-9]+) (.*)$/\1\n\2/' |
    tr '\n' '\0' |
    xargs -0 -r -n2 sh -c '
        if sum=$(md5sum -- "$2"); then
            printf "%s %s\n" "$sum" "d$1"
        fi' sh |
    sort |
    { uniq -w32 --all-repeated=separate; echo; } |
    sed -nE '
h
s/^(.{32}).* d([0-9]*)$/## md5sum: \1 size: \2 bytes/p
g

:loop
N
/.*\n$/!b loop
p' |
    sed -E 's/^.{32} [ *](.*) d[0-9]*$/\1/' >> "$tmpfile"

"$EDITOR" "$tmpfile"

printf "Remove commented files? (yes/no) [default=n]: "
read -r commented

case "$commented" in
    y | Y | yes | Yes | YES)
        # Drop banner/header lines (##), untouched entries and blank lines,
        # then strip the single leading '#' from the entries marked for removal.
        sedcmd='/^(##|[^#]).*/d; /^$/d; s/^# *(.*)$/\1/'
        ;;
    *)
        printf "Press any key to exit"
        read -r _
        exit
        ;;
esac

printf "Remove with force or interactive? (f/i) [default=i]: "
read -r force

case "$force" in
    f | F) rm_flag=-f ;;
    *) rm_flag=-i ;;
esac

# xargs' stdin is the pipe carrying the file list, so rm is reattached to the
# terminal to let `rm -i` read its confirmations. "--" ends option parsing
# in case an edited entry starts with '-'.
# shellcheck disable=SC2016
sed -E "$sedcmd" "$tmpfile" |
    tr '\n' '\0' |
    xargs -0 -r sh -c 'flag=$1; shift; rm "$flag" -- "$@" </dev/tty' sh "$rm_flag"

rm -f -- "$tmpfile"

printf "Press any key to exit"
read -r _