Published: 2023-08-03
This script is neither smart, fast, nor safe, so be sure to always keep backup copies and to understand every line before running it. At the very least, you might end up learning something about bash.
#!/bin/bash
set -e
#######################################
# Recursively move every non-directory entry found below a directory into the
# flat root directory. Entry names are normalized on the way: spaces become
# '-' and upper-case letters become lower case.
# Globals:
#   global_root (read)    - directory that receives all files
#   counter     (written) - incremented once per non-directory entry; the
#                           variable is expected to be declared by the caller
# Arguments:
#   $1 - directory to walk
# Notes:
#   Names starting with '.' are not matched by the glob and stay where they
#   are. A file whose target name is already taken in the root gets one or
#   more '£' name-conflict tags appended until the name is free.
#######################################
move_all_to_root() {
  local current_directory=$1
  local thing
  local clean_name
  local directory_name
  local filepath
  local things

  things=("$current_directory"/*)
  for thing in "${things[@]}"; do
    # An unmatched glob stays behind as the literal pattern (the directory is
    # empty or holds only dotfiles); there is nothing to move in that case.
    if [[ ! -e "$thing" && ! -L "$thing" ]]; then
      continue
    fi

    # Normalize only the entry's own name, so the result does not depend on
    # how the parent path happens to be spelled.
    clean_name=${thing##*/}
    clean_name=$(printf '%s\n' "${clean_name// /-}" | tr '[:upper:]' '[:lower:]')

    if [[ -d "$thing" ]]; then
      directory_name="$current_directory/$clean_name"
      if [[ "$thing" != "$directory_name" \
        && ! -e "$directory_name" && ! -L "$directory_name" ]]; then
        mv -n -T -- "$thing" "$directory_name"
      else
        # Name is already clean, or the clean name is taken: walk the
        # directory under its current name so its files are still collected.
        directory_name=$thing
      fi
      move_all_to_root "$directory_name"
    else
      # note: counter variable is defined locally in the parent function
      counter=$(( counter + 1 ))
      filepath="$global_root/$clean_name"
      # Already sitting in the root under its final name: nothing to move.
      if [[ "$thing" == "$filepath" ]]; then
        continue
      fi
      # Add name-conflict tags until the target name is free, so that no
      # file is silently left behind by 'mv -n'.
      while [[ -e "$filepath" || -L "$filepath" ]]; do
        filepath+="£"
      done
      mv -n -T -- "$thing" "$filepath"
    fi
  done
}
#######################################
# Delete every empty directory below the root directory, including
# directories that only become empty once their empty children are gone.
# The root directory itself is always kept, even when it ends up empty.
# Globals:
#   global_root (read) - used when no argument is given
# Arguments:
#   $1 - root directory to prune (optional, defaults to $global_root)
#######################################
remove_empty_directories() {
  local root=${1:-$global_root}
  # -depth visits children before their parent, so nested empty directories
  # disappear in a single pass; -mindepth 1 protects the root itself.
  find "$root" -mindepth 1 -depth -type d -empty -delete
}
#######################################
# Flatten a directory tree: first pull every file up into the root, then
# prune the directories that were emptied by doing so.
# Arguments:
#   $1 - root directory to flatten
# Notes:
#   global_root and counter are declared local here; the two helpers called
#   below see them through bash's dynamic scoping.
#######################################
flatten_directory() {
  local global_root="$1" counter=0
  move_all_to_root "$global_root"
  remove_empty_directories "$global_root"
}
#######################################
# Delete regular files in the root directory whose content has the same
# md5sum as a file seen earlier in glob order; the first file of each
# checksum is kept. Directories and other non-regular entries are skipped.
# Globals:
#   global_root (read) - used when no argument is given
# Arguments:
#   $1 - directory to de-duplicate (optional, defaults to $global_root)
# Outputs:
#   Writes the number of deleted files to stdout (no trailing newline).
# Returns:
#   Non-zero if a checksum cannot be computed.
#######################################
remove_exact_duplicates() {
  local root=${1:-$global_root}
  local thing
  local checksum
  local rm_counter=0
  local things
  local -A seen=()

  things=( "$root"/* )
  for thing in "${things[@]}"; do
    # Skips directories as well as the literal pattern of an unmatched glob.
    if [[ ! -f "$thing" ]]; then
      continue
    fi
    # Read via stdin so the output is just "<hash>  -" and the key never
    # depends on the file name.
    checksum=$(md5sum < "$thing") || return
    checksum=${checksum%% *}
    if [[ -n "${seen[$checksum]+x}" ]]; then
      rm -- "$thing"
      rm_counter=$(( rm_counter + 1 ))
    else
      seen[$checksum]=1
    fi
  done
  printf "A total of %s md5sum-duplicates were deleted." "$rm_counter"
}
#######################################
# Sort the files lying directly in the root directory into
# <root>/<extension>/<year>/<name>, where <year> is taken from the file's
# last modification time and '£' name-conflict tags are stripped from <name>.
# Files without an extension (no dot, or nothing after the last dot) go to
# the 'unknown' extension directory. Directories are left untouched, and an
# existing target is never overwritten (the file then stays in the root).
# Globals:
#   global_root (read) - used when no argument is given
# Arguments:
#   $1 - root directory to organize (optional, defaults to $global_root)
# Returns:
#   Non-zero if the modification time of a file cannot be read.
#######################################
organize_files() {
  local root=${1:-$global_root}
  local file
  local mtime
  local year
  local filename
  local extension
  local filepath
  local files

  files=( "$root"/* )
  for file in "${files[@]}"; do
    # Skip directories, and the literal pattern left by an unmatched glob
    # when the root is empty.
    if [[ -d "$file" ]] || [[ ! -e "$file" && ! -L "$file" ]]; then
      continue
    fi
    # %y prints e.g. "2023-08-03 12:34:56.000000000 +0300"; the year is
    # everything before the first '-'.
    mtime=$(stat --format=%y -- "$file") || return
    year=${mtime%%-*}
    # remove name-conflict tags from the target file name
    filename=${file##*/}
    filename=${filename//£/}
    extension=${filename##*.}
    # use 'unknown' for missing extensions
    if [[ -z "$extension" || "$extension" == "$filename" ]]; then
      extension="unknown"
    fi
    filepath="$root/$extension/$year/$filename"
    mkdir --parents -- "$root/$extension/$year"
    mv -n -T -- "$file" "$filepath"
  done
}
# Print the number of regular files below directory $1.
# One byte is emitted per file, so odd file names cannot skew the count.
_count_files() {
  find "$1" -type f -printf '.' | wc -c
}

#######################################
# Run the whole clean-up on a directory: normalize its path, flatten it,
# remove exact duplicates and sort the remaining files by extension and year.
# Globals:
#   global_root (written) - normalized path of the directory being cleaned;
#                           read by the helper functions called from here
# Arguments:
#   $1 - input directory
# Outputs:
#   Progress messages and file counts on stdout.
# Notes:
#   The WHOLE input path is normalized (spaces -> '-', upper -> lower case),
#   so parent components containing spaces or upper-case letters lead to a
#   different parent directory, which is created on demand.
#   The script exits with status 1 if the normalized path is already taken by
#   something other than the input directory, rather than cleaning a
#   directory the user did not ask for.
#######################################
clean_directory() {
  local input=$1
  local n_files

  # use lower case paths without spaces
  global_root=$(printf '%s\n' "${input// /-}" | tr '[:upper:]' '[:lower:]')
  # -ef: on a case-insensitive file system both spellings can already be the
  # same directory, in which case there is nothing to rename.
  if [[ "$input" != "$global_root" && ! "$input" -ef "$global_root" ]]; then
    if [[ -e "$global_root" || -L "$global_root" ]]; then
      echo "Target directory already exists: $global_root" >&2; exit 1
    fi
    # Create only the parent: if the target itself existed, 'mv -n' would
    # skip the rename and leave the input where it was.
    mkdir --parents -- "$(dirname -- "$global_root")"
    mv -n -T -- "$input" "$global_root"
  fi

  n_files=$(_count_files "$global_root")
  printf "Input directory has %s files.\n" "$n_files"
  printf "Flattening directory...\n"
  flatten_directory "$global_root"
  printf "\nDone.\n"
  printf "Removing exact duplicates...\n"
  remove_exact_duplicates "$global_root"
  printf "\nDone.\n"
  printf "Organizing files by file extension and the last modification year...\n"
  organize_files "$global_root"
  printf "\nDone.\n"
  n_files=$(_count_files "$global_root")
  printf "Output directory has %s files.\n" "$n_files"
  printf "Output directory: %s\n" "$global_root"
}
# Entry point: take the directory to clean from the first argument, refuse
# anything that is not a directory, and ask for confirmation before touching
# any file.
input_path=$1
# diagnostics go to standard error
[[ -d "$input_path" ]] || { echo "Input path is not a directory." >&2; exit 1; }
printf "Input directory: %s\n" "$input_path"
# Keep asking until the answer starts with y/Y (run, then quit) or n/N (quit).
while :; do
  read -r -p "Are you sure [y|n]? " yn
  case $yn in
    [Yy]*)
      clean_directory "$input_path"
      exit 0
      ;;
    [Nn]*)
      exit 0
      ;;
    *)
      echo "Please type 'yes' or 'no' and press enter."
      ;;
  esac
done