How To Organize Messy Personal Archives in Messy Terminal Bash
Shell scripting is an absolute mess. Don’t do it.
#!/bin/bash
# Exit immediately when an unguarded command returns a non-zero status.
set -o errexit
#######################################
# Recursively move every file found below a directory into the archive
# root, using lower-case names with dashes instead of spaces.
# Globals:
#   global_root (read)  - target directory, taken from the caller's scope
#   counter (written)   - incremented once per file seen; owned by the caller
# Arguments:
#   $1 - directory to walk
# Notes:
#   Hidden entries are not matched by the glob and stay where they are.
#   When the target name is taken, "£" name-conflict tags are appended
#   until the name is free, so no file is silently left behind.
#######################################
move_all_to_root() {
  local current_directory=$1
  local thing
  local directory_name
  local filename
  local filepath
  local things

  things=("$current_directory"/*)
  for thing in "${things[@]}"; do
    # An unmatched glob stays literal (e.g. "dir/*"); skip anything that
    # does not exist instead of letting mv fail on it.
    if [[ ! -e "$thing" && ! -L "$thing" ]]; then
      continue
    fi

    if [[ -d "$thing" ]]; then
      # Use lower-case spaceless directory names. Only the last path
      # component is rewritten; the parent part is kept exactly as given.
      filename=${thing##*/}
      filename=$(printf '%s\n' "${filename// /-}" | tr '[:upper:]' '[:lower:]')
      directory_name="${thing%/*}/$filename"
      if [[ "$thing" != "$directory_name" ]]; then
        if [[ -e "$directory_name" || -L "$directory_name" ]]; then
          # The clean name is taken by a sibling: keep the old name so the
          # contents are still walked instead of being skipped.
          directory_name=$thing
        else
          mv -n -T -- "$thing" "$directory_name"
        fi
      fi
      # An empty directory simply yields no loop iterations in the callee.
      move_all_to_root "$directory_name"
    else
      # note: counter variable is defined locally in the calling function
      counter=$(( counter + 1 ))
      # clean file name
      filename=${thing##*/}
      filename=$(printf '%s\n' "${filename// /-}" | tr '[:upper:]' '[:lower:]')
      # take global_root from the scope of the calling function
      filepath="$global_root/$filename"
      # A file that already sits at its final path needs no move and must
      # not be treated as a conflict with itself.
      if [[ "$thing" == "$filepath" ]]; then
        continue
      fi
      # if filepath exists already, add name-conflict tags until it is free
      while [[ -e "$filepath" || -L "$filepath" ]]; do
        filepath+="£"
      done
      mv -n -T -- "$thing" "$filepath"
    fi
  done
}
#######################################
# Delete every empty directory below a root, including directories that
# only become empty once their empty children are gone. The root itself
# is never removed.
# Globals:
#   global_root (read) - used when no argument is given
# Arguments:
#   $1 - directory to prune (optional, defaults to $global_root)
# Returns:
#   non-zero if no directory was specified or find fails
#######################################
remove_empty_directories() {
  local root=${1:-$global_root}

  if [[ -z "$root" ]]; then
    echo "remove_empty_directories: no directory given." >&2
    return 1
  fi
  # -delete implies -depth, so children are visited before their parent and
  # a chain of nested empty directories disappears in a single pass.
  # -mindepth 1 excludes the root itself from deletion.
  find "$root" -mindepth 1 -type d -empty -delete
}
# Flatten the tree rooted at $1: first pull the files up into the root, then
# drop the directories left empty. global_root and counter are declared local
# here, which (bash dynamic scoping) makes them visible to the two functions
# called below.
flatten_directory() {
  local global_root="$1" counter=0

  move_all_to_root "$global_root"
  remove_empty_directories "$global_root"
}
#######################################
# Delete files in the top level of a directory whose content is identical
# (same md5sum) to a file seen earlier in glob order. The first file with
# a given checksum is kept.
# Globals:
#   global_root (read) - used when no argument is given
# Arguments:
#   $1 - directory to scan (optional, defaults to $global_root)
# Outputs:
#   the number of deleted files on stdout (no trailing newline)
#######################################
remove_exact_duplicates() {
  local root=${1:-$global_root}
  local thing
  local checksum
  local rm_counter=0
  local -A seen=()

  # loop over files and remove if the same md5sum as before
  for thing in "$root"/*; do
    # md5sum cannot hash a directory, and an unmatched glob stays literal,
    # so only regular files are considered.
    [[ -f "$thing" ]] || continue
    # Hash stdin so the output is always "<hash>  -", then keep only the
    # hash: a key that includes the file name would never repeat.
    checksum=$(md5sum < "$thing") || return
    checksum=${checksum%% *}
    if [[ -n "${seen[$checksum]+x}" ]]; then
      rm -- "$thing"
      rm_counter=$(( rm_counter + 1 ))
    else
      seen[$checksum]=1
    fi
  done
  printf "A total of %s md5sum-duplicates were deleted." "$rm_counter"
}
#######################################
# Sort the files in the top level of a directory into
# <root>/<extension>/<modification year>/<file name>.
# Globals:
#   global_root (read) - used when no argument is given
# Arguments:
#   $1 - directory to organize (optional, defaults to $global_root)
# Outputs:
#   a line on stderr for every file left in place because of a name conflict
# Notes:
#   "£" name-conflict tags are stripped from the target file name.
#   Files without an extension (or with an empty one, e.g. "name.") go to
#   the "unknown" folder.
#######################################
organize_files() {
  local root=${1:-$global_root}
  local file
  local modification_time
  local year
  local filename
  local extension
  local filepath

  for file in "$root"/*; do
    # Skip an unmatched glob (empty directory) and every directory,
    # including the extension folders created further down.
    if [[ ! -e "$file" && ! -L "$file" ]] || [[ -d "$file" ]]; then
      continue
    fi
    # %y prints "YYYY-MM-DD hh:mm:ss ..."; the year is the part before the
    # first dash.
    modification_time=$(stat --format=%y -- "$file") || return
    year=${modification_time%%-*}
    # remove name-conflict tags from the target file name
    filename=${file##*/}
    filename=${filename//£}
    extension=${filename##*.}
    # use 'unknown' for missing or empty extensions
    if [[ "$extension" == "$filename" || -z "$extension" ]]; then
      extension="unknown"
    fi
    filepath="$root/$extension/$year/$filename"
    # Never overwrite: a file whose target is taken stays where it is, and
    # this is reported instead of relying on mv -n skipping silently.
    if [[ -e "$filepath" || -L "$filepath" ]]; then
      printf 'Name conflict, leaving file in place: %s\n' "$file" >&2
      continue
    fi
    mkdir --parents -- "${filepath%/*}"
    mv -n -T -- "$file" "$filepath"
  done
}
#######################################
# Run the whole clean-up on one directory: normalise its path, flatten it,
# remove exact duplicates and sort the files by extension and year.
# Arguments:
#   $1 - directory to clean
# Outputs:
#   progress messages and file counts on stdout
# Notes:
#   global_root is local to this function; the functions called from here
#   see it through bash dynamic scoping.
#   The whole path is lower-cased and its spaces replaced, not only the
#   last component, so missing parent directories are created if needed.
#######################################
clean_directory() {
  local input=$1
  local global_root
  local n_files

  # use lower case paths without spaces
  global_root=$(printf '%s\n' "${input// /-}" | tr '[:upper:]' '[:lower:]')
  if [[ "$input" != "$global_root" ]]; then
    if [[ -e "$global_root" || -L "$global_root" ]]; then
      echo "Target directory already exists: $global_root" >&2; exit 1
    fi
    # Create only the parent: if the target itself existed, mv -n would
    # refuse to replace it and the input would never be moved.
    mkdir --parents -- "$(dirname -- "$global_root")"
    mv -n -T -- "$input" "$global_root"
  fi
  # Count regular files, one byte per file, so odd file names cannot skew
  # the number and a count of zero is not treated as a failure.
  n_files=$(find "$global_root" -type f -printf '.' | wc -c)
  printf "Input directory has %s files.\n" "$n_files"
  printf "Flattening directory...\n"
  flatten_directory "$global_root"
  printf "\nDone.\n"
  printf "Removing exact duplicates...\n"
  remove_exact_duplicates "$global_root"
  printf "\nDone.\n"
  printf "Organizing files by file extension and the last modification year...\n"
  organize_files "$global_root"
  printf "\nDone.\n"
  n_files=$(find "$global_root" -type f -printf '.' | wc -c)
  printf "Output directory has %s files.\n" "$n_files"
  printf "Output directory: %s\n" "$global_root"
}
# Entry point: the first argument is the directory to clean up.
input_path=$1

# Refuse to continue when the argument is not an existing directory.
[[ -d "$input_path" ]] || {
  # print to standard error
  echo "Input path is not a directory." >&2
  exit 1
}

printf "Input directory: %s\n" "$input_path"

# Keep asking until the answer starts with y/Y (run the clean-up) or
# n/N (quit without touching anything).
while :; do
  read -r -p "Are you sure [y|n]? " yn
  case $yn in
    [Nn]*)
      exit 0
      ;;
    [Yy]*)
      clean_directory "$input_path"
      exit 0
      ;;
    *)
      echo "Please type 'yes' or 'no' and press enter."
      ;;
  esac
done