Skip to main content eteppo

How To Organize Messy Personal Archives in Messy Terminal Bash

Published: 2023-08-03
Updated: 2023-08-03

Shell scripting is an absolute mess. Don’t do it.

#!/bin/bash
# Organise a personal archive directory in place.
#
# The directory given as the first argument is, after an interactive
# confirmation, flattened (every file moved to the top level), stripped of
# md5sum-identical files, and finally sorted into <extension>/<year>/ folders.
#
# Stop at the first unguarded command that exits with a non-zero status.
set -e

#######################################
# Recursively move every non-directory entry below a directory into the
# archive root, normalising names (spaces -> '-', upper -> lower case).
# Globals:
#   global_root (read)    - directory that receives all files
#   counter     (written) - incremented once per non-directory entry;
#                           expected to live in the caller's scope
# Arguments:
#   $1 - directory to walk
# Notes:
#   Name clashes in the root are resolved by appending '£' until the name
#   is free, so no file is ever skipped or overwritten.
#   Entries whose names start with '.' are not matched by the glob and are
#   therefore left where they are.
#######################################
move_all_to_root() {
    local current_directory=$1
    local thing
    local clean_name
    local directory_name
    local filepath
    local things=("$current_directory"/*)

    for thing in "${things[@]}"; do
        # An unmatched glob stays literal (empty directory, or dotfiles only).
        # Skip it, but keep dangling symlinks, which -e reports as missing.
        if [[ ! -e "$thing" && ! -L "$thing" ]]; then
            continue
        fi

        # Normalise only the entry's own name; parent components are left
        # alone so the resulting path always points into an existing directory.
        clean_name=${thing##*/}
        clean_name=$(printf '%s\n' "${clean_name// /-}" | tr '[:upper:]' '[:lower:]')

        if [[ -d "$thing" ]]; then
            directory_name="$current_directory/$clean_name"
            if [[ "$thing" != "$directory_name" ]]; then
                if [[ -e "$directory_name" || -L "$directory_name" ]]; then
                    # The normalised name is taken; process the directory
                    # under its original name instead of silently skipping it.
                    directory_name=$thing
                else
                    mv -n -T -- "$thing" "$directory_name"
                fi
            fi
            move_all_to_root "$directory_name"
        else
            counter=$(( counter + 1 ))
            filepath="$global_root/$clean_name"
            # A root-level file that already has its final name stays put.
            if [[ "$thing" == "$filepath" ]]; then
                continue
            fi
            # Add name-conflict tags until the target path is free.
            while [[ -e "$filepath" || -L "$filepath" ]]; do
                filepath+="£"
            done
            mv -n -T -- "$thing" "$filepath"
        fi
    done
}

#######################################
# Delete every empty directory below a root, including directories that
# only become empty once their empty children are gone.
# Globals:
#   global_root (read) - used as the root when no argument is given
# Arguments:
#   $1 - root directory (optional; defaults to $global_root)
# Returns:
#   1 when no root is available, otherwise the exit status of find.
# Notes:
#   The root itself is never removed, so later steps can still work on it.
#######################################
remove_empty_directories() {
    local root=${1:-${global_root:-}}
    if [[ -z "$root" ]]; then
        echo "remove_empty_directories: no directory given." >&2
        return 1
    fi
    # -depth visits children before parents, so a chain of nested empty
    # directories disappears in a single pass; -mindepth 1 protects the root.
    find "$root" -mindepth 1 -depth -type d -empty -delete
}

# Flatten the directory tree rooted at $1.
# First hands the root to move_all_to_root, then to remove_empty_directories.
# global_root and counter are declared local here so that both helpers see
# them through the call chain without the values escaping to the caller.
flatten_directory() {
    local global_root="$1"
    local counter=0

    move_all_to_root "$global_root"
    remove_empty_directories "$global_root"
}

#######################################
# Delete top-level files whose content is byte-identical (same md5sum) to a
# file seen earlier in glob order; the first file of each group is kept.
# Globals:
#   global_root (read) - used as the root when no argument is given
# Arguments:
#   $1 - directory to scan (optional; defaults to $global_root)
# Outputs:
#   Writes the number of deleted files to stdout (no trailing newline).
# Returns:
#   1 when no directory is available.
#######################################
remove_exact_duplicates() {
    local root=${1:-${global_root:-}}
    local thing
    local checksum
    local rm_counter=0
    local -A seen=()

    # An empty root would turn the glob below into "/*".
    if [[ -z "$root" ]]; then
        echo "remove_exact_duplicates: no directory given." >&2
        return 1
    fi

    for thing in "$root"/*; do
        # Only regular files can be hashed; this also skips directories and
        # the literal pattern left behind by an unmatched glob.
        [[ -f "$thing" ]] || continue
        # Reading from stdin keeps the file name out of md5sum's output;
        # everything after the first space is dropped so that only the hash
        # is used as the lookup key.
        checksum=$(md5sum < "$thing")
        checksum=${checksum%% *}
        if [[ -n "${seen[$checksum]+x}" ]]; then
            rm -- "$thing"
            rm_counter=$(( rm_counter + 1 ))
        else
            seen[$checksum]=1
        fi
    done
    printf "A total of %s md5sum-duplicates were deleted." "$rm_counter"
}

#######################################
# Move every top-level non-directory entry of a directory to
# <root>/<extension>/<modification year>/<name>.
# Globals:
#   global_root (read) - used as the root when no argument is given
# Arguments:
#   $1 - directory to organise (optional; defaults to $global_root)
# Outputs:
#   Warnings about skipped files go to stderr.
# Returns:
#   1 when no directory is available.
# Notes:
#   Name-conflict tags ('£') are stripped from the target name. Names
#   without an extension, or ending in '.', go to the 'unknown' folder.
#   A file whose target already exists is left in place and reported.
#######################################
organize_files() {
    local root=${1:-${global_root:-}}
    local file
    local modification_time
    local year
    local filename
    local extension
    local target_directory
    local filepath

    if [[ -z "$root" ]]; then
        echo "organize_files: no directory given." >&2
        return 1
    fi

    for file in "$root"/*; do
        # Skip directories and the literal pattern of an unmatched glob.
        if [[ -d "$file" ]] || [[ ! -e "$file" && ! -L "$file" ]]; then
            continue
        fi

        # %y prints "YYYY-MM-DD hh:mm:ss ..."; the year is everything before
        # the first '-'.
        modification_time=$(stat --format=%y -- "$file")
        year=${modification_time%%-*}

        # Remove name-conflict tags from the target file name, unless the
        # name consists of nothing else.
        filename=${file##*/}
        filename=${filename//£}
        if [[ -z "$filename" ]]; then
            filename=${file##*/}
        fi

        extension=${filename##*.}
        # Use 'unknown' for missing or empty extensions.
        if [[ "$extension" == "$filename" || -z "$extension" ]]; then
            extension="unknown"
        fi

        target_directory="$root/$extension/$year"
        filepath="$target_directory/$filename"
        if [[ -e "$filepath" || -L "$filepath" ]]; then
            printf "Skipping '%s': '%s' already exists.\n" "$file" "$filepath" >&2
            continue
        fi
        # A file named like the extension folder blocks mkdir; report it and
        # carry on with the remaining files instead of aborting the run.
        if ! mkdir --parents -- "$target_directory"; then
            printf "Skipping '%s': cannot create '%s'.\n" "$file" "$target_directory" >&2
            continue
        fi
        mv -n -T -- "$file" "$filepath"
    done
}

#######################################
# Run the whole clean-up on one directory: normalise its path, flatten it,
# remove exact duplicates and sort the files by extension and year.
# Globals:
#   global_root (written) - normalised path of the directory being cleaned;
#                           read by the helper functions called from here
# Arguments:
#   $1 - directory to clean
# Outputs:
#   Progress messages and file counts on stdout, errors on stderr.
# Returns:
#   1 when the normalised path already exists and differs from the input.
# Notes:
#   The complete path, not only its last component, is lower-cased and has
#   its spaces replaced by '-'; missing parent directories are created.
#######################################
clean_directory() {
    local input=$1
    local n_files

    # Use lower case paths without spaces.
    global_root=$(printf '%s\n' "${input// /-}" | tr '[:upper:]' '[:lower:]')
    if [[ "$input" != "$global_root" ]]; then
        # Never continue with some other, pre-existing directory.
        if [[ -e "$global_root" || -L "$global_root" ]]; then
            printf "Cannot rename '%s': '%s' already exists.\n" "$input" "$global_root" >&2
            return 1
        fi
        # Create only the parent: if the target itself existed, the
        # no-clobber move below would leave the input where it is.
        mkdir --parents -- "$(dirname -- "$global_root")"
        mv -n -T -- "$input" "$global_root"
    fi

    # One '.' per regular file, counted by bytes, so that odd file names
    # cannot distort the count and a count of zero is not an error.
    n_files=$(find "$global_root" -type f -printf '.' | wc -c)
    printf "Input directory has %s files.\n" "$n_files"
    printf "Flattening directory...\n"
    flatten_directory "$global_root"
    printf "\nDone.\n"
    printf "Removing exact duplicates...\n"
    remove_exact_duplicates "$global_root"
    printf "\nDone.\n"
    printf "Organizing files by file extension and the last modification year...\n"
    organize_files "$global_root"
    printf "\nDone.\n"
    n_files=$(find "$global_root" -type f -printf '.' | wc -c)
    printf "Output directory has %s files.\n" "$n_files"
    printf "Output directory: %s\n" "$global_root"
}

# Entry point: validate the directory argument, ask for confirmation and
# start the clean-up once the answer begins with 'y' or 'Y'.
input_path=$1
[[ -d "$input_path" ]] || {
    # print to standard error
    echo "Input path is not a directory." >&2
    exit 1
}
printf "Input directory: %s\n" "$input_path"
while :; do
    read -r -p "Are you sure [y|n]? " yn
    if [[ "$yn" == [Yy]* ]]; then
        clean_directory "$input_path"
        exit 0
    elif [[ "$yn" == [Nn]* ]]; then
        exit 0
    fi
    # anything else: explain and ask again
    echo "Please type 'yes' or 'no' and press enter."
done