eteppo

How To Organize Messy Personal Archives in Messy Terminal Bash

Published: 2023-08-03

This script is neither smart, fast, nor safe, so be sure to always have backup copies and understand every line before running it. Mostly you might end up learning something about bash.

#!/bin/bash
set -e

# Recursively move every non-directory found below a directory into
# "$global_root", normalising names on the way (spaces become "-", upper case
# becomes lower case).
#
# Globals:
#   global_root (read)    - flat target directory; set by the calling function.
#   counter     (written) - incremented once per non-directory entry; the
#                           variable itself lives in the caller's scope.
# Arguments:
#   $1 - directory to walk.
# Notes:
#   - Hidden entries are not matched by the "*" glob and stay where they are.
#   - Symbolic links are never followed; a link is moved like a plain file.
#   - On a name clash the target gets one "£" appended per clash; the tags are
#     meant to be stripped again when the files are organised.
move_all_to_root() {
    local current_directory=$1
    local thing
    local clean_name
    local directory_name
    local filepath
    local things=("$current_directory"/*)
    # an empty root would turn every target path into "/<name>"
    : "${global_root:?move_all_to_root: global_root is not set}"
    for thing in "${things[@]}"; do
        # an unmatched glob (empty or dotfile-only directory) stays literal
        if [[ ! -e "$thing" && ! -L "$thing" ]]; then
            continue
        fi
        # normalise only the last path component, never the parent path
        clean_name=${thing##*/}
        clean_name=$(printf '%s' "${clean_name// /-}" | tr '[:upper:]' '[:lower:]')
        if [[ -d "$thing" && ! -L "$thing" ]]; then
            directory_name="$current_directory/$clean_name"
            if [[ "$thing" != "$directory_name" ]]; then
                if [[ -e "$directory_name" || -L "$directory_name" ]]; then
                    # the clean name is taken: walk the directory under its
                    # old name instead of silently skipping its contents
                    directory_name=$thing
                else
                    mv -n -T -- "$thing" "$directory_name"
                fi
            fi
            move_all_to_root "$directory_name"
        else
            # note: counter variable is defined locally in the parent function
            counter=$(( counter + 1 ))
            filepath="$global_root/$clean_name"
            # a cleanly named file directly in the root is already in place
            if [[ "$thing" == "$filepath" ]]; then
                continue
            fi
            # add name-conflict tags until the target path is free
            while [[ -e "$filepath" || -L "$filepath" ]]; do
                filepath+="£"
            done
            mv -n -T -- "$thing" "$filepath"
        fi
    done
}

# Delete every empty directory below "$global_root", including directories
# that only become empty because their empty children were deleted.
#
# Globals:
#   global_root (read) - directory tree to prune; the root itself is kept.
# Arguments:
#   none are read (callers pass the root, but it is taken from global_root).
remove_empty_directories() {
    # -delete implies -depth, so children are handled before their parents and
    # one traversal is enough; -mindepth 1 protects the root directory itself.
    find "${global_root:?remove_empty_directories: global_root is not set}" \
        -mindepth 1 -type d -empty -delete
}

# Flatten a directory tree: move all files into the top level, then remove
# the directories that were emptied.
#
# global_root and counter are declared local here; the helper functions called
# below see them through bash's dynamic scoping.
#
# Arguments:
#   $1 - directory to flatten (required, must not be empty).
flatten_directory() {
    # an empty root would make the "$dir"/* glob of the walker expand to "/*"
    local global_root=${1:?flatten_directory: missing directory argument}
    local counter=0
    move_all_to_root "$global_root"
    remove_empty_directories "$global_root"
}

# Delete files directly inside "$global_root" whose content is byte-identical
# (same md5sum) to a file seen earlier in glob order; the first one is kept.
#
# Globals:
#   global_root (read) - directory whose top-level files are compared.
# Arguments:
#   none are read (callers pass the root, but it is taken from global_root).
# Outputs:
#   a one-line summary on stdout (without trailing newline).
remove_exact_duplicates() {
    local thing
    local checksum
    local rm_counter=0
    local things=( "${global_root:?remove_exact_duplicates: global_root is not set}"/* )
    local -A seen=()
    for thing in "${things[@]}"; do
        # only regular files are hashed; this skips directories, symbolic
        # links and the literal pattern left behind by an unmatched glob
        if [[ ! -f "$thing" || -L "$thing" ]]; then
            continue
        fi
        # hash stdin so the output never contains the (arbitrary) file name,
        # then keep only the digest as the lookup key
        checksum=$(md5sum < "$thing")
        checksum=${checksum%% *}
        if [[ -n "${seen[$checksum]:-}" ]]; then
            rm -- "$thing"
            rm_counter=$(( rm_counter + 1 ))
        else
            seen[$checksum]=1
        fi
    done
    printf "A total of %s md5sum-duplicates were deleted." "$rm_counter"
}

# Sort the files directly inside "$global_root" into
# "$global_root/<extension>/<year>/<name>", where <year> is the year of the
# file's last modification and <extension> is the text after the last "."
# ("unknown" when there is none).
#
# Globals:
#   global_root (read) - directory whose top-level files are organised.
# Arguments:
#   none are read (callers pass the root, but it is taken from global_root).
# Outputs:
#   a message on stderr for every file whose target path is already taken;
#   such a file is left where it is.
organize_files() {
    local file
    local mtime
    local year
    local filename
    local extension
    local filepath
    local files=( "${global_root:?organize_files: global_root is not set}"/* )
    for file in "${files[@]}"; do
        # skip the literal pattern of an unmatched glob and all directories
        if [[ ! -e "$file" && ! -L "$file" ]] || [[ -d "$file" ]]; then
            continue
        fi
        # %y starts with the date as YYYY-MM-DD, so the year is everything
        # before the first "-"
        mtime=$(stat --format=%y -- "$file")
        year=${mtime%%-*}
        # remove name-conflict tags from the target file name
        filename=${file##*/}
        filename=${filename//£/}
        if [[ -z "$filename" ]]; then
            # the name consisted of tags only; keep it as it is
            filename=${file##*/}
        fi
        extension=${filename##*.}
        # use 'unknown' for missing or empty extensions ("name", "name.")
        if [[ "$extension" == "$filename" || -z "$extension" ]]; then
            extension="unknown"
        fi
        filepath="$global_root/$extension/$year/$filename"
        # never overwrite: report the clash instead of skipping silently
        if [[ -e "$filepath" || -L "$filepath" ]]; then
            printf 'Skipping %s: %s already exists.\n' "$file" "$filepath" >&2
            continue
        fi
        mkdir --parents -- "${filepath%/*}"
        mv -n -T -- "$file" "$filepath"
    done
}

# Run the whole clean-up on one directory: normalise its path, flatten it,
# delete exact duplicates and sort the remaining files by extension and year.
#
# Globals:
#   global_root (written) - normalised path of the directory being cleaned
#                           (spaces become "-", upper case becomes lower
#                           case); intentionally not local, the other
#                           functions of this script read it.
# Arguments:
#   $1 - directory to clean (required).
# Outputs:
#   progress messages on stdout, errors on stderr.
# Returns:
#   1 when the normalised path is already occupied by something else.
clean_directory() {
    local input=${1:?clean_directory: missing directory argument}
    local target_parent
    local n_files
    # use lower case paths without spaces
    global_root=$(printf '%s' "${input// /-}" | tr '[:upper:]' '[:lower:]')
    if [[ "$input" != "$global_root" ]]; then
        if [[ "$input" -ef "$global_root" ]]; then
            # both spellings name the same directory (e.g. on a
            # case-insensitive file system): nothing to move
            :
        elif [[ -e "$global_root" || -L "$global_root" ]]; then
            printf 'Target directory already exists: %s\n' "$global_root" >&2
            return 1
        else
            # create only the parent: if the target itself existed, the
            # no-clobber move below would be skipped
            target_parent=$(dirname -- "$global_root")
            mkdir --parents -- "$target_parent"
            mv -n -T -- "$input" "$global_root"
        fi
    fi
    # one "." per regular file; counting bytes is safe for any file name
    n_files=$(find "$global_root" -type f -printf . | wc -c)
    printf "Input directory has %s files.\n" "$n_files"
    printf "Flattening directory...\n"
    flatten_directory "$global_root"
    printf "\nDone.\n"
    printf "Removing exact duplicates...\n"
    remove_exact_duplicates "$global_root"
    printf "\nDone.\n"
    printf "Organizing files by file extension and the last modification year...\n"
    organize_files "$global_root"
    printf "\nDone.\n"
    n_files=$(find "$global_root" -type f -printf . | wc -c)
    printf "Output directory has %s files.\n" "$n_files"
    printf "Output directory: %s\n" "$global_root"
}

# Entry point: validate the directory given as first argument, ask for
# confirmation, then run the clean-up.
input_path=$1
# refuse anything that is not an existing directory (message goes to stderr)
[[ -d "$input_path" ]] || {
    echo "Input path is not a directory." >&2
    exit 1
}
printf "Input directory: %s\n" "$input_path"
# keep asking until the answer starts with y/Y or n/N
while :; do
    read -r -p "Are you sure [y|n]? " yn
    case $yn in
        [Yy]*)
            clean_directory "$input_path"
            exit 0
            ;;
        [Nn]*)
            exit 0
            ;;
        *)
            echo "Please type 'yes' or 'no' and press enter."
            ;;
    esac
done
CC BY-SA 4.0 Eero Teppo. Last modified: March 23, 2025. Website built with Franklin.jl and the Julia programming language.