Small utilities

This is a collection of small command-line utilities I have written over the years to make my daily Unix computing better.

Converts media files to 16 kHz WAV for whisper.cpp speech recognition using FFmpeg.

Download.

#! /bin/sh
                        
                        # Sample rate in Hz; usage() prints whatever this variable holds.
                        rate=16000

                        # Write the command synopsis and the value of $rate to standard error.
                        usage() {
                            printf >&2 \
                                'usage: %s [-r <rate>] src [dest]\n\nDefault rate: %d.\n' \
                                "$(basename "$0")" "$rate"
                        }
                        
                        # Command-line handling and conversion.
                        #
                        #   [-r <rate>]  output sample rate (defaults to $rate)
                        #   src          input media file
                        #   [dest]       output file; derived from src when omitted
                        #
                        # Exits with status 2 on a usage error, otherwise with ffmpeg's status.

                        # The chosen rate is kept in its own variable so that $rate still holds
                        # the built-in default when usage() prints "Default rate".
                        out_rate=$rate
                        if [ "${1:-}" = "-r" ]; then
                            if [ "$#" -lt 2 ]; then
                                usage
                                exit 2
                            fi

                            out_rate=$2
                            shift 2
                        fi

                        if [ "$#" -lt 1 ] || [ "$#" -gt 2 ]; then
                            usage
                            exit 2
                        fi

                        src=$1
                        dest=${2:-}
                        if [ -z "$dest" ]; then
                            # Replace the extension of the file name only. Stripping ".*" from the
                            # whole path would also eat directory parts: "./clip" would become
                            # ".wav" and "dir.d/clip" would become "dir.wav".
                            dir=
                            name=$src
                            case "$src" in
                            */*)
                                dir=${src%/*}/
                                name=${src##*/}
                                ;;
                            esac

                            # Only strip when there is something before the dot, so a dotfile
                            # such as ".hidden" keeps its name and becomes ".hidden.wav".
                            case "$name" in
                            ?*.*)
                                name=${name%.*}
                                ;;
                            esac

                            dest=$dir$name.wav
                        fi

                        # A bare output name starting with "-" would be read by ffmpeg as an
                        # option, so anchor it to the current directory.
                        case "$dest" in
                        -*)
                            dest=./$dest
                            ;;
                        esac

                        ffmpeg -hide_banner -i "$src" -ar "$out_rate" "$dest"

A wrapper around 7-Zip that applies maximum compression settings. The settings are based on a SuperUser answer by user 91735472.

Download.

#! /bin/sh
                        set -eu
                        
                        # Print the one-line synopsis to standard output. Callers that report
                        # misuse redirect it to standard error themselves.
                        usage() {
                            # Positional parameters are local to the function, so they make a
                            # convenient scratch slot for the program name.
                            set -- "$(basename "$0")"
                            printf 'usage: %s [-d <MiB>] 7z-arg ...\n' "$1"
                        }
                        
                        # Command-line handling and the 7z invocation.
                        #
                        #   -h | --help  anywhere on the command line: print usage, exit 0
                        #   -d <MiB>     dictionary size in MiB (a trailing "m" is accepted)
                        #   7z-arg ...   passed to "7z a" unchanged; at least two are required
                        #
                        # Exits with status 2 on a usage error, otherwise with 7z's status.

                        for arg in "$@"; do
                            if [ "$arg" = -h ] || [ "$arg" = --help ]; then
                                usage
                                exit 0
                            fi
                        done

                        d=1536m
                        if [ "${1:-}" = -d ]; then
                            if [ "$#" -lt 2 ]; then
                                usage >&2
                                exit 2
                            fi

                            # The value is spliced into -md=<n>m, so accept digits only
                            # (after dropping an optional trailing "m").
                            case "${2%m}" in
                            '' | *[!0-9]*)
                                printf '%s: invalid dictionary size: %s\n' "$(basename "$0")" "$2" >&2
                                usage >&2
                                exit 2
                                ;;
                            esac

                            d=${2%m}m
                            shift 2
                        fi

                        # Count the arguments only after -d has been consumed, so that the same
                        # minimum applies with and without the option.
                        if [ "$#" -lt 2 ]; then
                            usage >&2
                            exit 2
                        fi

                        7z \
                            a \
                            -t7z \
                            -md="$d" \
                            -mfb=273 \
                            -mlc=0 \
                            -mmc=10000 \
                            -mmf=bt3 \
                            -mmt \
                            -mmtf \
                            -mpb=0 \
                            -ms \
                            -mx=9 \
                            -myx=9 \
                            "$@" \
                            ;

Creates backup copies of files by appending .bak to their names.

Download.

#! /bin/sh
                        set -eu
                        
                        # backup PATH
                        # Copy PATH to "PATH.bak" with cp -a, prompting (-i) before overwriting
                        # an existing backup. Returns cp's exit status.
                        backup() {
                            src=$1

                            # Drop trailing slashes so that "dir/" is backed up as "dir.bak"
                            # instead of the copy being aimed at "dir/.bak" inside itself.
                            while :; do
                                case "$src" in
                                ?*/)
                                    src=${src%/}
                                    ;;
                                *)
                                    break
                                    ;;
                                esac
                            done

                            # "--" ends option parsing, so names starting with "-" are safe.
                            cp -ai -- "$src" "$src.bak"
                        }

                        for f in "$@"; do
                            backup "$f"
                        done

A pair of scripts for interacting with the X clipboard. A little more convenient than using xclip(1) directly.

Download.

#! /bin/sh
                        set -eu
                        
                        # Run xclip in input mode (-in) on the "clip" selection (-sel clip),
                        # forwarding every command-line argument to it unchanged.
                        xclip -in -sel clip "$@"

Download.

#! /bin/sh
                        set -eu
                        
                        # This command takes no arguments; anything on the command line is a
                        # usage error reported on standard error with exit status 2.
                        [ "$#" -eq 0 ] || {
                            printf 'usage: %s\n' "$(basename "$0")" >&2
                            exit 2
                        }

                        # Run xclip in output mode (-out) on the "clip" selection.
                        xclip -out -sel clip "$@"

A cURL wrapper for easy file downloads that works the same way on everything from NetBSD to Ubuntu. It uses the remote filename by default.

Download.

#! /bin/sh
                        
                        # dl url [dest]
                        # Fetch url with curl, following redirects (-L).
                        #   one argument:  curl picks the local name (-O -J)
                        #   two arguments: the download is written to dest (-o)
                        # Any other argument count prints the synopsis to standard error and
                        # returns 1; otherwise the return status is curl's.
                        dl() {
                            case $# in
                            1)
                                curl -L -O -J "$1"
                                ;;
                            2)
                                curl -L -o "$2" "$1"
                                ;;
                            *)
                                printf 'usage: dl url [dest]\n' >&2
                                return 1
                                ;;
                            esac
                        }

                        dl "$@"

Lists unique file extensions in the current directory.

Download.

#! /usr/bin/env python3
                        
                        import shlex
                        from pathlib import Path
                        
                        # Gather the suffix of every entry in the current directory; the set
                        # keeps a single copy of each distinct suffix.
                        suffixes = set()
                        for entry in Path().iterdir():
                            suffixes.add(entry.suffix)

                        # Emit the suffixes sorted, on one line, quoted for the shell.
                        print(shlex.join(sorted(suffixes)))

Prints file modification times in the Unix timestamp format. When there is more than one file, prints the timestamp followed by the filename quoted for POSIX shell with shlex.quote. Useful for backing up and restoring modification times.

Download.

#! /usr/bin/env python3
                        
                        import shlex
                        import sys
                        from pathlib import Path
                        
                        
                        def main() -> None:
                            """Print the modification time of each path named on the command line.

                            Times are printed as whole seconds since the Unix epoch. With a single
                            path only the timestamp is printed; with several, each line is the
                            timestamp followed by the path quoted with shlex.quote.

                            A path that cannot be inspected is reported on standard error and
                            skipped, so the remaining paths are still processed; the process then
                            exits with status 1. With no arguments the function exits silently.
                            """
                            paths = sys.argv[1:]
                            if not paths:
                                sys.exit()

                            print_paths = len(paths) > 1
                            failed = False

                            for path in paths:
                                try:
                                    mtime = int(Path(path).stat().st_mtime)
                                except OSError as e:
                                    # Report and carry on rather than dying with a traceback.
                                    print(
                                        f"error processing {shlex.quote(path)}: {e}",
                                        file=sys.stderr,
                                    )
                                    failed = True
                                    continue

                                if print_paths:
                                    print(mtime, shlex.quote(path))
                                else:
                                    print(mtime)

                            if failed:
                                sys.exit(1)


                        if __name__ == "__main__":
                            main()

A shortcut for the tmux session manager that attaches to existing sessions or creates new ones.

Download.

#! /bin/sh
                        set -eu
                        
                        # Accept at most one argument: the session name.
                        case $# in
                        0 | 1) ;;
                        *)
                            printf 'usage: %s [session]\n' "$(basename "$0")" >&2
                            exit 2
                            ;;
                        esac

                        # A missing or empty argument falls back to the session name "op".
                        session=${1:-op}

                        # Try to attach first; if that fails, start a new session of that name.
                        tmux a -t "$session" || tmux new -s "$session"

Removes trailing whitespace.

Download.

#! /usr/bin/env python3
                        
                        import fileinput
                        import re
                        import sys
                        
                        # A run of spaces and/or tabs sitting at the end of a line.
                        TRAILING_BLANKS = re.compile(r"[ \t]+$")

                        try:
                            # fileinput is opened with inplace=True and each cleaned line is
                            # written to sys.stdout.
                            # NOTE(review): only OSError is handled here; other failures while
                            # reading (e.g. decode errors) propagate -- confirm that is intended.
                            for raw_line in fileinput.input(inplace=True):
                                cleaned = TRAILING_BLANKS.sub("", raw_line)
                                sys.stdout.write(cleaned)
                        except OSError as err:
                            sys.stderr.write(str(err) + "\n")
                            sys.exit(1)

Releases grabbed keyboard/mouse input in X11. I need it more often than I would like to in remote sessions.

Download.

#! /bin/sh
                        # Have xdotool send the XF86Ungrab key.
                        xdotool key XF86Ungrab

Extracts URLs from text files. The utility uses John Gruber’s URL regex patterns. By default, it finds both web URLs and other URI schemes. With the -w option, it only finds web URLs. The list of TLDs in the web regular expression is outdated.

Download.

#! /usr/bin/env python3
                        
                        import argparse
                        import re
                        import shlex
                        import sys
                        from pathlib import Path
                        from typing import Iterator
                        
                        # The URL and web URL regex patterns by John Gruber.
                        # https://gist.github.com/gruber/249502
                        RE_ALL = r"""(?i)\b((?:[a-z][\w-]+:(?:/{1,3}|[a-z0-9%])|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'\".,<>«»""'']))"""
                        
                        # https://gist.github.com/gruber/8891611
                        RE_WEB = r"""(?i)\b((?:https?:(?:/{1,3}|[a-z0-9%])|[a-z0-9.\-]+[.](?:com|net|org|edu|gov|mil|aero|asia|biz|cat|coop|info|int|jobs|mobi|museum|name|post|pro|tel|travel|xxx|ac|ad|ae|af|ag|ai|al|am|an|ao|aq|ar|as|at|au|aw|ax|az|ba|bb|bd|be|bf|bg|bh|bi|bj|bm|bn|bo|br|bs|bt|bv|bw|by|bz|ca|cc|cd|cf|cg|ch|ci|ck|cl|cm|cn|co|cr|cs|cu|cv|cx|cy|cz|dd|de|dj|dk|dm|do|dz|ec|ee|eg|eh|er|es|et|eu|fi|fj|fk|fm|fo|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|gn|gp|gq|gr|gs|gt|gu|gw|gy|hk|hm|hn|hr|ht|hu|id|ie|il|im|in|io|iq|ir|is|it|je|jm|jo|jp|ke|kg|kh|ki|km|kn|kp|kr|kw|ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma|mc|md|me|mg|mh|mk|ml|mm|mn|mo|mp|mq|mr|ms|mt|mu|mv|mw|mx|my|mz|na|nc|ne|nf|ng|ni|nl|no|np|nr|nu|nz|om|pa|pe|pf|pg|ph|pk|pl|pm|pn|pr|ps|pt|pw|py|qa|re|ro|rs|ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj|Ja|sk|sl|sm|sn|so|sr|ss|st|su|sv|sx|sy|sz|tc|td|tf|tg|th|tj|tk|tl|tm|tn|to|tp|tr|tt|tv|tw|tz|ua|ug|uk|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|ye|yt|yu|za|zm|zw)/)(?:[^\s()<>{}\[\]]+|\([^\s()]*?\([^\s()]+\)[^\s()]*?\)|\([^\s]+?\))+(?:\([^\s()]*?\([^\s()]+\)[^\s()]*?\)|\([^\s]+?\)|[^\s`!()\[\]{};:'\".,<>?«»""''])|(?:(?<!@)[a-z0-9]+(?:[.\-][a-z0-9]+)*[.](?:com|net|org|edu|gov|mil|aero|asia|biz|cat|coop|info|int|jobs|mobi|museum|name|post|pro|tel|travel|xxx|ac|ad|ae|af|ag|ai|al|am|an|ao|aq|ar|as|at|au|aw|ax|az|ba|bb|bd|be|bf|bg|bh|bi|bj|bm|bn|bo|br|bs|bt|bv|bw|by|bz|ca|cc|cd|cf|cg|ch|ci|ck|cl|cm|cn|co|cr|cs|cu|cv|cx|cy|cz|dd|de|dj|dk|dm|do|dz|ec|ee|eg|eh|er|es|et|eu|fi|fj|fk|fm|fo|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|gn|gp|gq|gr|gs|gt|gu|gw|gy|hk|hm|hn|hr|ht|hu|id|ie|il|im|in|io|iq|ir|is|it|je|jm|jo|jp|ke|kg|kh|ki|km|kn|kp|kr|kw|ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma|mc|md|me|mg|mh|mk|ml|mm|mn|mo|mp|mq|mr|ms|mt|mu|mv|mw|mx|my|mz|na|nc|ne|nf|ng|ni|nl|no|np|nr|nu|nz|om|pa|pe|pf|pg|ph|pk|pl|pm|pn|pr|ps|pt|pw|py|qa|re|ro|rs|ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj|Ja|sk|sl|sm|sn|so|sr|ss|st|su|sv|sx|sy|sz|tc|td|tf|tg|th|tj|tk|tl|tm|tn|to|tp|tr|tt|tv|tw|tz|ua|ug|uk|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|y
e|yt|yu|za|zm|zw)\b/?(?!@)))"""
                        
                        
                        def extract_urls(path: Path, pattern: re.Pattern) -> Iterator[str]:
                            """Yield every match of ``pattern`` found in the text of ``path``.

                            The whole file is read before any match is yielded. Bytes that cannot
                            be decoded are replaced instead of raising, so a single stray byte
                            does not prevent the URLs in the rest of the file from being found.

                            Raises OSError if the file cannot be opened or read.
                            """

                            with path.open("r", errors="replace") as f:
                                content = f.read()

                            for match in pattern.finditer(content):
                                yield match.group(0)
                        
                        
                        def main():
                            """Parse the command line and print every URL found in the given files.

                            With -w/--web the web-only pattern (RE_WEB) is used, otherwise the
                            general pattern (RE_ALL). Each URL is printed on its own line.

                            A file that cannot be read or decoded is reported on standard error
                            and skipped; the remaining files are still processed and the process
                            then exits with status 1.
                            """
                            parser = argparse.ArgumentParser(description="Extract URLs from files.")

                            parser.add_argument(
                                "-w",
                                "--web",
                                action="store_true",
                                help="extract web URLs only",
                            )

                            parser.add_argument("files", nargs="+", type=Path, help="input files to process")

                            args = parser.parse_args()
                            pattern = re.compile(RE_WEB if args.web else RE_ALL)

                            failed = False

                            for path in args.files:
                                try:
                                    for url in extract_urls(path, pattern):
                                        print(url)
                                except (OSError, UnicodeError) as e:
                                    print(f"error processing {shlex.quote(str(path))}: {e}", file=sys.stderr)
                                    failed = True

                            # Let callers detect that at least one file was skipped.
                            if failed:
                                sys.exit(1)


                        if __name__ == "__main__":
                            main()

A UTC wrapper for date(1).

Download.

#! /bin/sh
                        
                        # Run date with TZ set to the empty string for this one command,
                        # forwarding all arguments unchanged.
                        # NOTE(review): this relies on the system treating an empty TZ as UTC --
                        # confirm on each target platform.
                        TZ= date "$@"

A yt-dlp wrapper optimized for highly compatible H.264 YouTube downloads. The script handles metadata embedding, subtitle downloading, and thumbnail embedding in a single command.

Download.

#! /bin/sh
                        # This script downloads YouTube videos for archival in a high-compatibility
                        # format with embedded metadata using https://github.com/yt-dlp/yt-dlp/.
                        
                        set -eu
                        
                        # Defaults, overridden by the options parsed below.
                        log_url=0       # 1 when -l/--log was given
                        max_height=1080 # upper bound used in the yt-dlp format selector

                        # parse_options ARG...
                        # Consume the leading -l/--log and -m/--max-height <n> options.
                        # Sets:     log_url, max_height, and opt_count (number of words consumed,
                        #           for the caller to shift away).
                        # Exits 2:  when -m/--max-height has no value or a non-numeric one.
                        # Parsing stops at the first word that is not one of these options; that
                        # word and everything after it is left for yt-dlp.
                        parse_options() {
                            opt_count=0

                            # Test the argument count rather than looping forever: with "set -u"
                            # in effect, reading "$1" after the last word aborts the script.
                            while [ "$#" -gt 0 ]; do
                                case "$1" in
                                -l | --log)
                                    log_url=1
                                    shift
                                    opt_count=$((opt_count + 1))
                                    ;;

                                -m | --max-height)
                                    if [ "$#" -lt 2 ]; then
                                        printf '%s: option %s requires a value\n' \
                                            "$(basename "$0")" "$1" >&2
                                        exit 2
                                    fi

                                    # The value is spliced into the format selector, so accept
                                    # digits only.
                                    case "$2" in
                                    '' | *[!0-9]*)
                                        printf '%s: invalid max height: %s\n' \
                                            "$(basename "$0")" "$2" >&2
                                        exit 2
                                        ;;
                                    esac

                                    max_height=$2
                                    shift 2
                                    opt_count=$((opt_count + 2))
                                    ;;

                                *)
                                    break
                                    ;;
                                esac
                            done
                        }

                        parse_options "$@"
                        shift "$opt_count"
                        
                        # Run yt-dlp on the remaining command-line words with a fixed option set.
                        # The format selector asks for the best video stream whose codec name
                        # contains "avc1" and whose height is at most $max_height, combined with
                        # the best audio stream that has the m4a extension. The other flags
                        # request metadata, chapter, subtitle and thumbnail embedding, plus
                        # English subtitles including automatically generated ones.
                        yt-dlp \
                            --add-metadata \
                            --embed-chapters \
                            --embed-subs \
                            --embed-thumbnail \
                            --format "bestvideo[vcodec*=avc1][height<=$max_height]+bestaudio[ext=m4a]" \
                            --sub-langs en \
                            --write-auto-subs \
                            "$@" \
                            ;
                        
                        # When logging was requested, append each remaining command-line word
                        # to urls.txt in the current directory, one per line.
                        # NOTE(review): every word is logged, including any yt-dlp options that
                        # were passed through -- confirm that is intended.
                        if [ 1 -eq "$log_url" ]; then
                            for entry do
                                printf '%s\n' "$entry" >>urls.txt
                            done
                        fi