Small utilities
This is a collection of small command-line utilities I have written over the years to make my daily Unix computing better.
Uses FFmpeg to convert media files to 16 kHz WAV for speech recognition with whisper.cpp.
#! /bin/sh
set -eu

rate=16000

usage() {
    printf 'usage: %s [-r <rate>] src [dest]\n\nDefault rate: %d.\n' \
        "$(basename "$0")" \
        "$rate" \
        >&2 \
        ;
}

if [ "${1:-}" = "-r" ]; then
    if [ "$#" -lt 2 ]; then
        usage
        exit 2
    fi

    rate=$2
    shift 2
fi

if [ "$#" -lt 1 ] || [ "$#" -gt 2 ]; then
    usage
    exit 2
fi

src=$1
dest=${2:-}
if [ -z "$dest" ]; then
    dest=${src%.*}.wav
fi

ffmpeg -hide_banner -i "$src" -ar "$rate" "$dest"
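A usage sketch, assuming the script is saved as 2wav16 (the name and the file names are placeholders):

# Convert a recording; the output defaults to interview.wav.
2wav16 interview.mp4
# Convert at a different sample rate to a chosen destination.
2wav16 -r 22050 talk.mkv talk-audio.wav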
A wrapper around 7-Zip that applies maximum compression settings. The settings are based on a SuperUser answer by user 91735472.
#! /bin/sh
set -eu

usage() {
    printf 'usage: %s [-d <MiB>] 7z-arg ...\n' "$(basename "$0")"
}

for arg in "$@"; do
    if [ "$arg" = -h ] || [ "$arg" = --help ]; then
        usage
        exit 0
    fi
done

if [ "$#" -lt 2 ]; then
    usage >&2
    exit 2
fi

d=1536m
if [ "$1" = -d ]; then
    d=${2%m}m
    shift 2
fi

7z \
    a \
    -t7z \
    -md="$d" \
    -mfb=273 \
    -mlc=0 \
    -mmc=10000 \
    -mmf=bt3 \
    -mmt \
    -mmtf \
    -mpb=0 \
    -ms \
    -mx=9 \
    -myx=9 \
    "$@" \
    ;
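A hypothetical invocation (7zmax is my placeholder name for the script):

# Create a solid archive with a 512 MiB dictionary instead of the
# default 1536 MiB; the encoder needs RAM several times the
# dictionary size.
7zmax -d 512 backup.7z ~/documents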
Creates backup copies of files by appending .bak to their names.
#! /bin/sh
set -eu

for f in "$@"; do
    cp -ai "$f" "$f".bak
done
A pair of scripts for interacting with the X clipboard. A little more convenient than using xclip(1) directly.
#! /bin/sh
set -eu
xclip -in -sel clip "$@"
#! /bin/sh
set -eu

if [ $# -ne 0 ]; then
    printf 'usage: %s\n' "$(basename "$0")" >&2
    exit 2
fi

xclip -out -sel clip
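For example, with the scripts installed under the placeholder names cbcopy and cbpaste:

# Copy a command's output to the clipboard, then paste it into a file.
ls -l | cbcopy
cbpaste > listing.txt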
A cURL wrapper for easy file downloads that works the same way on everything from NetBSD to Ubuntu. It uses the remote filename by default.
#! /bin/sh
set -eu

dl() {
    if [ $# -lt 1 ] || [ $# -gt 2 ]; then
        printf 'usage: dl url [dest]\n' >&2
        return 1
    fi

    if [ $# -eq 2 ]; then
        curl -L -o "$2" "$1"
    else
        curl -L -O -J "$1"
    fi
}

dl "$@"
Lists unique file extensions in the current directory.
#! /usr/bin/env python3
import shlex
from pathlib import Path

exts = {item.suffix for item in Path().iterdir()}
print(shlex.join(sorted(exts)))
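A sample run, assuming the script is saved as exts. Files and directories without an extension contribute an empty entry, which shlex.join renders as '':

exts
# Possible output:
# '' .md .py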
Prints file modification times as Unix timestamps. When there is more than one file, it prints each timestamp followed by the filename quoted for POSIX shell with shlex.quote. Useful for backing up and restoring modification times.
#! /usr/bin/env python3
import shlex
import sys
from pathlib import Path


def main() -> None:
    if len(sys.argv) == 1:
        sys.exit()

    print_paths = len(sys.argv) > 2
    for path in sys.argv[1:]:
        mtime = int(Path(path).stat().st_mtime)
        if print_paths:
            print(mtime, shlex.quote(path))
        else:
            print(mtime)


if __name__ == "__main__":
    main()
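To back up and restore timestamps (mtime is a placeholder name for the script; the restore example uses GNU touch, which accepts @-prefixed Unix timestamps):

# Save the timestamps.
mtime report.pdf notes.txt > mtimes.txt
# Restore one by hand.
touch -d @1712345678 report.pdf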
A shortcut for the tmux session manager that attaches to existing sessions or creates new ones.
#! /bin/sh
set -eu

if [ $# -gt 1 ]; then
    printf 'usage: %s [session]\n' "$(basename "$0")" >&2
    exit 2
fi

session=op
if [ -n "${1:-}" ]; then
    session=$1
fi

tmux a -t "$session" || tmux new -s "$session"
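For instance, with the script installed as t (another placeholder name):

t          # attach to or create the default session "op"
t writing  # attach to or create a session named "writing"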
Removes trailing whitespace from files, editing them in place.
#! /usr/bin/env python3
import fileinput
import re
import sys

regex = re.compile(r"[ \t]+$")

try:
    for line in fileinput.input(inplace=True):
        sys.stdout.write(regex.sub("", line))
except OSError as e:
    sys.stderr.write(f"{e}\n")
    sys.exit(1)
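Because fileinput.input(inplace=True) rewrites the named files, the script combines well with find(1); here it is assumed to be installed as strip-ws:

# Strip trailing whitespace from every Python file under the
# current directory, in place.
find . -name '*.py' -exec strip-ws {} +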
Releases grabbed keyboard/mouse input in X11. I need it more often than I would like in remote sessions.
#! /bin/sh
xdotool key XF86Ungrab
Extracts URLs from text files using John Gruber's URL regex patterns. By default, it finds both web URLs and other URI schemes; with the -w option, it finds only web URLs. Note that the list of TLDs in the web regex is outdated.
#! /usr/bin/env python3
import argparse
import re
import shlex
import sys
from pathlib import Path
from typing import Iterator
# The URL and web URL regex patterns by John Gruber.
# https://gist.github.com/gruber/249502
RE_ALL = r"""(?i)\b((?:[a-z][\w-]+:(?:/{1,3}|[a-z0-9%])|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'\".,<>«»""'']))"""
# https://gist.github.com/gruber/8891611
RE_WEB = r"""(?i)\b((?:https?:(?:/{1,3}|[a-z0-9%])|[a-z0-9.\-]+[.](?:com|net|org|edu|gov|mil|aero|asia|biz|cat|coop|info|int|jobs|mobi|museum|name|post|pro|tel|travel|xxx|ac|ad|ae|af|ag|ai|al|am|an|ao|aq|ar|as|at|au|aw|ax|az|ba|bb|bd|be|bf|bg|bh|bi|bj|bm|bn|bo|br|bs|bt|bv|bw|by|bz|ca|cc|cd|cf|cg|ch|ci|ck|cl|cm|cn|co|cr|cs|cu|cv|cx|cy|cz|dd|de|dj|dk|dm|do|dz|ec|ee|eg|eh|er|es|et|eu|fi|fj|fk|fm|fo|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|gn|gp|gq|gr|gs|gt|gu|gw|gy|hk|hm|hn|hr|ht|hu|id|ie|il|im|in|io|iq|ir|is|it|je|jm|jo|jp|ke|kg|kh|ki|km|kn|kp|kr|kw|ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma|mc|md|me|mg|mh|mk|ml|mm|mn|mo|mp|mq|mr|ms|mt|mu|mv|mw|mx|my|mz|na|nc|ne|nf|ng|ni|nl|no|np|nr|nu|nz|om|pa|pe|pf|pg|ph|pk|pl|pm|pn|pr|ps|pt|pw|py|qa|re|ro|rs|ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj|Ja|sk|sl|sm|sn|so|sr|ss|st|su|sv|sx|sy|sz|tc|td|tf|tg|th|tj|tk|tl|tm|tn|to|tp|tr|tt|tv|tw|tz|ua|ug|uk|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|ye|yt|yu|za|zm|zw)/)(?:[^\s()<>{}\[\]]+|\([^\s()]*?\([^\s()]+\)[^\s()]*?\)|\([^\s]+?\))+(?:\([^\s()]*?\([^\s()]+\)[^\s()]*?\)|\([^\s]+?\)|[^\s`!()\[\]{};:'\".,<>?«»""''])|(?:(?<!@)[a-z0-9]+(?:[.\-][a-z0-9]+)*[.](?:com|net|org|edu|gov|mil|aero|asia|biz|cat|coop|info|int|jobs|mobi|museum|name|post|pro|tel|travel|xxx|ac|ad|ae|af|ag|ai|al|am|an|ao|aq|ar|as|at|au|aw|ax|az|ba|bb|bd|be|bf|bg|bh|bi|bj|bm|bn|bo|br|bs|bt|bv|bw|by|bz|ca|cc|cd|cf|cg|ch|ci|ck|cl|cm|cn|co|cr|cs|cu|cv|cx|cy|cz|dd|de|dj|dk|dm|do|dz|ec|ee|eg|eh|er|es|et|eu|fi|fj|fk|fm|fo|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|gn|gp|gq|gr|gs|gt|gu|gw|gy|hk|hm|hn|hr|ht|hu|id|ie|il|im|in|io|iq|ir|is|it|je|jm|jo|jp|ke|kg|kh|ki|km|kn|kp|kr|kw|ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma|mc|md|me|mg|mh|mk|ml|mm|mn|mo|mp|mq|mr|ms|mt|mu|mv|mw|mx|my|mz|na|nc|ne|nf|ng|ni|nl|no|np|nr|nu|nz|om|pa|pe|pf|pg|ph|pk|pl|pm|pn|pr|ps|pt|pw|py|qa|re|ro|rs|ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj|Ja|sk|sl|sm|sn|so|sr|ss|st|su|sv|sx|sy|sz|tc|td|tf|tg|th|tj|tk|tl|tm|tn|to|tp|tr|tt|tv|tw|tz|ua|ug|uk|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|ye|yt|yu|za|zm|zw)\b/?(?!@)))"""
def extract_urls(path: Path, pattern: re.Pattern) -> Iterator[str]:
    "Extract URLs from a single file using the provided regex pattern."
    with path.open("r") as f:
        content = f.read()
    yield from (match.group(0) for match in pattern.finditer(content))


def main():
    parser = argparse.ArgumentParser(description="Extract URLs from files.")
    parser.add_argument(
        "-w",
        "--web",
        action="store_true",
        help="extract web URLs only",
    )
    parser.add_argument("files", nargs="+", type=Path, help="input files to process")
    args = parser.parse_args()

    pattern = re.compile(RE_WEB if args.web else RE_ALL)

    for path in args.files:
        try:
            for url in extract_urls(path, pattern):
                print(url)
        except OSError as e:
            print(f"error processing {shlex.quote(str(path))}: {e}", file=sys.stderr)


if __name__ == "__main__":
    main()
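A typical pipeline, assuming the script is saved as urls:

# Collect unique web URLs from a directory of notes.
urls -w notes/*.txt | sort -u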
A UTC wrapper for date(1).
#! /bin/sh
TZ= date "$@"
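Setting TZ to the empty string selects UTC. An example, with the script saved as utc:

# Print the current time in UTC as an ISO 8601 timestamp.
utc +%Y-%m-%dT%H:%M:%SZ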
A yt-dlp wrapper optimized for highly compatible H.264 YouTube downloads. The script handles metadata embedding, subtitle downloading, and thumbnail embedding in a single command.
#! /bin/sh
# This script downloads YouTube videos for archival in a high-compatibility
# format with embedded metadata using https://github.com/yt-dlp/yt-dlp/.
set -eu

log_url=0
max_height=1080

while [ "$#" -gt 0 ]; do
    case "$1" in
    -l | --log)
        log_url=1
        shift
        ;;
    -m | --max-height)
        max_height=$2
        shift 2
        ;;
    *)
        break
        ;;
    esac
done

yt-dlp \
    --add-metadata \
    --embed-chapters \
    --embed-subs \
    --embed-thumbnail \
    --format "bestvideo[vcodec*=avc1][height<=$max_height]+bestaudio[ext=m4a]" \
    --sub-langs en \
    --write-auto-subs \
    "$@" \
    ;

if [ "$log_url" -eq 1 ]; then
    for url in "$@"; do
        printf '%s\n' "$url" >> urls.txt
    done
fi
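An example invocation (ytdl is a placeholder name for the script):

# Download at up to 720p and append the URL to urls.txt.
ytdl -l -m 720 'https://www.youtube.com/watch?v=VIDEO_ID'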
See also:

- mkcd: The missing shell shortcut
- My setup
- now: a date-time calculator for expressions like now +21 month -5 days