Small utilities
This is a collection of small command-line utilities I have written over the years to make my daily Unix computing better.
Converts media files to 16 kHz WAV for whisper.cpp speech recognition using FFmpeg.
#! /bin/sh
rate=16000
usage() {
    printf 'usage: %s [-r <rate>] src [dest]\n\nDefault rate: %d.\n' \
        "$(basename "$0")" \
        "$rate" \
        >&2 \
        ;
}
if [ "$1" = "-r" ]; then
    if [ "$#" -lt 2 ]; then
        usage
        exit 2
    fi
    rate=$2
    shift 2
fi
if [ "$#" -lt 1 ] || [ "$#" -gt 2 ]; then
    usage
    exit 2
fi
src=$1
dest=${2:-}
if [ -z "$dest" ]; then
    dest=${src%.*}.wav
fi
ffmpeg -hide_banner -i "$src" -ar "$rate" "$dest"
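For example, assuming the script is saved as to-wav (a placeholder name):
to-wav interview.mp4                  # writes interview.wav at 16 kHz
to-wav -r 8000 interview.mp4 low.wav  # custom rate and destination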
A wrapper around 7-Zip that applies maximum compression settings. The settings are based on a SuperUser answer by user 91735472.
#! /bin/sh
set -eu
usage() {
    printf 'usage: %s [-d <MiB>] 7z-arg ...\n' "$(basename "$0")"
}
for arg in "$@"; do
    if [ "$arg" = -h ] || [ "$arg" = --help ]; then
        usage
        exit 0
    fi
done
if [ "$#" -lt 2 ]; then
    usage >&2
    exit 2
fi
d=1536m
if [ "$1" = -d ]; then
    d=${2%m}m
    shift 2
fi
7z \
    a \
    -t7z \
    -md="$d" \
    -mfb=273 \
    -mlc=0 \
    -mmc=10000 \
    -mmf=bt3 \
    -mmt \
    -mmtf \
    -mpb=0 \
    -ms \
    -mx=9 \
    -myx=9 \
    "$@" \
    ;
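A sample invocation, assuming the script is saved as 7zmax (a placeholder name):
7zmax backup.7z ~/documents
7zmax -d 3072 backup.7z ~/documents   # use a 3072 MiB dictionary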
Creates backup copies of files by appending .bak to their names.
#! /bin/sh
set -eu
for f in "$@"; do
    cp -ai "$f" "$f".bak
done
A pair of scripts for interacting with the X clipboard. A little more convenient than using xclip(1) directly.
#! /bin/sh
set -eu
xclip -in -sel clip "$@"
#! /bin/sh
set -eu
if [ $# -ne 0 ]; then
    printf 'usage: %s\n' "$(basename "$0")" >&2
    exit 2
fi
xclip -out -sel clip "$@"
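Typical use, with the scripts saved under the placeholder names cbcopy and cbpaste:
sort todo.txt | cbcopy
cbpaste > clipboard-dump.txt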
A cURL wrapper for easy file downloads that works the same way everywhere from NetBSD to Ubuntu. It uses the remote filename by default.
#! /bin/sh
dl() {
    if [ $# -lt 1 ] || [ $# -gt 2 ]; then
        printf 'usage: dl url [dest]\n' >&2
        return 1
    fi
    if [ $# -eq 2 ]; then
        curl -L -o "$2" "$1"
    else
        curl -L -O -J "$1"
    fi
}
dl "$@"
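For example:
dl https://example.com/release.tar.gz                 # keeps the remote filename
dl https://example.com/release.tar.gz renamed.tar.gz  # saves under a chosen name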
Lists unique file extensions in the current directory.
#! /usr/bin/env python3
import shlex
from pathlib import Path
exts = {
    item.suffix for item in Path().iterdir()
}
print(shlex.join(sorted(exts)))
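A sample run, assuming the script is saved as exts (a placeholder name), in a directory containing README, a.py, and notes.txt; the empty string stands for entries without an extension:
exts
# Prints: '' .py .txt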
Prints file modification times as Unix timestamps. When there is more than one file, it prints each timestamp followed by the filename, quoted for POSIX shell with shlex.quote. Useful for backing up and restoring modification times.
#! /usr/bin/env python3
import shlex
import sys
from pathlib import Path
def main() -> None:
    if len(sys.argv) == 1:
        sys.exit()
    print_paths = len(sys.argv) > 2
    for path in sys.argv[1:]:
        mtime = int(Path(path).stat().st_mtime)
        if print_paths:
            print(mtime, shlex.quote(path))
        else:
            print(mtime)


if __name__ == "__main__":
    main()
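To restore a saved timestamp, feed it back to touch. Here I assume the script is saved as mtime (a placeholder name) and that GNU touch is available, since -d "@<epoch>" is a GNU extension:
mtime video.mp4 > timestamp.txt             # save
touch -d "@$(cat timestamp.txt)" video.mp4  # restore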
A shortcut for the tmux session manager that attaches to existing sessions or creates new ones.
#! /bin/sh
set -eu
if [ $# -gt 1 ]; then
    printf 'usage: %s [session]\n' "$(basename "$0")" >&2
    exit 2
fi
session=op
if [ -n "${1:-}" ]; then
    session=$1
fi
tmux a -t "$session" || tmux new -s "$session"
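For example, with the script saved as tm (a placeholder name):
tm        # attach to or create the default session "op"
tm work   # attach to or create the session "work"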
Removes trailing whitespace from files, editing them in place.
#! /usr/bin/env python3
import fileinput
import re
import sys
regex = re.compile(r"[ \t]+$")
try:
    for line in fileinput.input(inplace=True):
        sys.stdout.write(regex.sub("", line))
except OSError as e:
    sys.stderr.write(f"{e}\n")
    sys.exit(1)
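For example, assuming the script is saved as stws (a placeholder name):
stws notes.md src/*.py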
Releases grabbed keyboard/mouse input in X11. I need it more often than I would like in remote sessions.
#! /bin/sh
xdotool key XF86Ungrab
Extracts URLs from text files. The utility uses John Gruber’s URL regex patterns. By default, it finds both web URLs and other URI schemes. With the -w option, it only finds web URLs. The list of TLDs in the web regular expression is outdated.
#! /usr/bin/env python3
import argparse
import re
import shlex
import sys
from pathlib import Path
from typing import Iterator
# The URL and web URL regex patterns by John Gruber.
# https://gist.github.com/gruber/249502
RE_ALL = r"""(?i)\b((?:[a-z][\w-]+:(?:/{1,3}|[a-z0-9%])|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'\".,<>«»""'']))"""
# https://gist.github.com/gruber/8891611
RE_WEB = r"""(?i)\b((?:https?:(?:/{1,3}|[a-z0-9%])|[a-z0-9.\-]+[.](?:com|net|org|edu|gov|mil|aero|asia|biz|cat|coop|info|int|jobs|mobi|museum|name|post|pro|tel|travel|xxx|ac|ad|ae|af|ag|ai|al|am|an|ao|aq|ar|as|at|au|aw|ax|az|ba|bb|bd|be|bf|bg|bh|bi|bj|bm|bn|bo|br|bs|bt|bv|bw|by|bz|ca|cc|cd|cf|cg|ch|ci|ck|cl|cm|cn|co|cr|cs|cu|cv|cx|cy|cz|dd|de|dj|dk|dm|do|dz|ec|ee|eg|eh|er|es|et|eu|fi|fj|fk|fm|fo|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|gn|gp|gq|gr|gs|gt|gu|gw|gy|hk|hm|hn|hr|ht|hu|id|ie|il|im|in|io|iq|ir|is|it|je|jm|jo|jp|ke|kg|kh|ki|km|kn|kp|kr|kw|ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma|mc|md|me|mg|mh|mk|ml|mm|mn|mo|mp|mq|mr|ms|mt|mu|mv|mw|mx|my|mz|na|nc|ne|nf|ng|ni|nl|no|np|nr|nu|nz|om|pa|pe|pf|pg|ph|pk|pl|pm|pn|pr|ps|pt|pw|py|qa|re|ro|rs|ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj|Ja|sk|sl|sm|sn|so|sr|ss|st|su|sv|sx|sy|sz|tc|td|tf|tg|th|tj|tk|tl|tm|tn|to|tp|tr|tt|tv|tw|tz|ua|ug|uk|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|ye|yt|yu|za|zm|zw)/)(?:[^\s()<>{}\[\]]+|\([^\s()]*?\([^\s()]+\)[^\s()]*?\)|\([^\s]+?\))+(?:\([^\s()]*?\([^\s()]+\)[^\s()]*?\)|\([^\s]+?\)|[^\s`!()\[\]{};:'\".,<>?«»""''])|(?:(?<!@)[a-z0-9]+(?:[.\-][a-z0-9]+)*[.](?:com|net|org|edu|gov|mil|aero|asia|biz|cat|coop|info|int|jobs|mobi|museum|name|post|pro|tel|travel|xxx|ac|ad|ae|af|ag|ai|al|am|an|ao|aq|ar|as|at|au|aw|ax|az|ba|bb|bd|be|bf|bg|bh|bi|bj|bm|bn|bo|br|bs|bt|bv|bw|by|bz|ca|cc|cd|cf|cg|ch|ci|ck|cl|cm|cn|co|cr|cs|cu|cv|cx|cy|cz|dd|de|dj|dk|dm|do|dz|ec|ee|eg|eh|er|es|et|eu|fi|fj|fk|fm|fo|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|gn|gp|gq|gr|gs|gt|gu|gw|gy|hk|hm|hn|hr|ht|hu|id|ie|il|im|in|io|iq|ir|is|it|je|jm|jo|jp|ke|kg|kh|ki|km|kn|kp|kr|kw|ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma|mc|md|me|mg|mh|mk|ml|mm|mn|mo|mp|mq|mr|ms|mt|mu|mv|mw|mx|my|mz|na|nc|ne|nf|ng|ni|nl|no|np|nr|nu|nz|om|pa|pe|pf|pg|ph|pk|pl|pm|pn|pr|ps|pt|pw|py|qa|re|ro|rs|ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj|Ja|sk|sl|sm|sn|so|sr|ss|st|su|sv|sx|sy|sz|tc|td|tf|tg|th|tj|tk|tl|tm|tn|to|tp|tr|tt|tv|tw|tz|ua|ug|uk|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|ye|yt|yu|za|zm|zw)\b/?(?!@)))"""
def extract_urls(path: Path, pattern: re.Pattern) -> Iterator[str]:
"Extract URLs from a single file using the provided regex pattern."
with path.open("r") as f:
content = f.read()
yield from (match.group(0) for match in pattern.finditer(content))
def main():
parser = argparse.ArgumentParser(description="Extract URLs from files.")
parser.add_argument(
"-w",
"--web",
action="store_true",
help="extract web URLs only",
)
parser.add_argument("files", nargs="+", type=Path, help="input files to process")
args = parser.parse_args()
pattern = re.compile(RE_WEB if args.web else RE_ALL)
for path in args.files:
try:
for url in extract_urls(path, pattern):
print(url)
except OSError as e:
print(f"error processing {shlex.quote(str(path))}: {e}", file=sys.stderr)
if __name__ == "__main__":
main()
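For example, assuming the script is saved as urls (a placeholder name):
urls notes.md               # URLs with any URI scheme
urls -w notes.md mail.txt   # web URLs only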
A UTC wrapper for date(1).
#! /bin/sh
TZ= date "$@"
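For example, to print the current UTC time in an ISO 8601 format, assuming the script is saved as utc:
utc +%Y-%m-%dT%H:%M:%SZ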
A yt-dlp wrapper optimized for highly compatible H.264 YouTube downloads. The script handles metadata embedding, subtitle downloading, and thumbnail embedding in a single command.
#! /bin/sh
# This script downloads YouTube videos for archival in a high-compatibility
# format with embedded metadata using https://github.com/yt-dlp/yt-dlp/.
set -eu
log_url=0
max_height=1080
while :; do
case "$1" in
-l | --log)
log_url=1
shift
;;
-m | --max-height)
max_height=$2
shift 2
;;
*)
break
;;
esac
done
yt-dlp \
--add-metadata \
--embed-chapters \
--embed-subs \
--embed-thumbnail \
--format "bestvideo[vcodec*=avc1][height<=$max_height]+bestaudio[ext=m4a]" \
--sub-langs en \
--write-auto-subs \
"$@" \
;
if [ "$log_url" -eq 1 ]; then
for url in "$@"; do
printf '%s\n' "$url" >> urls.txt
done
fi
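For example, assuming the script is saved as yt-archive (a placeholder name), the following downloads a video at up to 720p and appends its URL to urls.txt:
yt-archive -l -m 720 'https://www.youtube.com/watch?v=VIDEO_ID'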