Skip to content

Title capitalization correction for ID3 (and Ogg Vorbis) tags (and filenames).

I finally got sick of the horrible bullshit people have the gall to put into CDDB and FreeDB, so I wrote this:

#!/bin/sh

# copyright 2006, gr@eclipsed.net
# no warranty expressed or implied, use/modify/redistribute as you like

# Prefix put in front of the commands that modify files: empty when already
# running as root, otherwise "sudo " (used unquoted, so the empty value
# simply disappears from the command line).
# `id -u` prints just the numeric effective uid, so there is no need to pick
# it out of the human-readable `id` output.
sudo=''
if [ "$(id -u)" -ne 0 ]; then
	sudo="sudo "
fi

# force:    greater than 0 means retag/rename without asking (set by -f).
# passargs: flags handed on to the recursive invocation for directories.
force=0
passargs=''

# getopt(1) doesn't play nicely with argv shell escaping; see below
#args=`getopt f $@`
#args=`getopt f $@`
#if [ $? -ne 0 ]
#then
#	# XXX usage
#	exit 2
#fi
#set -- $args
##for i in $args
#for i
#do
#	case "$i"
#	in
#		-f)
#			force=1
#			passargs='-f'
#			shift;;
#		--)
#			shift; break;;
#	esac
#done

# For title capitalization,
# http://aitech.ac.jp/~ckelly/midi/help/caps.html isn't a bad
# resource, but I disagree with some of it...
# munge_filename NAME...
#
# Print the normalised on-disk form of a file or directory name on stdout.
# The arguments are joined with single spaces, cleaned up by the sed pass
# below, and then handed to munge_caps with "_" as the word character and
# "-" as the separator.
munge_filename ()
{
	# Single and double quotes are dropped outright, then:
	#   1. ":" becomes " -", and " - " collapses to a bare "-"
	#   2. a leading track number (optionally preceded by "<digit>_" or
	#      "<digit>-") followed by ONE character from the bracket set
	#      is rewritten as "<track>-".  Inside the brackets "|-|" is a
	#      one-character range, so a literal "-" is not part of the set.
	#   3. "/" and " " become "_", and "-_" collapses to "-"
	#   4. a trailing "." is removed
	# printf instead of echo: names starting with "-" or containing
	# backslashes must pass through untouched.
	pass=$(printf '%s\n' "$*" | tr -d \'\" | sed -E \
		-e 's,:, -,g' \
		-e 's, - ,-,g' \
		-e 's,^([0-9][_-])?([0-9]+)[\.( )*|-|_],\2-,' \
		-e 's,[/ ],_,g' \
		-e 's,-_,-,g' \
		-e 's,\.$,,')
	# Quoted on purpose: the cleaned name can still contain "?", "*" or "[",
	# which would otherwise be expanded against the current directory.
	SPACECHAR='_' SEPARATOR='-' munge_caps "$pass"
}

# munge_cddb TEXT...
#
# Tag-text flavour of munge_caps: words are separated by a plain space, and
# the separator is "^", which munge_caps pastes into its patterns. All
# arguments are forwarded unchanged.
munge_cddb() {
	SEPARATOR='^' SPACECHAR=' ' munge_caps "$@"
}

# munge_caps TEXT...
#
# Print TEXT (arguments joined by single spaces) on stdout in title case.
# Two variables must be supplied by the caller:
#   SPACECHAR  the character standing between words
#   SEPARATOR  a further pattern after which a letter also starts a word;
#              it is pasted into an extended regex unescaped
# Pass 1 upper-cases a lower-case ASCII letter at the start of the line or
# directly after SEPARATOR/SPACECHAR.  Pass 2 lower-cases the listed
# prepositions, articles and conjunctions again, but only where SPACECHAR
# sits on both sides, so a first or last word keeps its capital.
munge_caps ()
{
	# XXX deal with raising, rather than just lowering?
	# printf rather than echo so text such as "-n" or backslashes survives.
	printf '%s\n' "$*" | sed -E \
	-e "s,^a,A,g" -e "s,^b,B,g" -e "s,^c,C,g" -e "s,^d,D,g" -e "s,^e,E,g" \
	-e "s,^f,F,g" -e "s,^g,G,g" -e "s,^h,H,g" -e "s,^i,I,g" -e "s,^j,J,g" \
	-e "s,^k,K,g" -e "s,^l,L,g" -e "s,^m,M,g" -e "s,^n,N,g" -e "s,^o,O,g" \
	-e "s,^p,P,g" -e "s,^q,Q,g" -e "s,^r,R,g" -e "s,^s,S,g" -e "s,^t,T,g" \
	-e "s,^u,U,g" -e "s,^v,V,g" -e "s,^w,W,g" -e "s,^x,X,g" -e "s,^y,Y,g" \
	-e "s,^z,Z,g" \
	-e "s,(${SEPARATOR}|${SPACECHAR})a,\1A,g" \
	-e "s,(${SEPARATOR}|${SPACECHAR})b,\1B,g" \
	-e "s,(${SEPARATOR}|${SPACECHAR})c,\1C,g" \
	-e "s,(${SEPARATOR}|${SPACECHAR})d,\1D,g" \
	-e "s,(${SEPARATOR}|${SPACECHAR})e,\1E,g" \
	-e "s,(${SEPARATOR}|${SPACECHAR})f,\1F,g" \
	-e "s,(${SEPARATOR}|${SPACECHAR})g,\1G,g" \
	-e "s,(${SEPARATOR}|${SPACECHAR})h,\1H,g" \
	-e "s,(${SEPARATOR}|${SPACECHAR})i,\1I,g" \
	-e "s,(${SEPARATOR}|${SPACECHAR})j,\1J,g" \
	-e "s,(${SEPARATOR}|${SPACECHAR})k,\1K,g" \
	-e "s,(${SEPARATOR}|${SPACECHAR})l,\1L,g" \
	-e "s,(${SEPARATOR}|${SPACECHAR})m,\1M,g" \
	-e "s,(${SEPARATOR}|${SPACECHAR})n,\1N,g" \
	-e "s,(${SEPARATOR}|${SPACECHAR})o,\1O,g" \
	-e "s,(${SEPARATOR}|${SPACECHAR})p,\1P,g" \
	-e "s,(${SEPARATOR}|${SPACECHAR})q,\1Q,g" \
	-e "s,(${SEPARATOR}|${SPACECHAR})r,\1R,g" \
	-e "s,(${SEPARATOR}|${SPACECHAR})s,\1S,g" \
	-e "s,(${SEPARATOR}|${SPACECHAR})t,\1T,g" \
	-e "s,(${SEPARATOR}|${SPACECHAR})u,\1U,g" \
	-e "s,(${SEPARATOR}|${SPACECHAR})v,\1V,g" \
	-e "s,(${SEPARATOR}|${SPACECHAR})w,\1W,g" \
	-e "s,(${SEPARATOR}|${SPACECHAR})x,\1X,g" \
	-e "s,(${SEPARATOR}|${SPACECHAR})y,\1Y,g" \
	-e "s,(${SEPARATOR}|${SPACECHAR})z,\1Z,g" \
	-e "s,${SPACECHAR}At${SPACECHAR},${SPACECHAR}at${SPACECHAR},g" \
	-e "s,${SPACECHAR}By${SPACECHAR},${SPACECHAR}by${SPACECHAR},g" \
	-e "s,${SPACECHAR}In${SPACECHAR},${SPACECHAR}in${SPACECHAR},g" \
	-e "s,${SPACECHAR}On${SPACECHAR},${SPACECHAR}on${SPACECHAR},g" \
	-e "s,${SPACECHAR}Near${SPACECHAR},${SPACECHAR}near${SPACECHAR},g" \
	-e "s,${SPACECHAR}To${SPACECHAR},${SPACECHAR}to${SPACECHAR},g" \
	-e "s,${SPACECHAR}From${SPACECHAR},${SPACECHAR}from${SPACECHAR},g" \
	-e "s,${SPACECHAR}Down${SPACECHAR},${SPACECHAR}down${SPACECHAR},g" \
	-e "s,${SPACECHAR}Off${SPACECHAR},${SPACECHAR}off${SPACECHAR},g" \
	-e "s,${SPACECHAR}Through${SPACECHAR},${SPACECHAR}through${SPACECHAR},g" \
	-e "s,${SPACECHAR}Out${SPACECHAR},${SPACECHAR}out${SPACECHAR},g" \
	-e "s,${SPACECHAR}Up${SPACECHAR},${SPACECHAR}up${SPACECHAR},g" \
	-e "s,${SPACECHAR}Of${SPACECHAR},${SPACECHAR}of${SPACECHAR},g" \
	-e "s,${SPACECHAR}For${SPACECHAR},${SPACECHAR}for${SPACECHAR},g" \
	-e "s,${SPACECHAR}With${SPACECHAR},${SPACECHAR}with${SPACECHAR},g" \
	-e "s,${SPACECHAR}Like${SPACECHAR},${SPACECHAR}like${SPACECHAR},g" \
	-e "s,${SPACECHAR}About${SPACECHAR},${SPACECHAR}about${SPACECHAR},g" \
	-e "s,${SPACECHAR}Along${SPACECHAR},${SPACECHAR}along${SPACECHAR},g" \
	-e "s,${SPACECHAR}Below${SPACECHAR},${SPACECHAR}below${SPACECHAR},g" \
	-e "s,${SPACECHAR}During${SPACECHAR},${SPACECHAR}during${SPACECHAR},g" \
	-e "s,${SPACECHAR}Above${SPACECHAR},${SPACECHAR}above${SPACECHAR},g" \
	-e "s,${SPACECHAR}Among${SPACECHAR},${SPACECHAR}among${SPACECHAR},g" \
	-e "s,${SPACECHAR}Beneath${SPACECHAR},${SPACECHAR}beneath${SPACECHAR},g" \
	-e "s,${SPACECHAR}Except${SPACECHAR},${SPACECHAR}except${SPACECHAR},g" \
	-e "s,${SPACECHAR}Across${SPACECHAR},${SPACECHAR}across${SPACECHAR},g" \
	-e "s,${SPACECHAR}Around${SPACECHAR},${SPACECHAR}around${SPACECHAR},g" \
	-e "s,${SPACECHAR}Beside${SPACECHAR},${SPACECHAR}beside${SPACECHAR},g" \
	-e "s,${SPACECHAR}Inside${SPACECHAR},${SPACECHAR}inside${SPACECHAR},g" \
	-e "s,${SPACECHAR}After${SPACECHAR},${SPACECHAR}after${SPACECHAR},g" \
	-e "s,${SPACECHAR}Before${SPACECHAR},${SPACECHAR}before${SPACECHAR},g" \
	-e "s,${SPACECHAR}Between${SPACECHAR},${SPACECHAR}between${SPACECHAR},g" \
	-e "s,${SPACECHAR}Outside${SPACECHAR},${SPACECHAR}outside${SPACECHAR},g" \
	-e "s,${SPACECHAR}Against${SPACECHAR},${SPACECHAR}against${SPACECHAR},g" \
	-e "s,${SPACECHAR}Behind${SPACECHAR},${SPACECHAR}behind${SPACECHAR},g" \
	-e "s,${SPACECHAR}Beyond${SPACECHAR},${SPACECHAR}beyond${SPACECHAR},g" \
	-e "s,${SPACECHAR}Over${SPACECHAR},${SPACECHAR}over${SPACECHAR},g" \
	-e "s,${SPACECHAR}Under${SPACECHAR},${SPACECHAR}under${SPACECHAR},g" \
	-e "s,${SPACECHAR}Into${SPACECHAR},${SPACECHAR}into${SPACECHAR},g" \
	-e "s,${SPACECHAR}Upon${SPACECHAR},${SPACECHAR}upon${SPACECHAR},g" \
	-e "s,${SPACECHAR}Without${SPACECHAR},${SPACECHAR}without${SPACECHAR},g" \
	-e "s,${SPACECHAR}Onto${SPACECHAR},${SPACECHAR}onto${SPACECHAR},g" \
	-e "s,${SPACECHAR}Within${SPACECHAR},${SPACECHAR}within${SPACECHAR},g" \
	-e "s,${SPACECHAR}Throughout${SPACECHAR},${SPACECHAR}throughout${SPACECHAR},g" \
	-e "s,${SPACECHAR}The${SPACECHAR},${SPACECHAR}the${SPACECHAR},g" \
	-e "s,${SPACECHAR}A${SPACECHAR},${SPACECHAR}a${SPACECHAR},g" \
	-e "s,${SPACECHAR}An${SPACECHAR},${SPACECHAR}an${SPACECHAR},g" \
	-e "s,${SPACECHAR}Some${SPACECHAR},${SPACECHAR}some${SPACECHAR},g" \
	-e "s,${SPACECHAR}And${SPACECHAR},${SPACECHAR}and${SPACECHAR},g" \
	-e "s,${SPACECHAR}But${SPACECHAR},${SPACECHAR}but${SPACECHAR},g" \
	-e "s,${SPACECHAR}Nor${SPACECHAR},${SPACECHAR}nor${SPACECHAR},g" \
	-e "s,${SPACECHAR}Or${SPACECHAR},${SPACECHAR}or${SPACECHAR},g" \
	-e "s,${SPACECHAR}Either${SPACECHAR},${SPACECHAR}either${SPACECHAR},g" \
	-e "s,${SPACECHAR}Neither${SPACECHAR},${SPACECHAR}neither${SPACECHAR},g" \
	-e "s,${SPACECHAR}After${SPACECHAR},${SPACECHAR}after${SPACECHAR},g" \
	-e "s,${SPACECHAR}Because${SPACECHAR},${SPACECHAR}because${SPACECHAR},g" \
	-e "s,${SPACECHAR}Although${SPACECHAR},${SPACECHAR}although${SPACECHAR},g" \
	-e "s,${SPACECHAR}Since${SPACECHAR},${SPACECHAR}since${SPACECHAR},g" \
	-e "s,${SPACECHAR}Though${SPACECHAR},${SPACECHAR}though${SPACECHAR},g" \
	-e "s,${SPACECHAR}Than${SPACECHAR},${SPACECHAR}than${SPACECHAR},g" \
	-e "s,${SPACECHAR}Unless${SPACECHAR},${SPACECHAR}unless${SPACECHAR},g" \
	-e "s,${SPACECHAR}As${SPACECHAR},${SPACECHAR}as${SPACECHAR},g" \
	-e "s,${SPACECHAR}Whereas${SPACECHAR},${SPACECHAR}whereas${SPACECHAR},g" \
	-e "s,${SPACECHAR}While${SPACECHAR},${SPACECHAR}while${SPACECHAR},g" \
	-e "s,${SPACECHAR}Until${SPACECHAR},${SPACECHAR}until${SPACECHAR},g" \
	-e "s,${SPACECHAR}Till${SPACECHAR},${SPACECHAR}till${SPACECHAR},g" \
	-e "s,${SPACECHAR}Via${SPACECHAR},${SPACECHAR}via${SPACECHAR},g" \
	-e "s,${SPACECHAR}Amid${SPACECHAR},${SPACECHAR}amid${SPACECHAR},g" \
	-e "s,${SPACECHAR}Per${SPACECHAR},${SPACECHAR}per${SPACECHAR},g" \
	-e "s,${SPACECHAR}Yet${SPACECHAR},${SPACECHAR}yet${SPACECHAR},g" \
	-e "s,${SPACECHAR}Da${SPACECHAR},${SPACECHAR}da${SPACECHAR},g" \
	-e "s,${SPACECHAR}De${SPACECHAR},${SPACECHAR}de${SPACECHAR},g" \
	-e "s,${SPACECHAR}La${SPACECHAR},${SPACECHAR}la${SPACECHAR},g" \
	-e "s,${SPACECHAR}Le${SPACECHAR},${SPACECHAR}le${SPACECHAR},g" \
	-e "s,${SPACECHAR}Lo${SPACECHAR},${SPACECHAR}lo${SPACECHAR},g" \
	-e "s,${SPACECHAR}Il${SPACECHAR},${SPACECHAR}il${SPACECHAR},g" \
	-e "s,${SPACECHAR}Della${SPACECHAR},${SPACECHAR}della${SPACECHAR},g" \
	-e "s,${SPACECHAR}Delle${SPACECHAR},${SPACECHAR}delle${SPACECHAR},g" \
	-e "s,${SPACECHAR}Degli${SPACECHAR},${SPACECHAR}degli${SPACECHAR},g" \
	-e "s,${SPACECHAR}Der${SPACECHAR},${SPACECHAR}der${SPACECHAR},g" \
	-e "s,${SPACECHAR}Das${SPACECHAR},${SPACECHAR}das${SPACECHAR},g" \
	-e "s,${SPACECHAR}D',${SPACECHAR}d',g" \
	-e "s,${SPACECHAR}L',${SPACECHAR}l',g"
	# The last two rules use a bare apostrophe on purpose.  Written as \'
	# the backslash survives the shell's double quotes, and GNU sed then
	# reads \' as "end of pattern space" instead of a literal quote.
}

# retag FILETYPE PATH ALBUM ARTIST TITLE TRACK
#
# Write the corrected tags to PATH.
#   FILETYPE  "mp3" (written with id3v2) or "ogg" (written with
#             vorbiscomment); anything else only prints a notice
#   TRACK     only used for ogg files
# Reads the globals $force and $sudo.  When $force is greater than 0 the
# tags are written straight away; otherwise the user is asked first and any
# answer starting with "n" or "N" skips the file (the default is yes).
retag ()
{
	if [ "${force:-0}" -le 0 ]; then
		printf "  Retag? [y/n] (y)> "
		# XXX edit option?
		read -r confirm
		# case instead of [ ! `grep` ]: a multi-word answer must not turn
		# into a malformed test expression.
		case "$confirm" in
			[nN]*) return 0 ;;
		esac
	fi

	# $sudo is deliberately unquoted: it is either empty or "sudo ".
	case "$1" in
		mp3)
			if [ "${force:-0}" -gt 0 ]; then
				printf 'Retagging...\n'
			fi
			$sudo id3v2 -A "$3" -a "$4" -t "$5" "$2"
			;;
		ogg)
			if [ "${force:-0}" -gt 0 ]; then
				printf 'Retagging...\n'
			fi
			# NOTE(review): -w presumably replaces the file's whole comment
			# set, dropping fields other than these four -- confirm against
			# the vorbiscomment man page.
			$sudo vorbiscomment -w -t "album=$3" -t "artist=$4" \
				-t "title=$5" -t "tracknumber=$6" "$2"
			;;
		*)
			# Path and type are passed as printf arguments, never as part
			# of the format, so a "%" in a filename is printed verbatim.
			printf '%s is unknown filetype "%s"; not retagging.\n' "$2" "$1"
			;;
	esac
}

# Main loop: handle every command-line argument in order.  "-f" switches to
# no-questions-asked mode for the arguments after it; a directory is
# processed by re-running this script inside it and is then renamed itself;
# a file gets its tags corrected (ogg/mp3 only) and is then renamed.

# A relative $0 (e.g. ./script) stops resolving once we cd into a
# subdirectory, so anchor it to the starting directory.  A bare command name
# is left for the shell to look up as before.
case "$0" in
	/*) self=$0 ;;
	*/*) self=$(pwd)/$0 ;;
	*) self=$0 ;;
esac

for oldpath in "$@"
do
	if [ "$oldpath" = "-f" ]; then
		# Unfortunately, getopt(1) mangles $@ by inserting a -- and
		# inherently removes shell escaping of spaces and such, which
		# means we'd have to add it back in manually... yech. The down side
		# is that flags will only affect the paths listed AFTER them, but
		# this may have its uses anyway (Sure of a bunch of paths, not
		# positive about a couple... list the ones you're not positive
		# about first).
		force=1
		passargs='-f'
		continue
	fi

	# Only the last path component is used; it is looked up in the current
	# directory.
	oldpath=$(basename "$oldpath")
	if [ -d "$oldpath" ]; then
		echo "Descending into $oldpath..."
		# $passargs is unquoted on purpose: empty means "no extra argument".
		( cd "$oldpath" && "$self" $passargs * )
		echo "Ascending from $oldpath."
	else
		# I'm queasy about going with file extensions for filetype, but
		# everybody's file(1) is subtly different and parsing it is a pain.
		filetype=${oldpath##*.}

		# Start from empty values so an unsupported file never inherits
		# the tags read from the previous one.
		TALB='' TPE1='' TIT2='' TRCK=''
		known=1
		case "$filetype" in
			ogg)
				# Strip only the "field=" prefix: anchoring on "=" keeps
				# e.g. albumartist= out of album, and values containing
				# "=" stay whole.
				tags=$(vorbiscomment -l "$oldpath")
				TALB=$(printf '%s\n' "$tags" | sed -n 's/^album=//p')
				TPE1=$(printf '%s\n' "$tags" | sed -n 's/^artist=//p')
				TIT2=$(printf '%s\n' "$tags" | sed -n 's/^title=//p')
				TRCK=$(printf '%s\n' "$tags" | sed -n 's/^tracknumber=//p')
				;;
			mp3)
				# Lines look like "TIT2 (description): value"; remove
				# everything up to the first ": " so that a value which
				# itself contains ": " is not cut short.
				tags=$(id3v2 -l "$oldpath")
				TALB=$(printf '%s\n' "$tags" | sed -n -E 's/^(TALB|TAL) [^:]*: //p')
				TPE1=$(printf '%s\n' "$tags" | sed -n -E 's/^(TPE1|TP1) [^:]*: //p')
				TIT2=$(printf '%s\n' "$tags" | sed -n -E 's/^(TIT2|TT2) [^:]*: //p')
				TRCK=$(printf '%s\n' "$tags" | sed -n -E 's/^(TRCK|TRK) [^:]*: //p')
				;;
			*)
				known=0
				;;
		esac

		if [ "$known" -eq 1 ]; then
			# Quoted: tag text such as "*" or "What?" must not be expanded
			# into file names.
			album=$(munge_cddb "$TALB")
			artist=$(munge_cddb "$TPE1")
			title=$(munge_cddb "$TIT2")
			if [ "$TALB" != "$album" ] || [ "$TPE1" != "$artist" ] ||
				[ "$TIT2" != "$title" ]
			then
				# Values go in as printf arguments so "%" or "\" in a tag
				# is shown as is.
				printf 'For "%s":\n' "$oldpath"
				printf '  Album:        %s -> %s\n' "$TALB" "$album"
				printf '  Artist:       %s -> %s\n' "$TPE1" "$artist"
				printf '  Song title:   %s -> %s\n' "$TIT2" "$title"
				retag "$filetype" "$oldpath" "$album" "$artist" "$title" "$TRCK"
			fi
		fi
	fi

	# Files and (after the recursive pass) directories are both renamed.
	newpath=$(munge_filename "$oldpath")
	if [ -n "$newpath" ] && [ "$oldpath" != "$newpath" ]; then
		if [ "$force" -gt 0 ]; then
			printf 'Renaming "%s"\n      to "%s"\n' "$oldpath" "$newpath"
			$sudo mv -- "$oldpath" "$newpath"
		else
			printf 'Rename "%s"\n    to "%s"? [y/n] (y)> ' "$oldpath" "$newpath"
			read -r confirm
			# Anything not starting with n/N counts as yes.
			case "$confirm" in
				[nN]*) ;;
				*) $sudo mv -- "$oldpath" "$newpath" ;;
			esac
		fi
	fi
done

I'm working on merging this into abcde(1) (turns out a friend of a friend maintains that now; so goes Debian, though, really), but for now this script just post-processes existing files. Feeding it directories is fine, but I recommend against feeding it either full or relative paths, as you'll find it turns your directory separators into “_”. Feeding it a directory will do the right thing (descend recursively, process when it actually gets to a file, then do name correction on the directory when ascending). It's not something I'd recommend running without reading it over first. Feel free to ask any questions you like.

(Note that I'm a bit pathological about what I want to have lower-case in titles; you may want to trim down the list in munge_caps() a bit.)

Once I shoehorn this into abcde(1), it'll be two more user-redefinable functions (mungecddb() and mungetitle()), and mungefilename() will also pass things through the title-correction (mostly for lower/upper casing) by default.

Any thoughts?

Post a Comment

Your email is never published nor shared. Required fields are marked *
*
*