#!/bin/sh
# a script to traverse a directory tree and convert 
# file and directory names to target (current by default)
# charset, auto-guessing source charset given the language
# (c) 2006, 2011 Michael Shigorin <mike@altlinux.org>
# translit support by George Kouryachy <george@altlinux.org>
# copy, distribute, modify and use as needed

# usage [STATUS]
# Print the help text to stderr and terminate the script with STATUS
# (0 when STATUS is omitted).
usage()
{
	# Plain (non-stripping) here-document: its lines must stay at column 0.
	cat >&2 <<EOF
A script to traverse a directory tree and convert
file and directory names to target charset using
source charset autoguesser (defaulting to locale's
language and charset if none specified)

Please note that you can backup filenames with cp -al
using hardlinks so very cheap on time and disk space!

Usage: $0 [-L language] [-x charset] [-r] [-q] [-t] [-N] [<directory|file>]
       -L and -x take parameters just as enca(1)
       -r turns on recursion
       -t ascii transliteration via lynx(1)
       -q means quiet
       -N (--notest) actually rename; the default is a dry run
          that only prints the mv commands
EOF
	exit "${1:-0}"
}

# warning TEXT...
# Print "<script name>: TEXT" to stderr.  printf is used instead of echo so
# that backslashes in file names are reported verbatim by every /bin/sh.
warning() { printf '%s\n' "$0: $*" >&2; }
# fatal TEXT...
# Report TEXT through warning() and terminate the script with status 1.
fatal()
{
	warning "$*"
	exit 1
}

# process NAME
# Recode the single path NAME and rename it when the recoded name differs.
# Reads the globals:
#   translit  - when non-empty, the command used for ASCII transliteration
#               (invoked with lynx-style options); otherwise enca is used
#   language, charmap - passed to enca as -L / -x; charmap is also passed
#               to the transliterator as -assume_charset
#   wrapper   - prefix for the mv command ("echo" gives a dry run)
#   q         - quote character wrapped around both names in the mv command
#   verbose   - extra mv option, dropped entirely when empty
# Writes the global "new" (the recoded name).
# Terminates the script via fatal() when conversion fails or yields an
# empty name; otherwise returns the status of the last command run.
process()
{
	# NB: enca might not cope with too short sequences
	if [ -z "$translit" ]; then
		new="$(printf '%s\n' "$1" | enca -L "$language" -x "$charmap")" \
			|| fatal "enca unable to convert '$1'"
	else
		# "<" is escaped as "&lt;" before the name is fed to the
		# transliterator on stdin
		new="$(printf '%s\n' "$1" | sed 's/</\&lt;/g' \
			| "$translit" -nomargins -width 256 -dump -stdin \
				-display_charset=us-ascii -assume_charset="$charmap")" \
			|| fatal "$translit unable to convert '$1'"
	fi
	# safety net
	[ -n "$new" ] || fatal "'$1' results in EMPTY converted name!"
	if [ "$1" != "$new" ]; then
		if [ -e "$new" ]; then
			warning "'$1' already exists with recoded name: '$new', leaving as is"
		else
			# $wrapper is deliberately unquoted so that an empty value
			# vanishes; an empty $verbose must vanish too rather than
			# reach mv as an empty operand.  "--" protects names that
			# start with a dash.
			$wrapper mv ${verbose:+"$verbose"} -- "$q$1$q" "$q$new$q"
		fi
	fi
}

# traverse PATH
# Skip PATH with a warning if it is a symlink.  Otherwise, when the global
# "recurse" is non-empty and PATH is a directory, traverse every non-hidden
# entry inside it first, and finally hand PATH itself to process().
# Any failure below terminates the (sub)shell with a non-zero status.
traverse()
{
	# symlink support might be considered but scary
	if [ -L "$1" ]; then
		warning "skipping '$1' symlink"
		return
	fi
	if [ -n "$recurse" ] && [ -d "$1" ]; then
		# A subshell keeps the caller's working directory intact without
		# relying on pushd/popd, which plain sh does not provide.
		(
			cd -- "$1" || exit 1
			# The glob is expanded once, before any entry is renamed.
			for entry in *; do
				# an unmatched "*" stays literal in an empty directory
				[ -e "$entry" ] || [ -L "$entry" ] || continue
				traverse "$entry" || exit 1
			done
		) || exit $?
	fi
	process "$1" || exit 1
}

### main()

# Let getopt(1) normalize the command line.  Only -L/--language and
# -x/--charset take an argument; the remaining options are plain flags.
# The leading "+" stops option parsing at the first non-option argument.
TEMP=$(getopt -l language:,charset:,recurse,notest,translit,quiet,help \
	-- +L:x:rNtqh "$@") || usage 1
eval set -- "$TEMP"
eval "$(locale -k charmap)"	# $charmap is current charset
# alas, language code isn't that easy
[ -z "$LC_CTYPE" ] && eval "$(locale 2>/dev/null | grep '^LC_CTYPE')"
# the first two characters of LC_CTYPE serve as the language guess
language="$(printf '%s\n' "$LC_CTYPE" | cut -c1-2)"
verbose="-v"
recurse=
wrapper="echo"		# dry run by default: mv commands are only printed
failed=
translit=
q="'"			# quote character put around names in the printed commands

while :; do
	case "$1" in
		--)
			shift
			break
			;;
		-L|--language)
			shift
			language="$1"
			;;
		-x|--charset)
			shift
			charmap="$1"
			;;
		-r|--recurse)
			recurse=1
			;;
		-N|--notest)
			# really rename: no echo wrapper, no extra quotes
			wrapper=
			q=
			;;
		-t|--translit)
			translit="lynx"
			;;
		-q|--quiet)
			verbose=
			;;
		-h|--help)
			usage 0
			;;
		*)
			if [ -e "$1" ]; then
				break
			fi
			fatal "Unrecognized option: $1"
			;;
	esac
	shift
done

[ -n "$language" ] || fatal "need -L language, unable to guess from locale"
[ -n "$charmap" ] || fatal "need -x charset, unable to guess from locale"

# Only the first remaining argument is handled.
if [ -z "$1" ]; then
	usage 1
elif [ -e "$1" ]; then
	traverse "$1" || exit 1
else
	fatal "'$1' not found"
fi

# NOTE(review): no code visible in this file assigns a non-empty value
# to $failed, so this check looks like dead code -- confirm before removing.
[ -n "$failed" ] &&
	fatal "'$failed' name failed to convert, please check"

[ -n "$wrapper" ] && [ -z "$failed" ] &&
	echo "seems good, now run me with --notest to actually rename"
:
