# change urls in website directories with unfortunate encodings
#!/bin/bash
# script to change homepage urls from fully qualified to absolute i.e.
# from http://my.domain/~user/somestuff/etc
# to /~user/somestuff/etc
#
# due to the unfortunate circumstances of life, this script assumes
# that the pages are either in an encoding which sed can easily deal
# with, or in UTF-16LE, which is what Word outputs. the strategy is
# kludgy, and relies on the fact that when sed fails for bad character
# encoding reasons, it produces an empty file.
# Base URL of the site, written as a sed regular-expression fragment: it is
# spliced verbatim into the pattern side of the s/// expression built further
# down. Slashes are escaped because '/' is the s/// delimiter; dots are
# escaped so they match a literal '.' instead of any character.
domain='http:\/\/www\.mydomain\.com'
# Print the absolute path of the directory given as $1.
# Arguments: $1 - directory path (relative or absolute)
# Outputs:   the directory's full path on stdout
# Returns:   non-zero, printing nothing on stdout, if the directory cannot
#            be entered (rather than silently reporting the current
#            directory).
getpath() {
  # The subshell keeps the caller's working directory untouched. cd's own
  # stdout is discarded because it echoes the destination when CDPATH is used,
  # which would otherwise end up in the captured result.
  (cd -- "$1" >/dev/null && pwd)
}
# Validate the script's command-line arguments.
# Arguments: $1 - directory holding the files to process
#            $2 - personal directory name (the part after "~" in the URLs)
# Outputs:   a diagnostic on stderr when validation fails
# Exits:     terminates the script with status 1 when fewer than two
#            arguments are given or when $1 is not a directory.
checkargs() {
  if [[ $# -lt 2 ]]; then
    echo "need more arguments. correct syntax is:" >&2
    echo "$0 <target files directoryname> <personal directory name>" >&2
    exit 1
  fi
  # Quoted so that an empty argument or a path containing whitespace is
  # tested as a single path instead of confusing the test.
  if [[ ! -d "$1" ]]; then
    echo "$1 is not a directory, exiting" >&2
    exit 1
  fi
}
# Apply the URL substitution to one file that is assumed to be UTF-16LE.
# Arguments: $1 - full path of the target html file
#            $2 - accepted from callers but not used by this function
# Globals:   sed_string (read) - the sed expression that is applied
# The file is decoded to UTF-8, passed through sed, and re-encoded as
# UTF-16LE into "$1.changed_"; that result replaces the original only when
# it is non-empty.
convert_target_utf16le() {
  local target=$1
  local staged="$target.changed_"

  # convert file assuming UTF-16LE format
  iconv -f UTF-16LE -t UTF-8 < "$target" \
    | sed -e "$sed_string" \
    | iconv -f UTF-8 -t UTF-16LE > "$staged"

  # An empty result is treated as "the conversion did not work for this
  # file": the original is left as it was and the staging file is dropped.
  if [[ -s "$staged" ]]; then
    mv -- "$staged" "$target"
  else
    rm -- "$staged"
  fi
}
# Apply the URL substitution to one file without any encoding conversion.
# Arguments: $1 - full path of the target html file
#            $2 - accepted from callers but not used by this function
# Globals:   sed_string (read) - the sed expression that is applied
# sed's output is staged in "$1.changed_"; that result replaces the original
# only when it is non-empty.
convert_target_standard() {
  local target=$1
  local staged="$target.changed_"

  # convert file assuming no special formatting
  sed -e "$sed_string" < "$target" > "$staged"

  # An empty result is treated as "sed could not handle this file": the
  # original is left as it was and the staging file is dropped.
  if [[ -s "$staged" ]]; then
    mv -- "$staged" "$target"
  else
    rm -- "$staged"
  fi
}
# Process one directory entry: recurse into directories, convert html files,
# ignore everything else.
# Arguments: $1 - path of this script; it is re-run with bash for each
#                 subdirectory
#            $2 - full path of the file or directory to process
#            $3 - value forwarded unchanged as the second argument of the
#                 recursive run and of the converter (the caller in this
#                 file passes the personal directory name)
process_target() {
  local script=$1
  local file=$2
  local sedstring=$3
  local new_target_dir

  if [[ -d "$file" ]]; then
    # Resolve the subdirectory to a full path; skip it if that fails.
    new_target_dir=$(getpath "$file") || return
    bash "$script" "$new_target_dir" "$sedstring"
  elif [[ "$file" =~ \.html?$ ]]; then
    # append newline - sed doesn't like files that don't end in newlines!
    # NOTE(review): this appends a single byte even though the file is then
    # handled as UTF-16LE, and it does so on every run - confirm intended.
    echo >> "$file"
    convert_target_utf16le "$file" "$sedstring"
    # convert_target_standard "$file" "$sedstring"
  fi
}
# Driver. $1 is the directory to process, $2 the personal directory name.
checkargs "$@"

# Build the sed expression: the fully qualified prefix
# (<domain>/~<name>) is replaced by the absolute one (/~<name>).
# NOTE(review): $2 is inserted into the expression unescaped, so a name
# containing '/' or regex metacharacters would alter the expression.
personal_directory="$domain\/\~$2"
new_path="\/\~$2"
sed_string="s/$personal_directory/$new_path/g"

# get the full paths
script=$0
target_dir=$(getpath "$1") || exit 1

# Glob instead of parsing ls output, so names containing whitespace reach
# process_target as a single path. Like ls, the glob skips dotfiles.
for t in "$target_dir"/*; do
  # An empty directory leaves the pattern unexpanded; skip that literal.
  [[ -e "$t" ]] || continue
  process_target "$script" "$t" "$2"
done