This script will calculate how many lines of code a company or organization has submitted to a Git repo for a single revision.
This is done using git blame with emails. The email address is used to filter the lines, producing source code files that contain only the lines written by the selected company.
Blame output is then removed from these files and the resulting files are run through cloc which calculates code, comment and blank lines.
There are several parameters that decide which files to include; they are listed in the help function and can be shown by running the script with -h.
The script requires cloc and a sufficiently recent version of Git (roughly late 2010 or newer), because older versions do not support --show-email.
#git #parameters #mktemp #grep #loc #bash
#!/bin/bash
# Global settings. The option parser further down overrides COMPANY,
# EXCLUDE_FILE, TOOL and FILTER from the command line.

# Scratch directory that receives the filtered copies of the source files.
# Abort right away if it cannot be created: every later step writes into it.
TEMP_DIR=$(mktemp -d) || {
  echo "Unable to create a temporary directory, exiting." >&2
  exit 1
}
# Show counter if number of files is greater than this
COUNTER_VISIBLE=1
# Text matched against author email addresses (set with -c / --company)
COMPANY=""
# Script name used in usage messages; quoted so paths with spaces work
SCRIPT=$(basename -- "$0")
EXCLUDE_FILE="" # Name of exclude list file
TOOL=1 # Default "cloc"
# Default file filter: extended regex applied to the tracked file names
FILTER='\.[ch]p{0,2}$'
# Print the usage text to stdout.
# Globals: SCRIPT (read) - script name shown in the usage line.
# The option names printed here must match the ones the argument parser
# accepts (--exclude-list, -t).
help() {
  # Unquoted here-document so $SCRIPT expands; the regex's "$" is escaped.
  cat <<EOF

Usage: $SCRIPT [options]
 Options:
 --all Analyze ALL files, no filtering is done.
 --exclude-list= File containing exclude list (see comment below).
 -c, --company= Company name as shown in email address (REQUIRED).
 -f, --file-filter= File filter. Default \.[ch]p{0,2}\$ which includes C and C++ files.
 -t, --tool LOC count tool to use. 1 = cloc (default), 2 = sloccount.
 -h, --help This help text.

 Exclude list can be used to exclude files or directories from
the analysis. Simply list them relative to the root of the repository
directory.

 Script requires a relatively new version of Git that supports --show-email.

EOF
}
# Parse command line options into the global settings.
# Globals written: FILTER, COMPANY, EXCLUDE_FILE, TOOL.
# Arguments: the script's command line ("$@").
# Behaviour:
#   -h / --help prints the usage text and exits with status 1.
#   "--name=value" options take everything after the first "=".
#   -f, -c, -t and --tool take the following argument as their value.
#   Parsing stops silently at the first unrecognised argument.
parse_args() {
  while [ $# -gt 0 ]; do
    case "$1" in
      -h | --help)
        help
        exit 1
        ;;
      --all)
        FILTER=""
        shift
        ;;
      -f)
        shift
        if [ $# -gt 0 ]; then
          FILTER=$1
          shift
        fi
        ;;
      --file-filter*)
        # Parameter expansion instead of echo|sed: no word splitting or
        # globbing of the value, and the argument is always consumed.
        FILTER=${1#*=}
        shift
        ;;
      -c)
        shift
        if [ $# -gt 0 ]; then
          COMPANY=$1
          shift
        fi
        ;;
      --company*)
        COMPANY=${1#*=}
        shift
        ;;
      --exclude-list*)
        # Require a non-empty value after "=".
        case "$1" in
          *=?*)
            EXCLUDE_FILE=${1#*=}
            ;;
          *)
            echo "Exclude file missing!" >&2
            exit 1
            ;;
        esac
        shift
        ;;
      -t | --tool)
        shift
        if [ $# -gt 0 ]; then
          TOOL=$1
          shift
        fi
        ;;
      --tool=*)
        TOOL=${1#*=}
        shift
        ;;
      *)
        break
        ;;
    esac
  done
}
parse_args "$@"
# The company string is mandatory: without it no author can be selected,
# so point the user at the help text and stop.
[[ -n "$COMPANY" ]] || {
  echo "Company name missing, exiting."
  echo "Try $SCRIPT -h"
  exit 1
}
# Recreate the working tree's directory skeleton (directories only) inside
# the scratch directory, so the filtered copies of the source files can
# later be written to the same relative paths.
# Directories are created directly instead of copying every file and then
# deleting it again; .git is skipped, other hidden directories are included.
# The ":?" guard aborts if TEMP_DIR is empty rather than operating on the
# current directory.
echo "Copying directory structure to $TEMP_DIR."
: "${TEMP_DIR:?TEMP_DIR is not set}"
find . -name .git -prune -o -type d -print0 \
  | (cd "$TEMP_DIR" && xargs -0 mkdir -p --)
# Get list of authors from company (emails only).
# Every author email in the history is listed once; the ones containing
# $COMPANY are kept. The match is case-insensitive so that it agrees with
# the per-file "edited by company" check done later in the script.
# The result is written to authors.txt in the current directory.
echo "Creating list of authors into authors.txt..."
git log --format="%ae" \
  | grep -i -- "$COMPANY" \
  | sort -u > authors.txt
# Build the list of tracked files and apply the file name filter.
# Writes $TEMP_DIR/file-list.txt and sets TOTAL_FILES (all tracked files)
# and INCLUDED_FILES (files left after filtering).
echo "Creating list of files..."
git ls-files --full-name > "$TEMP_DIR/file-list.txt"
TOTAL_FILES=$(wc -l < "$TEMP_DIR/file-list.txt")
if [ -n "$FILTER" ]; then
  # Only include files matching the filter (by default c, h, cpp, hpp).
  # The pattern is quoted so the shell cannot split or glob-expand it.
  grep -E -e "$FILTER" < "$TEMP_DIR/file-list.txt" > "$TEMP_DIR/file-list.tmp"
  mv "$TEMP_DIR/file-list.tmp" "$TEMP_DIR/file-list.txt"
fi
INCLUDED_FILES=$(wc -l < "$TEMP_DIR/file-list.txt")
echo "Total number of files: $TOTAL_FILES"
echo "Total number of files to analyze: $INCLUDED_FILES"
echo
# Reduce the file list to files whose history contains at least one commit
# by an author whose email matches $COMPANY (case-insensitive).
echo "Removing files with no edits by $COMPANY..."
counter=1
# Start from an empty result file: if no file matches, the list must become
# empty instead of silently keeping every file.
: > "$TEMP_DIR/file-list.tmp"
while IFS= read -r file; do
  if [ "$INCLUDED_FILES" -gt "$COUNTER_VISIBLE" ]; then
    # Progress counter, redrawn on the same terminal line
    printf '\r%s / %s (%s) ' "$counter" "$INCLUDED_FILES" "$file"
  fi
  # Only include files that have edits by company; grep -q stops at the
  # first matching author instead of collecting all of them.
  if git log --format="%ae" -- "$file" | grep -qi -- "$COMPANY"; then
    printf '%s\n' "$file" >> "$TEMP_DIR/file-list.tmp"
  fi
  counter=$((counter + 1))
done < "$TEMP_DIR/file-list.txt"
echo
mv -f "$TEMP_DIR/file-list.tmp" "$TEMP_DIR/file-list.txt"
# Remember the directory the script was started from
CURR_DIR=$(pwd)
# Drop every file whose path contains one of the exclude list entries
# (fixed-string match), then report how many files were removed in total.
if [ -n "$EXCLUDE_FILE" ]; then
  echo "Handling exclude list..."
  if [ ! -r "$EXCLUDE_FILE" ]; then
    echo "Cannot read exclude list $EXCLUDE_FILE, exiting." >&2
    exit 1
  fi
  # Blank lines must be removed first: with -F an empty pattern matches
  # every line, so a single empty line would exclude all files.
  grep -v '^$' "$EXCLUDE_FILE" > "$TEMP_DIR/exclude-patterns.tmp"
  grep -vFf "$TEMP_DIR/exclude-patterns.tmp" "$TEMP_DIR/file-list.txt" \
    > "$TEMP_DIR/file-filtered.txt"
  rm -f -- "$TEMP_DIR/exclude-patterns.tmp"
  mv "$TEMP_DIR/file-filtered.txt" "$TEMP_DIR/file-list.txt"
fi
COMPANY_FILES=$(wc -l < "$TEMP_DIR/file-list.txt")
echo "Files removed: $((INCLUDED_FILES - COMPANY_FILES))"
echo
# Remove the "git blame --show-email --date=iso" annotation from every line
# read on stdin, leaving only the source text.
# Annotation layout: [^]<hash> [<file name>] (<email> <date> <time> <tz> <lineno>)
# followed by one space. The pattern is anchored at the start of the line
# and accepts both "+hhmm" and "-hhmm" timezone offsets.
strip_blame_prefix() {
  sed -E 's/^\^?[0-9a-f]+ +([^(]* +)?\(<[^>]*> +[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2} [-+][0-9]{4} +[0-9]+\) ?//'
}

# For each file, find lines of code added or edited by company employee and
# only copy those lines to the temporary directory.
echo "Removing code not written by $COMPANY.."
counter=1
while IFS= read -r file; do
  if [ "$COMPANY_FILES" -gt "$COUNTER_VISIBLE" ]; then
    # Progress counter, redrawn on the same terminal line
    printf '\r%s / %s (%s) ' "$counter" "$COMPANY_FILES" "$file"
  fi
  # Make sure the destination directory exists
  mkdir -p -- "$(dirname -- "$TEMP_DIR/$file")"
  # Blame with a fixed date format, keep the lines that mention one of the
  # emails in authors.txt, then strip the blame annotation.
  git blame -w --show-email --date=iso -- "$file" \
    | grep -Ff authors.txt \
    | strip_blame_prefix > "$TEMP_DIR/$file"
  counter=$((counter + 1))
done < "$TEMP_DIR/file-list.txt"
echo
echo
# Run the selected line counting tool on the filtered copies.
if [ -n "$EXCLUDE_FILE" ]; then
  cp -- "$EXCLUDE_FILE" "$TEMP_DIR"
  # After the cd below a relative EXCLUDE_FILE no longer resolves, so the
  # copy inside the scratch directory is the one that gets read.
  EXCLUDE_COPY="$TEMP_DIR/$(basename -- "$EXCLUDE_FILE")"
fi
# Never continue in the wrong directory: files are deleted further down.
cd "$TEMP_DIR" || {
  echo "Cannot enter $TEMP_DIR, exiting." >&2
  exit 1
}
# Delete only the blame intermediates (<file>.back, <file>.back2) of the
# listed files; a name glob such as "*.back*" would also delete genuine
# sources like "x.backend.c".
while IFS= read -r file; do
  rm -f -- "./$file.back" "./$file.back2"
done < "$TEMP_DIR/file-list.txt"
echo "Running analysis"
echo "----------------"
# Count lines & print results
if [ "$TOOL" -eq 2 ]; then
  # sloccount scans the whole directory, so excluded paths are deleted first
  if [ -n "$EXCLUDE_FILE" ]; then
    echo "Handling exclude list..."
    while IFS= read -r file; do
      # Skip blank entries, absolute paths and anything containing "..",
      # so nothing outside the scratch directory can be removed.
      case "$file" in
        '' | /* | *..*) continue ;;
      esac
      if [ -d "$file" ]; then
        echo "Removing dir $file"
        rm -rf -- "./$file"
      else
        echo "Removing $file"
        rm -f -- "./$file"
      fi
    done < "$EXCLUDE_COPY"
  fi
  sloccount .
else
  # cloc reads the relative paths in the list from the scratch directory
  cloc --list-file "$TEMP_DIR/file-list.txt"
fi
cd "$CURR_DIR" || exit 1