Makistos
10/25/2013 - 12:37 PM

This script will calculate how many lines of code a company or organization has submitted to a Git repo for a single revision. This is done using git blame with emails.

This script will calculate how many lines of code a company or organization has submitted to a Git repo for a single revision.

This is done using git blame with emails. The email address is used to filter the lines, producing source code files that only contain lines written by the selected company.

Blame output is then removed from these files and the resulting files are run through cloc which calculates code, comment and blank lines.

There are several parameters that decide which files to include; these can be seen in the help function or by running the script with -h.

The script requires cloc and a sufficiently recent version of Git (from about late 2010 onwards), because older versions do not support --show-email.

#git #parameters #mktemp #grep #loc #bash

#!/bin/bash

# Scratch directory that receives the directory skeleton and the filtered
# copies of the source files. Every later step writes into it, so give up
# immediately if it cannot be created.
TEMP_DIR=$(mktemp -d) || {
	echo "Could not create temporary directory." >&2
	exit 1
}

# Show counter if number of files is greater than this
COUNTER_VISIBLE=1

# String matched against author email addresses (set with -c / --company=)
COMPANY=""
# Script name for usage and error messages; quoted so a path with spaces works
SCRIPT=$(basename -- "$0")
EXCLUDE_FILE="" # Name of exclude list file
TOOL=1 # Default "cloc"
FILTER='\.[ch]p{0,2}$' # Default file filter

# Print the usage text to stdout.
# Globals: SCRIPT (read) - script name shown in the usage line.
function help {
	printf '\n'
	printf 'Usage: %s [options]\n' "$SCRIPT"
	printf '  Options:\n'
	printf '    --all \t\tAnalyze ALL files, no filtering is done.\n'
	# Spelled exactly like the option the argument parser accepts.
	printf '    --exclude-list= \tFile containing exclude list (see comment below).\n'
	printf '    -c, --company= \tCompany name as shown in email address (REQUIRED).\n'
	# The regex is passed as an argument so printf does not interpret its backslash.
	printf '    -f, --file-filter= \tFile filter. Default %s which includes C and C++ files.\n' '\.[ch]p{0,2}$'
	printf '    -t, --tool \t\tLOC count tool to use. 1 = cloc (default), 2 = sloccount.\n'
	printf '    -h, --help \t\tThis help text.\n'
	printf '\n'
	printf ' \tExclude list can be used to exclude files or directories from\n'
	printf '\tthe analysis. Simply list them relative to the root of the repository\n'
	printf '\tdirectory.\n'
	printf '\n'
	printf ' \tScript requires a relatively new version of Git that supports --show-email.\n'
	printf '\n'
}

# Parse the command line into the global settings.
# Globals:   FILTER, COMPANY, EXCLUDE_FILE, TOOL (written)
# Arguments: the script's command line ("$@")
# Exits with status 1 after printing the help text for -h/--help, and when
# --exclude-list is given without a value.
# Parsing stops silently at the first argument that is not a known option.
function parse_args {
	while [ $# -gt 0 ]; do
		case "$1" in
			-h|--help)
				help
				exit 1
				;;
			--all)
				# An empty filter switches file-name filtering off.
				FILTER=""
				shift
				;;
			-f)
				# Value is the following argument; the option is ignored if
				# nothing follows it.
				if [ $# -gt 1 ]; then
					FILTER=$2
					shift
				fi
				shift
				;;
			--file-filter*)
				# Everything after the first "=" is the value. The shift is
				# required, otherwise the loop would never advance.
				FILTER=${1#*=}
				shift
				;;
			-c)
				if [ $# -gt 1 ]; then
					COMPANY=$2
					shift
				fi
				shift
				;;
			--company*)
				COMPANY=${1#*=}
				shift
				;;
			--exclude-list*)
				# Require a non-empty value after "=".
				case "$1" in
					*=?*)
						EXCLUDE_FILE=${1#*=}
						;;
					*)
						echo "Exclude file missing!"
						exit 1
						;;
				esac
				shift
				;;
			-t)
				if [ $# -gt 1 ]; then
					TOOL=$2
					shift
				fi
				shift
				;;
			--tool=*)
				# Long form of -t, as advertised in the help text.
				TOOL=${1#*=}
				shift
				;;
			*)
				break
				;;
		esac
	done
}

parse_args "$@"


# The company string is mandatory: stop with a hint when it was not supplied.
if [ -z "$COMPANY" ]; then
	printf '%s\n' "Company name missing, exiting." "Try $SCRIPT -h"
	exit 1
fi

echo "Copying directory structure to $TEMP_DIR."

# Mirror the working tree into the scratch directory, then delete every regular
# file so only the directory skeleton is left for the filtered copies.
# "./*" keeps names starting with "-" from being parsed as cp options, and
# ${TEMP_DIR:?} aborts instead of operating on the current directory should the
# variable ever be empty.
cp -r ./* "${TEMP_DIR:?}"
find "${TEMP_DIR:?}" -type f -exec rm -f -- {} +

# Get list of authors from company (emails only)
echo "Creating list of authors into authors.txt..."
# Every author email in the history that contains the company string, one per
# line, de-duplicated. The match is case-insensitive so this list agrees with
# the case-insensitive per-file check done later in the script.
git log --format="%ae" | grep -i -- "$COMPANY" | sort -u > authors.txt

# Build the list of tracked files, optionally narrowed by the file filter
echo "Creating list of files..."
git ls-files --full-name > "$TEMP_DIR/file-list.txt"
TOTAL_FILES=$(wc -l < "$TEMP_DIR/file-list.txt")

if [ -n "$FILTER" ]; then
	# Only include code files (c, h, cpp, hpp) unless another filter was given.
	# The pattern is quoted so the shell cannot glob-expand or split it.
	grep -E -e "$FILTER" < "$TEMP_DIR/file-list.txt" > "$TEMP_DIR/file-list.tmp"
	mv "$TEMP_DIR/file-list.tmp" "$TEMP_DIR/file-list.txt"
fi

INCLUDED_FILES=$(wc -l < "$TEMP_DIR/file-list.txt")
echo "Total number of files: $TOTAL_FILES"
echo "Total number of files to analyze: $INCLUDED_FILES"

echo
echo "Removing files with no edits by $COMPANY..."
# Start from an empty result list so that "no file matched" produces an empty
# file list instead of silently keeping the unfiltered one.
: > "$TEMP_DIR/file-list.tmp"
counter=1
while IFS= read -r file; do
	if [ "$INCLUDED_FILES" -gt "$COUNTER_VISIBLE" ]; then
		# Progress on a single line; the trailing blanks wipe what is left of a
		# longer previous file name.
		printf '\r%s / %s (%s)%52s' "$counter" "$INCLUDED_FILES" "$file" ''
	fi
	# Only include files that have edits by company
	if git log --format="%ae" -- "$file" | grep -qi -- "$COMPANY"; then
		printf '%s\n' "$file" >> "$TEMP_DIR/file-list.tmp"
	fi
	counter=$((counter + 1))
done < "$TEMP_DIR/file-list.txt"

echo

mv "$TEMP_DIR/file-list.tmp" "$TEMP_DIR/file-list.txt"

# Remember the starting directory; the script changes back to it at the end.
CURR_DIR=$(pwd)

if [ -n "$EXCLUDE_FILE" ]; then
	echo "Handling exclude list..."
	# An unreadable list would otherwise leave an empty file list behind.
	if [ ! -r "$EXCLUDE_FILE" ]; then
		echo "Cannot read exclude list: $EXCLUDE_FILE" >&2
		exit 1
	fi
	# Drop every path that contains one of the listed strings (fixed-string,
	# substring match).
	grep -vFf "$EXCLUDE_FILE" "$TEMP_DIR/file-list.txt" > "$TEMP_DIR/file-filtered.txt"
	mv "$TEMP_DIR/file-filtered.txt" "$TEMP_DIR/file-list.txt"
fi

COMPANY_FILES=$(wc -l < "$TEMP_DIR/file-list.txt")

echo "Files removed: $((INCLUDED_FILES - COMPANY_FILES))"
echo

# Strip the "git blame --show-email" annotation from every line on stdin and
# write the remaining source text to stdout.
# The annotation is matched as: anything up to the first "(", the "<email>",
# then everything up to the closing ")" and the single separator blank. No
# assumption is made about the sign of the timezone offset.
function strip_blame_prefix {
	sed 's/^[^(]*(<[^>]*>[^)]*) \{0,1\}//'
}

# For each file, find lines of code added or edited by company employee and
# only copy those lines to the temporary directory.
echo "Removing code not written by $COMPANY.."
counter=1
while IFS= read -r file; do
	if [ "$COMPANY_FILES" -gt "$COUNTER_VISIBLE" ]; then
		# Progress on a single line; the trailing blanks wipe what is left of a
		# longer previous file name.
		printf '\r%s / %s (%s)%52s' "$counter" "$COMPANY_FILES" "$file" ''
	fi
	# The directory skeleton was copied with a "*" glob, which skips top-level
	# dot-directories, so make sure the target directory exists.
	mkdir -p -- "$(dirname -- "$TEMP_DIR/$file")"
	# Keep only blame lines attributed to one of the collected author emails,
	# then remove the blame annotation. Piping avoids intermediate files whose
	# names could collide with real files in the repository.
	git blame -w --show-email -- "$file" \
		| grep -Ff authors.txt \
		| strip_blame_prefix > "$TEMP_DIR/$file"
	counter=$((counter + 1))
done < "$TEMP_DIR/file-list.txt"

echo
echo

# Keep a copy of the exclude list inside the scratch directory
if [ -n "$EXCLUDE_FILE" ]; then
	cp -- "$EXCLUDE_FILE" "${TEMP_DIR:?}"
fi

# The steps that follow delete files relative to the current directory, so
# stop here if the scratch directory cannot be entered.
cd "${TEMP_DIR:?}" || exit 1
# Remove intermediate blame files
find "$TEMP_DIR" -name "*.back*" -exec rm -f -- {} +

# Succeed only for exclude-list entries that are safe to delete relative to
# the current directory: non-empty, not absolute, not "." and without any ".."
# path component.
function is_safe_exclude_entry {
	case "$1" in
		""|.|/*|..|../*|*/..|*/../*)
			return 1
			;;
	esac
	return 0
}

echo "Running analysis"
echo "----------------"

# Count lines & print results
if [ "$TOOL" -eq 2 ] 2>/dev/null; then
	if [ -n "$EXCLUDE_FILE" ]; then
		echo "Handling exclude list..."
		# The script has changed into the scratch directory by now, and the
		# exclude list was copied there under its base name; read that copy so
		# a relative path given on the command line still resolves.
		while IFS= read -r file; do
			if ! is_safe_exclude_entry "$file"; then
				# Blank lines are skipped quietly, anything else is reported.
				if [ -n "$file" ]; then
					echo "Skipping unsafe exclude entry: $file" >&2
				fi
				continue
			fi
			# The "./" prefix and the quotes confine the removal to this
			# directory tree, whatever characters the entry contains.
			if [ -d "./$file" ]; then
				echo "Removing dir $file"
				rm -rf -- "./$file"
			else
				echo "Removing $file"
				rm -f -- "./$file"
			fi
		done < "./${EXCLUDE_FILE##*/}"
	fi


	sloccount .
else
	cloc --list-file "$TEMP_DIR/file-list.txt"
fi

cd "$CURR_DIR"