btihen
6/16/2014 - 3:20 PM

Return Analysis - needs colors, line graph (box plots too)

Return Analysis - needs colors, line graph (box plots too)


----------------

# returns analysis

# Start in Bash
# METHOD 1

# Fetch the inventory export. The URL is quoted so the shell never treats the
# '?' as a glob character.
# NOTE(review): -k disables TLS certificate verification - drop it if the
# server certificate validates.
curl -k 'https://inventory.las.ch/serial.csv?scope=computers' > laptop-inventory.csv

# Field 44 of the ';'-separated export is the return timestamp. Keep the 2014
# entries and strip the surrounding double quotes.
cut -d';' -f44 laptop-inventory.csv | grep 2014 | cut -d'"' -f2 > return-data.csv

# Returns on 2014-05-2x, truncated to the full hour, as "day hour" pairs.
# The last grep is anchored: once the "2014-05-" prefix is gone the line starts
# with the day, and an unanchored '2[0-9]' would also keep other days whose
# hour is 20-23.
echo "day hour" > return-intensive-by-hour.csv
sed 's/:[0-9][0-9]:[0-9][0-9]$/:00/g' return-data.csv | grep '2014-05' | sed 's/2014-05-//g' | grep '^2[0-9]' >> return-intensive-by-hour.csv

# Same selection, truncated to ten-minute slots, as "day time" pairs.
# Anchoring matters even more here: minutes 20-29 would otherwise match.
echo "day time" > return-intensive-by-tenmin.csv
sed 's/[0-9]:[0-9][0-9]$/0/g' return-data.csv | grep '2014-05' | sed 's/2014-05-//g' | grep '^2[0-9]' >> return-intensive-by-tenmin.csv

# switch to r
#
# METHOD 1 (R part): tabulate and plot the two files written by the shell
# commands above. Both files are space-separated with a header line
# ("day hour" and "day time").

library(vcd)

# Draw box plots of the counts per time slot (spread over the days) and put
# one colored line per day on top, with a legend naming the days.
#   slots_in_cols: counts with one row per day and one column per time slot
#   days_in_cols:  the same counts with one row per time slot and one column
#                  per day
#   main, xlab:    title and x-axis label of the plot
plot_returns_overlay <- function(slots_in_cols, days_in_cols, main, xlab) {
  day_colors <- rainbow(ncol(days_in_cols))
  boxplot(slots_in_cols, main = main, xlab = xlab, ylab = "Returned Computers")
  # matlines() only adds to the existing plot, so titles and axis labels do
  # not belong here. Colors are one per column (= per day); using the raw
  # day column would color the lines by the first few data rows instead.
  matlines(days_in_cols, lty = 1, col = day_colors)
  legend("topleft", legend = colnames(days_in_cols), col = day_colors,
         lty = 1, title = "Day", bty = "n")
}

# ---- returns per hour ----

interest_hr <- read.csv("return-intensive-by-hour.csv", sep = " ")
table(interest_hr)
structable(table(interest_hr))

# Returns counted per day. Plotting interest_hr$day directly would draw one
# bar per returned computer, with the day number as the bar height.
returns_per_day <- table(interest_hr$day)

hour_day <- as.data.frame.matrix(table(interest_hr$hour, interest_hr$day))
day_hour <- as.data.frame.matrix(table(interest_hr$day, interest_hr$hour))

pdf(file = "./Returns_Each_Day.pdf")
barplot(returns_per_day, main = "Computer Returns by Day", xlab = "Day",
        ylab = "Returned Computers")
dev.off()

pdf(file = "./Returns_Each_Hour.pdf")
plot_returns_overlay(day_hour, hour_day, main = "Returns by hour",
                     xlab = "Hour")
dev.off()

# the same plots on the current (screen) device
plot_returns_overlay(day_hour, hour_day, main = "Returns each hour",
                     xlab = "Hour")
barplot(returns_per_day, main = "Computer Returns by Day", xlab = "Day",
        ylab = "Returned Computers")

# ---- returns per ten minutes ----

interest_ten <- read.csv("return-intensive-by-tenmin.csv", sep = " ")
table(interest_ten)
structable(table(interest_ten))

time_day_ten <- as.data.frame.matrix(table(interest_ten$time, interest_ten$day))
day_time_ten <- as.data.frame.matrix(table(interest_ten$day, interest_ten$time))

pdf(file = "./Returns_Each_10mins.pdf")
plot_returns_overlay(day_time_ten, time_day_ten, main = "Returns each 10 min",
                     xlab = "Time")
dev.off()

# the same plot on the current (screen) device
plot_returns_overlay(day_time_ten, time_day_ten, main = "Returns each 10 min",
                     xlab = "Time")

#--------------

# METHOD 2 - date-time arithmetic in R
# Start in R
#
# The day is taken by truncating the timestamp (rounding to the nearest day
# pushed every afternoon return to the following day), and the hour and
# quarter-hour labels are computed in the same time zone the timestamps were
# parsed in (mixing a CET/UTC origin with epoch seconds shifted the hour).

library(vcd)

#get data from inventory
fileURL <- "https://inventory.las.ch/serial.csv?scope=computers"
download.file(fileURL, destfile = "./computer-inventory.csv", method = "curl")

# load data into r
returns <- read.csv("./computer-inventory.csv", sep = ";")

#returns=read.csv("returns-laptops-2014wk24.csv",sep=";")
#head(returns)
#str(returns)
#names(returns)

# get return information - separate from all other info
ret_fac <- subset(returns$X44.usr_return_at,
                  grepl("2014", returns$X44.usr_return_at))

# convert to date_time data
# The timestamps are treated as plain wall-clock values: parsing AND
# formatting use the same fixed zone, so the result does not depend on the
# time zone or DST rules of the machine running the script.
wall_tz <- "UTC"
ret_dt <- as.POSIXct(as.character(ret_fac), tz = wall_tz)

# Round date-times to the nearest multiple of `secs` seconds, keeping wall_tz.
round_to_secs <- function(x, secs) {
  as.POSIXct(round(as.numeric(x) / secs) * secs,
             origin = "1970-01-01", tz = wall_tz)
}

# get days of interest (after 2014-05-21 00:00 and before 2014-05-29 00:00)
in_window <- !is.na(ret_dt) &
  ret_dt > as.POSIXct("2014-05-21", tz = wall_tz) &
  ret_dt < as.POSIXct("2014-05-29", tz = wall_tz)
interest <- ret_dt[in_window]

# extract the interesting days (calendar day of the return, not rounded)
just_day <- format(interest, "%Y-%m-%d")

# just the hours (nearest full hour, so 23:40 is labelled "00:00")
just_hour <- format(round_to_secs(interest, 3600), "%H:%M")

interest_df <- data.frame(just_day, just_hour)

# count interesting day frequencies
count_by_hr <- table(just_day, just_hour)

# nearest quarter hour
by_quarter_hr <- format(round_to_secs(interest, 900), "%H:%M")

count_by_quarter <- table(just_day, by_quarter_hr)

# now how to get box charts and line charts out of this table (colorized by day)

pdf(file = "./returns_by_hour.pdf")
barplot(count_by_hr, beside = TRUE, main = "Returns per Hour",
        xlab = "hour", ylab = "return/count")
dev.off()

pdf(file = "./returns_by_quarter_hour.pdf")
barplot(count_by_quarter, beside = TRUE, main = "Returns per Quarter Hour",
        xlab = "hour", ylab = "return/count")
dev.off()

pdf(file = "./returns_by_day.pdf")
barplot(table(just_day), main = "Computers returned per day",
        xlab = "date", ylab = "computers returned")
dev.off()

# the same plots on the current (screen) device
barplot(count_by_quarter, beside = TRUE, main = "Returns per Quarter Hour",
        xlab = "hour", ylab = "return/count")
barplot(count_by_hr, beside = TRUE, main = "Returns per Hour",
        xlab = "hour", ylab = "return/count")
barplot(table(just_day), main = "Computers returned per day",
        xlab = "date", ylab = "computers returned")

# flat contingency table of the hourly counts
# (the original call referenced an undefined object named `counts`)
structable(count_by_hr)


#----------

# METHOD 3?
# start in R
# Unfinished draft: selects the laptop rows and their return timestamps with
# plain string matching instead of date-time classes.


#get data from inventory
fileURL <- "https://inventory.las.ch/serial.csv?scope=computers"
download.file(fileURL, destfile = "./computer-inventory.csv", method = "curl")

# load data into r
computers <- read.csv("./computer-inventory.csv", sep = ";")

# get laptops
# (filter the frame that was just loaded; the draft filtered `returns`, which
# only exists if another section of this file ran first)
laptops <- subset(computers, grepl("laptop", computers$X30.device_type))

# get return time-dates (from 20th to 29th may 2014 -- 2014-05-2x)
returns <- subset(laptops$X44.usr_return_at,
                  grepl("2014-05-2", laptops$X44.usr_return_at))
chars   <- as.character(returns)

# strip the year-month prefix (the draft had an unbalanced ")" here)
# NOTE(review): only the prefix is removed, so if the values carry a
# time-of-day part it stays in `day` - confirm whether that is intended.
day  <- gsub("2014-05-", "", chars)
#time = gsub("^2014-05-[0-9][0-9] [0-9][0-9]:[0-9][0-9]:[0-9][0-9]$", )

#----------------


# returns analysis
# Returns of a single day (2014-05-28), counted per hour and per quarter hour.
# Days are truncated (not rounded) and all time labels are produced in the
# zone the timestamps were parsed in, so neither the day nor the hour shifts.

#get data from inventory
fileURL <- "https://inventory.las.ch/serial.csv?scope=computers"
download.file(fileURL, destfile = "./computer-inventory.csv", method = "curl")

# load data into r
returns <- read.csv("./computer-inventory.csv", sep = ";")

#returns=read.csv("returns-laptops-2014wk24.csv",sep=";")
#head(returns)
#str(returns)
#names(returns)

# get return information - separate from all other info
has_2014 <- grepl("2014", returns$X44.usr_return_at)
ret_fac <- returns$X44.usr_return_at[has_2014]

# convert to date_time data
# A fixed zone is used for parsing and for formatting alike, which keeps the
# recorded wall-clock values unchanged on any machine.
ret_dt <- as.POSIXct(as.character(ret_fac), tz = "UTC")

# get days of interest (after 2014-05-28 00:00 and before 2014-05-29 00:00)
window_start <- as.POSIXct("2014-05-28", tz = "UTC")
window_end   <- as.POSIXct("2014-05-29", tz = "UTC")
keep <- !is.na(ret_dt) & ret_dt > window_start & ret_dt < window_end
interest <- ret_dt[keep]

# extract the interesting days (calendar day; rounding to the nearest day
# would move afternoon returns to the next day)
just_day <- format(interest, "%Y-%m-%d")

# just the hours (nearest full hour)
secs <- as.numeric(interest)
nearest_hour <- as.POSIXct(round(secs / 3600) * 3600,
                           origin = "1970-01-01", tz = "UTC")
just_hour <- format(nearest_hour, "%H:%M")

interest_df <- data.frame(just_day, just_hour)

# count interesting day frequencies
count_by_hr <- table(just_day, just_hour)

# nearest quarter hour
nearest_quarter <- as.POSIXct(round(secs / 900) * 900,
                              origin = "1970-01-01", tz = "UTC")
by_quarter_hr <- format(nearest_quarter, "%H:%M")

count_by_quarter <- table(just_day, by_quarter_hr)

# now how to get box charts and line charts out of this table (colorized by day)

pdf(file = "./returns_by_hour.pdf")
barplot(count_by_hr, beside = TRUE, main = "Returns per Hour",
        xlab = "hour", ylab = "return/count")
dev.off()

pdf(file = "./returns_by_quarter_hour.pdf")
barplot(count_by_quarter, beside = TRUE, main = "Returns per Quarter Hour",
        xlab = "hour", ylab = "return/count")
dev.off()

pdf(file = "./returns_by_day.pdf")
barplot(table(just_day), main = "Computers returned per day",
        xlab = "date", ylab = "computers returned")
dev.off()

# the same plots on the current (screen) device
barplot(count_by_quarter, beside = TRUE, main = "Returns per Quarter Hour",
        xlab = "hour", ylab = "return/count")
barplot(count_by_hr, beside = TRUE, main = "Returns per Hour",
        xlab = "hour", ylab = "return/count")
barplot(table(just_day), main = "Computers returned per day",
        xlab = "date", ylab = "computers returned")