nievergeltlab
10/31/2018 - 3:54 PM

Author Affiliations

Assign numbers after author names for affiliations


#Note according to Katy format: Anyone with value "5-all others" is alphabetically ordered by last name.

# !!! ASSUMES the input CSV has ALREADY been double-sorted by "Paper order"
# and then "Published Name". Nothing in this script re-sorts the rows. !!!
dat_ordered <- read.csv(
  'affiliations_Freeze2.csv',
  stringsAsFactors = FALSE,
  header = TRUE
)

# Collapse the affiliation fields into a single string per row so that an
# affiliation can be compared and de-duplicated as one value.
dat_ordered$Afone <- paste(
  dat_ordered$Institution..EDITED.,
  dat_ordered$Department..EDITED.,
  dat_ordered$City,
  dat_ordered$State,
  dat_ordered$Country,
  sep = ". "
)

# Replace byte 0xA0 with a plain space. gsub() is already vectorized over its
# input, so no sapply() wrapper is needed (and the result carries no names).
# NOTE(review): 0xA0 is presumably a non-breaking space from a Latin-1 /
# Windows-1252 export -- confirm the encoding of the input file.
dat_ordered$Afone <- gsub(
  pattern = '\xa0',
  replacement = " ",
  x = dat_ordered$Afone
)

# One row per distinct affiliation string, in order of first appearance.
aflist <- data.frame(
  Affiliation = unique(dat_ordered$Afone),
  stringsAsFactors = FALSE
)

# Placeholder for the number given to each affiliation; filled in below.
aflist$order <- NA

# Running index of each row within its author: "1" for the first row of a
# given Published.Name, "2" for the second, and so on. The values are character
# because ave() is handed a character vector. This column is what reshape()
# uses as `timevar` when the data are made wide further down.
dat_ordered$timevar <- ave(
  as.character(dat_ordered$Published.Name),
  dat_ordered$Published.Name,
  FUN = seq_along
)

# Keep only the columns needed from here on. Paper order is not kept because
# the rows are assumed to be in the right order already.
dat_ordered2 <- dat_ordered[, c("Published.Name", "timevar", "Afone"), drop = FALSE]

# Placeholder for the affiliation number of each author/affiliation row.
dat_ordered2$affils <- NA
#Question: Some authors have multiple affiliations, some of which appear later for other people. The first time an affiliation is shown, do they go in alphabetical order?


# Number the affiliations by order of first appearance.
#
# Walking the author rows from top to bottom, the first affiliation seen gets
# number 1, the next not-yet-seen affiliation gets 2, and so on; an affiliation
# that has already been numbered keeps its number. That is exactly the position
# of each affiliation in unique(), which preserves first-appearance order, so
# match() replaces the row-by-row loop. This also works when there are zero
# rows, where a `1:nrow` loop would run over c(1, 0) and fail.
#
# Results:
#   aflist$order        - number assigned to each distinct affiliation
#   dat_ordered2$affils - number of the affiliation on each author row
# Both are integer vectors.
first_seen <- unique(dat_ordered2$Afone)
aflist$order <- match(aflist$Affiliation, first_seen)
dat_ordered2$affils <- match(dat_ordered2$Afone, first_seen)
   
# Reshape to wide format: one row per Published.Name, with one affiliation
# number column per value of timevar (affils_1, affils_2, ...).
dat_ordered3 <- dat_ordered2[, c("Published.Name", "timevar", "affils"), drop = FALSE]
dat_wide <- reshape(
  dat_ordered3,
  direction = "wide",
  idvar = "Published.Name",
  timevar = "timevar",
  sep = "_"
)

# Write the per-author affiliation numbers.
write.csv(dat_wide, file = "Author_affiliations.csv", row.names = FALSE)

# Write the lookup table of affiliations and the numbers they were given.
write.csv(aflist, file = "affiliations_codes.csv", row.names = FALSE)