one-channel-agilent.R

10/23/2014 - 8:16 AM

rm -f data/scrubbed/*.txt
# the agilent files have extra headers. we just
# want the part of the file that starts with "FEATURES"
# remove the extra headers and split files
# into sets where the name reflects the array used.
for f in data/{LT,NJ,mi}*.txt; do 
    rm -f a.tmp
    awk 'BEGIN { seen=0;FS=OFS="\t" }
        { if($1=="FEATURES"){ seen=1 }
        if(seen==1){ print $0; }}' $f > a.tmp
    nlines=$(wc -l "a.tmp" | awk '{ print $1 }')
    n=$(($nlines - 1))
    mv a.tmp data/scrubbed/`basename $f .txt`.${n}.txt
    echo $f $n
done


# for each chip used, create a target file.
for N in 45015 62976; do
    T=data/scrubbed/${N}.targets.txt
    echo $'FileName\tCondition' > $T
    for f in data/scrubbed/*${N}.txt; do

        grp=$(echo $f | perl -pe 's/^.+_(\w+)\.\d+.*/$1/;s/IDL/ILD/;s/.+CTRL/CTRL/;')
        echo "$f    $grp" >> $T
    done
done

library(limma)

GROUP="62976"
# targets.txt has columns of "FileName" and "Condition" e.g.
"""
FileName    Condition
data/scrubbed/LT001098RU_COPD.45015.txt COPD
data/scrubbed/LT001600RL_ILD.45015.txt  ILD
data/scrubbed/LT003990RU_CTRL.45015.txt CTRL
data/scrubbed/LT004173LL_ILD.45015.txt  ILD
data/scrubbed/LT007392RU_COPD.45015.txt COPD
"""
targets = readTargets(paste("data/scrubbed/", GROUP, ".targets.txt", sep=""))
dat = read.maimages(files=targets$FileName, path=".", source="agilent.median", green.only=T,
    columns=list(G="gMedianSignal", Gb="gBGMedianSignal"), 
    annotation=c("Row", "Col", "ProbeName", "SystematicName")
)

# https://stat.ethz.ch/pipermail/bioconductor/attachments/20101217/1c82279c/attachment.pl
# https://stat.ethz.ch/pipermail/bioconductor/2010-December/037051.html
# http://matticklab.com/index.php?title=Single_channel_analysis_of_Agilent_microarray_data_with_Limma

# http://tolstoy.newcastle.edu.au/R/e15/help/11/08/3339.html

# E contains the values for single-channel analyses
dat <- backgroundCorrect(dat, method="normexp", offset=1)
dat$E <- normalizeBetweenArrays(dat$E, method="quantile")
dat$E <- log2(dat$E)

E = new("MAList", list(targets=dat$targets, genes=dat$genes, source=dat$source, M=dat$E, A=dat$E))
E.avg <- avereps(E, ID=E$genes$ProbeName)


# make the design matrix
d.levels = unique(targets$Condition)
d.factor = factor(targets$Condition, levels=d.levels)
d.design = model.matrix(~0 + d.factor)
colnames(d.design) = levels(d.factor)

fit = lmFit(E.avg$A, d.design)

# customize comparisons here.
contrast.matrix <- makeContrasts("COPD-ILD", "COPD-CTRL", "ILD-CTRL", levels=d.design)

fit2 <- contrasts.fit(fit, contrast.matrix)
fit2 <- eBayes(fit2)


for(name in colnames(contrast.matrix)){
    output = topTable(fit2, adjust="BH", coef=name, genelist=E.avg$genes, number=90000)
    write.table(output, file=paste(GROUP, "-", name, ".txt", sep=""), sep="\t", quote=FALSE, row.names=F)
}

Cacher is the code snippet organizer for pro developers

We empower you and your team to get more done, faster

one-channel-agilent.R