limma Data to import: File descriptions: Spot descriptions: Importing data: source("
limma > library(limma) > data.directory<-" > targets<-readTargets(" > targets array experiment cy _MO-S N17-72SV2-3-vs-CSV2-5-B.gpr Nic-WT72hr_ _MO-S N73-CSV3-3-vs-72SV3-5-A.gpr Ctl-WT00hr_ _MO-S N98-CSV1-3-vs-72SV1-5-B.gpr Ctl-WT00hr_1 cy5 date 1 Ctl-WT00hr_2 7/21/ Nic-WT72hr_3 7/21/ Nic-WT72hr_1 7/18/2003
limma > spottypes<- readSpotTypes(" > spottypes SpotType ID Name Color 1 cDNA * * black 2 Blank *Blank* * blue 3 Control * *control* red 4 Empty *Empty* * blue 5 empty *empty* * blue >
RGList class > LimmaDataNickel<-read.maimages(files=targets$experiment,source="genepix", path = data.directory, names=paste("Cy5_",targets$cy5,"_VS_Cy3_",targets$cy3,sep=""), + columns=list(Gf = "F532 Median",Gb ="B532 Median", Rf = "F635 Median", Rb = "B635 Median"), + annotation=c("Name","ID","Block","Row","Column"),wt.fun=wtflags(0)) Read Read Read > attributes(LimmaDataNickel) $names [1] "R" "G" "Rb" "Gb" "weights" "targets" "genes" $class [1] "RGList" attr(,"package") [1] "limma"
RGList class > LimmaDataNickel$R[1:3,] Cy5_Ctl-WT00hr_2_VS_Cy3_Nic-WT72hr_2 Cy5_Nic-WT72hr_3_VS_Cy3_Ctl-WT00hr_3 [1,] [2,] [3,] Cy5_Nic-WT72hr_1_VS_Cy3_Ctl-WT00hr_1 [1,] 726 [2,] 248 [3,] 120 > LimmaDataNickel$Rb[1:3,] Cy5_Ctl-WT00hr_2_VS_Cy3_Nic-WT72hr_2 Cy5_Nic-WT72hr_3_VS_Cy3_Ctl-WT00hr_3 [1,] [2,] [3,] Cy5_Nic-WT72hr_1_VS_Cy3_Ctl-WT00hr_1 [1,] 129 [2,] 127 [3,] 127 >
RGList class > LimmaDataNickel$weights[LimmaDataNickel$genes$ID=="Blank"]<-0 > LimmaDataNickel$weights[LimmaDataNickel$genes$ID=="empty"]<-0 > LimmaDataNickel$printer<-getLayout(LimmaDataNickel$genes) > attributes(LimmaDataNickel) $names [1] "R" "G" "Rb" "Gb" "weights" "targets" "genes" "printer" $class [1] "RGList" attr(,"package") [1] "limma"
RGList class > LimmaDataNickel$genes[1,] Name ID Block Row Column 1 D > LimmaDataNickel$genes$Status<-controlStatus(spottypes,LimmaDataNickel) Matching patterns for: ID Name Found cDNA Found 288 Blank Found 0 Control Found 0 Empty Found 381 empty Setting attributes: values Color > > LimmaDataNickel$genes[1,] Name ID Block Row Column Status 1 D cDNA
Plotting data in a RGList object par(mfrow=c(3,1)) plotMA(LimmaDataNickel,array=1,xlim=c(-1,16),ylim=c(-3,8),zero.weights=T) plotMA(LimmaDataNickel,array=2,xlim=c(-1,16),ylim=c(-3,8),zero.weights=T) plotMA(LimmaDataNickel,array=3,xlim=c(-1,16),ylim=c(-3,8),zero.weights=T)
limma plotMA automatically subtracts the background intensities before plotting the data. It plots $M = \log_2(\mathrm{Cy5}) - \log_2(\mathrm{Cy3})$ on the y-axis and $A = [\log_2(\mathrm{Cy5}) + \log_2(\mathrm{Cy3})]/2$ on the x-axis. It does not plot data with weight zero unless you ask it to. If you want to plot all the data, or the data without subtracting the background, you need to do a little work.
Background Adjustments > NBLimmaDataNickel<-backgroundCorrect(LimmaDataNickel,method="none") > attributes(NBLimmaDataNickel) $names [1] "R" "G" "weights" "targets" "genes" "printer" $class [1] "RGList" attr(,"package") [1] "limma" Note that background measurements are gone Whole bunch of background adjustment procedures
Plotting data in a RGList object > par(mfrow=c(3,1)) > plotMA(LimmaDataNickel,array=1,xlim=c(-1,16),ylim=c(-3,8),zero.weights=F) > plotMA(LimmaDataNickel,array=2,xlim=c(-1,16),ylim=c(-3,8),zero.weights=F) > plotMA(LimmaDataNickel,array=3,xlim=c(-1,16),ylim=c(-3,8),zero.weights=F)
Plotting data in a RGList object > par(mfrow=c(3,1)) > plotMA(NBLimmaDataNickel,array=1,xlim=c(-1,16),ylim=c(-3,8),zero.weights=F) > plotMA(NBLimmaDataNickel,array=2,xlim=c(-1,16),ylim=c(-3,8),zero.weights=F) > plotMA(NBLimmaDataNickel,array=3,xlim=c(-1,16),ylim=c(-3,8),zero.weights=F)
Which one to use? Removing points with weight zero seems reasonable. Subtracting background costs us some data points even if one channel is above background, since only differences of log-transformed measurements are used. Subtracting background seems to increase the variability, but it is unclear how this would affect the results. For now, proceed without background subtraction, but compare results at the end. Exploring other proposed background-adjustment methods also seems like a good idea.
Within Array Normalization > NNBLimmaDataNickel<-normalizeWithinArrays(NBLimmaDataNickel,method="none") > attributes(NNBLimmaDataNickel) $names [1] "weights" "targets" "genes" "printer" "M" "A" $class [1] "MAList" attr(,"package") [1] "limma" Left with log-ratios and averages - the same things as in the scatter plot produced by plotMA
Checking Out M and A > NNBLimmaDataNickel$M[1:3,] Cy5_Ctl-WT00hr_2_VS_Cy3_Nic-WT72hr_2 Cy5_Nic-WT72hr_3_VS_Cy3_Ctl-WT00hr_3 [1,] [2,] [3,] Cy5_Nic-WT72hr_1_VS_Cy3_Ctl-WT00hr_1 [1,] [2,] [3,] > NNBLimmaDataNickel$A[1:3,] Cy5_Ctl-WT00hr_2_VS_Cy3_Nic-WT72hr_2 Cy5_Nic-WT72hr_3_VS_Cy3_Ctl-WT00hr_3 [1,] [2,] [3,] Cy5_Nic-WT72hr_1_VS_Cy3_Ctl-WT00hr_1 [1,] [2,] [3,] > Homework suggestion: Calculate these directly from R and G
Two-Sample t-test > NBLimmaDataNickelRG<-log2(cbind(NBLimmaDataNickel$R,NBLimmaDataNickel$G)) > dimnames(NBLimmaDataNickelRG)[[2]] [1] "Cy5_Ctl-WT00hr_2_VS_Cy3_Nic-WT72hr_2" [2] "Cy5_Nic-WT72hr_3_VS_Cy3_Ctl-WT00hr_3" [3] "Cy5_Nic-WT72hr_1_VS_Cy3_Ctl-WT00hr_1" [4] "Cy5_Ctl-WT00hr_2_VS_Cy3_Nic-WT72hr_2" [5] "Cy5_Nic-WT72hr_3_VS_Cy3_Ctl-WT00hr_3" [6] "Cy5_Nic-WT72hr_1_VS_Cy3_Ctl-WT00hr_1" > Nic<-c(2,3,4) > Ctl<-c(1,5,6)
Two-Sample t-test > MNic<-apply(NBLimmaDataNickelRG[,Nic],1,mean,na.rm=TRUE) > VNic<-apply(NBLimmaDataNickelRG[,Nic],1,var,na.rm=TRUE) > MCtl<-apply(NBLimmaDataNickelRG[,Ctl],1,mean,na.rm=TRUE) > VCtl<-apply(NBLimmaDataNickelRG[,Ctl],1,var,na.rm=TRUE) > NNic<-apply(!is.na(NBLimmaDataNickelRG[,Nic]),1,sum,na.rm=TRUE) > NCtl<-apply(!is.na(NBLimmaDataNickelRG[,Ctl]),1,sum,na.rm=TRUE) > > VNicCtl<-(((NNic-1)*VNic)+((NCtl-1)*VCtl))/(NCtl+NNic-2) > > DF<-NNic+NCtl-2 > > TStat<-abs(MNic-MCtl)/((VNicCtl*((1/NNic)+(1/NCtl)))^0.5) > TPvalue<-2*pt(TStat,DF,lower.tail=FALSE) > TStat[1] [1] > TPvalue[1] [1]
Paired t-test > dimnames(NNBLimmaDataNickel)[[2]] [1] "Cy5_Ctl-WT00hr_2_VS_Cy3_Nic-WT72hr_2" [2] "Cy5_Nic-WT72hr_3_VS_Cy3_Ctl-WT00hr_3" [3] "Cy5_Nic-WT72hr_1_VS_Cy3_Ctl-WT00hr_1" > NNBLimmaDataNickelLR<-NNBLimmaDataNickel$M > NNBLimmaDataNickelLR[,1]<-(-NNBLimmaDataNickelLR[,1]) > MLR<-apply(NNBLimmaDataNickelLR,1,mean,na.rm=TRUE) > VLR<-apply(NNBLimmaDataNickelLR,1,var,na.rm=TRUE) > NLR<-apply(!is.na(NNBLimmaDataNickelLR),1,sum,na.rm=TRUE) > > PTDF<-NLR-1 > > PTStat<-abs(MLR)/((VLR*(1/NLR))^0.5) > PTPvalue<-2*pt(PTStat,PTDF,lower.tail=FALSE) > PTStat[1] [1] > PTPvalue[1] [1]
Two-sample vs Paired t-test > par(mfrow=c(2,2)) > plot(MNic-MCtl,MLR) > plot(VNicCtl,VLR) > plot(TStat,PTStat) > plot(-log10(TPvalue),-log10(PTPvalue),xlim=c(0,4),ylim=c(0,4))
Limma Analysis > dimnames(NNBLimmaDataNickel)[[2]] [1] "Cy5_Ctl-WT00hr_2_VS_Cy3_Nic-WT72hr_2" [2] "Cy5_Nic-WT72hr_3_VS_Cy3_Ctl-WT00hr_3" [3] "Cy5_Nic-WT72hr_1_VS_Cy3_Ctl-WT00hr_1" > design<-c(-1,1,1) > > LimmaFit<-lmFit(NNBLimmaDataNickel,design) > attributes(LimmaFit) $names [1] "coefficients" "stdev.unscaled" "sigma" "df.residual" [5] "cov.coefficients" "pivot" "method" "design" [9] "genes" "Amean" $class [1] "MArrayLM" attr(,"package") [1] "limma"
Limma Analysis > par(mfrow=c(1,1)) > plot(LimmaFit$coefficients,MLR)
Limma Analysis > complete.data<-apply(NNBLimmaDataNickel$weights,1,sum)==3 > sum(complete.data) [1] 9621 > par(mfrow=c(1,1)) > plot(LimmaFit$coefficients[complete.data],MLR[complete.data])
Limma Analysis > LimmaFitTstat<- abs(LimmaFit$coefficients/(LimmaFit$sigma*LimmaFit$stdev.unscaled)) > LimmaFitPvalue<-2*pt(LimmaFitTstat,LimmaFit$df.residual,lower.tail=FALSE) > > par(mfrow=c(1,2)) > plot(LimmaFitTstat[complete.data],PTStat[complete.data]) > plot(-log10(LimmaFitPvalue[complete.data]),-log10(PTPvalue[complete.data]))
Facilitates easy data import and normalization. Keeps track of "bad" spots. Running the basic t-test takes a bit of additional work. If we were to use the empirical Bayes statistics as implemented in limma, it would be even easier. Empirical Bayes is generally BETTER than a simple t-test. We will talk about this type of analysis next week. Limma also allows fitting models with multiple factors, which we will also talk about next week. limma so far