### R code from vignette source 'Rrnaseq.Rnw' ################################################### ### code chunk number 1: Rrnaseq.Rnw:175-177 ################################################### targets <- read.delim("./data/targets.txt") targets ################################################### ### code chunk number 2: Rrnaseq.Rnw:187-201 (eval = FALSE) ################################################### ## library(Rsamtools) ## dir.create("results") # Note: all output data will be written to directory 'results' ## system("bowtie2-build ./data/tair10chr.fasta ./data/tair10chr.fasta") # Build indexed reference genome ## targets <- read.delim("./data/targets.txt") # Import experiment design information ## targets ## input <- paste("./data/", targets[,3], sep="") ## output <- paste("./results/", targets[,3], ".sam", sep="") ## reference <- "./results/tair10chr.fasta" ## for(i in seq(along=targets[,3])) { ## command <- paste("bowtie2 -x ./data/tair10chr.fasta -U", input[i], "-S", output[i]) ## system(command) ## asBam(file=output[i], destination=gsub(".sam", "", output[i]), overwrite=TRUE, indexDestination=TRUE) ## unlink(output[i]) ## } ################################################### ### code chunk number 3: Rrnaseq.Rnw:209-217 ################################################### library(rtracklayer); library(GenomicRanges); library(Rsamtools) gff <- import.gff("./data/TAIR10_GFF3_trunc.gff", asRangedData=FALSE) seqlengths(gff) <- end(ranges(gff[which(elementMetadata(gff)[,"type"]=="chromosome"),])) subgene_index <- which(elementMetadata(gff)[,"type"] == "exon") gffsub <- gff[subgene_index,] # Returns only gene ranges gffsub[1:4, c(2,5)] ids <- gsub("Parent=|\\..*", "", elementMetadata(gffsub)$group) gffsub <- split(gffsub, ids) # Coerce to GRangesList ################################################### ### code chunk number 4: Rrnaseq.Rnw:225-238 ################################################### samples <- as.character(targets$Fastq) samplespath <- paste("./results/", samples, ".bam", sep="") names(samplespath) <- samples countDF <- data.frame(row.names=names(gffsub)) for(i in samplespath) { aligns <- readBamGappedAlignments(i) # Substitute next two lines with this one. counts <- countOverlaps(gffsub, aligns, ignore.strand=TRUE) countDF <- cbind(countDF, counts) } colnames(countDF) <- samples countDF[1:4,] write.table(countDF, "./results/countDF", quote=FALSE, sep="\t", col.names = NA) countDF <- read.table("./results/countDF") ################################################### ### code chunk number 5: Rrnaseq.Rnw:246-250 ################################################### bfl <- BamFileList(samplespath, index=character()) countDF2 <- summarizeOverlaps(gffsub, bfl, mode="Union", ignore.strand=TRUE) countDF2 <- assays(countDF2)$counts countDF2[1:4,] ################################################### ### code chunk number 6: Rrnaseq.Rnw:258-267 ################################################### returnRPKM <- function(counts, gffsub) { geneLengthsInKB <- sum(width(reduce(gffsub)))/1000 # Length of exon union per gene in kbp millionsMapped <- sum(counts)/1e+06 # Factor for converting to million of mapped reads. rpm <- counts/millionsMapped # RPK: reads per kilobase of exon model. rpkm <- rpm/geneLengthsInKB # RPKM: reads per kilobase of exon model per million mapped reads. return(rpkm) } countDFrpkm <- apply(countDF, 2, function(x) returnRPKM(counts=x, gffsub=gffsub)) countDFrpkm[1:4,] ################################################### ### code chunk number 7: sampletree ################################################### library(ape) d <- cor(countDF, method="spearman") hc <- hclust(dist(1-d)) plot.phylo(as.phylo(hc), type="p", edge.col=4, edge.width=2, show.node.label=TRUE, no.margin=TRUE) ################################################### ### code chunk number 8: Rrnaseq.Rnw:292-295 ################################################### source("http://faculty.ucr.edu/~tgirke/Documents/R_BioCond/My_R_Scripts/colAg.R") countDFrpkm_mean <- colAg(myMA=countDFrpkm, group=c(1,1,2,2), myfct=mean) countDFrpkm_mean[1:4,] ################################################### ### code chunk number 9: Rrnaseq.Rnw:298-304 ################################################### countDFrpkm_mean <- cbind(countDFrpkm_mean, log2ratio=log2(countDFrpkm_mean[,2]/countDFrpkm_mean[,1])) countDFrpkm_mean <- countDFrpkm_mean[is.finite(countDFrpkm_mean[,3]), ] degs2fold <- countDFrpkm_mean[countDFrpkm_mean[,3] >= 1 | countDFrpkm_mean[,3] <= -1,] degs2fold[1:4,] write.table(degs2fold, "./results/degs2fold", quote=FALSE, sep="\t", col.names = NA) degs2fold <- read.table("./results/degs2fold") ################################################### ### code chunk number 10: Rrnaseq.Rnw:312-324 ################################################### library(DESeq) countDF <- read.table("./results/countDF") conds <- targets$Factor cds <- newCountDataSet(countDF, conds) # Creates object of class CountDataSet derived from eSet class counts(cds)[1:4, ] # CountDataSet has similar accessor methods as eSet class. cds <- estimateSizeFactors(cds) # Estimates library size factors from count data. Alternatively, one can provide here the true library sizes with sizeFactors(cds) <- c(..., ...) cds <- estimateDispersions(cds) # Estimates the variance within replicates res <- nbinomTest(cds, "AP3", "TRL") # Calls DEGs with nbinomTest res <- na.omit(res) res2fold <- res[res$log2FoldChange >= 1 | res$log2FoldChange <= -1,] res2foldpadj <- res2fold[res2fold$padj <= 0.05, ] res2foldpadj[1:4,1:8] ################################################### ### code chunk number 11: Rrnaseq.Rnw:332-342 ################################################### library(edgeR) countDF <- read.table("./results/countDF") y <- DGEList(counts=countDF, group=conds) # Constructs DGEList object y <- estimateCommonDisp(y) # Estimates common dispersion y <- estimateTagwiseDisp(y) # Estimates tagwise dispersion et <- exactTest(y, pair=c("AP3", "TRL")) # Computes exact test for the negative binomial distribution. topTags(et, n=4) edge <- as.data.frame(topTags(et, n=50000)) edge2fold <- edge[edge$logFC >= 1 | edge$logFC <= -1,] edge2foldpadj <- edge2fold[edge2fold$FDR <= 0.01, ] ################################################### ### code chunk number 12: Rrnaseq.Rnw:349-356 ################################################### bothDF <- merge(res, countDFrpkm_mean, by.x=1, by.y=0, all=TRUE); bothDF <- na.omit(bothDF) cor(bothDF[,"log2FoldChange"], bothDF[,"log2ratio"], method="spearman") source("http://faculty.ucr.edu/~tgirke/Documents/R_BioCond/My_R_Scripts/overLapper.R") setlist <- list(edgeR=rownames(edge2foldpadj), DESeq=as.character(res2foldpadj[,1]), RPKM=rownames(degs2fold)) OLlist <- overLapper(setlist=setlist, sep="_", type="vennsets") counts <- sapply(OLlist$Venn_List, length) vennPlot(counts=counts) ################################################### ### code chunk number 13: Rrnaseq.Rnw:364-373 ################################################### library(GOstats); library(GO.db); library(ath1121501.db) geneUniverse <- rownames(countDF) geneSample <- res2foldpadj[,1] params <- new("GOHyperGParams", geneIds = geneSample, universeGeneIds = geneUniverse, annotation="ath1121501", ontology = "MF", pvalueCutoff = 0.5, conditional = FALSE, testDirection = "over") hgOver <- hyperGTest(params) summary(hgOver)[1:4,] htmlReport(hgOver, file = "data/MyhyperGresult.html") ################################################### ### code chunk number 14: Rrnaseq.Rnw:412-425 ################################################### source("data/Fct/gffexonDEXSeq.R") gffexonDEXSeq <- exons2DEXSeq(gff=gff) ids <- as.character(elementMetadata(gffexonDEXSeq)[, "ids"]) countDFdex <- data.frame(row.names=ids) for(i in samplespath) { aligns <- readBamGappedAlignments(i) # Substitute next two lines with this one. counts <- countOverlaps(gffexonDEXSeq, aligns) countDFdex <- cbind(countDFdex, counts) } colnames(countDFdex) <- samples countDFdex[1:4,1:2] write.table(countDFdex, "./results/countDFdex", quote=FALSE, sep="\t", col.names = NA) countDFdex <- read.table("./results/countDFdex") ################################################### ### code chunk number 15: Rrnaseq.Rnw:434-453 ################################################### library(DEXSeq) samples <- as.character(targets$Factor); names(samples) <- targets$Fastq countDFdex[is.na(countDFdex)] <- 0 ## Construct ExonCountSet from scratch exset <- newExonCountSet2(countDF=countDFdex) # fData(exset)[1:4,] ## Performs normalization exset <- estimateSizeFactors(exset) ## Evaluate variance of the data by estimating dispersion using Cox-Reid (CR) likelihood estimation exset <- estimateDispersions(exset) ## Fits dispersion-mean relation to the individual CR dispersion values exset <- fitDispersionFunction(exset) ## Performs Chi-squared test on each exon and Benjmini-Hochberg p-value adjustment for mutliple testing exset <- testForDEU(exset) ## Estimates fold changes of exons exset <- estimatelog2FoldChanges(exset) ## Obtain results in data frame deuDF <- DEUresultTable(exset) ## Count number of genes with differential exon usage table(tapply(deuDF$padjust < 0.01, geneIDs(exset), any)) ################################################### ### code chunk number 16: dexseq1 ################################################### plotDEXSeq(exset, "Parent=AT1G01100", displayTranscripts=TRUE, expression=TRUE, legend=TRUE) ## Generate many plots and write them to results directory mygeneIDs <- unique(as.character(na.omit(deuDF[deuDF$geneID %in% unique(deuDF$geneID),])[,"geneID"])) DEXSeqHTML(exset, geneIDs=mygeneIDs, path="results", file="DEU.html") ################################################### ### code chunk number 17: Rrnaseq.Rnw:477-478 ################################################### sessionInfo()