### R code from vignette source 'Rrnaseq.Rnw'

###################################################
### code chunk number 1: Rrnaseq.Rnw:175-177
###################################################
targets <- read.delim("./data/targets.txt")
targets


###################################################
### code chunk number 2: Rrnaseq.Rnw:187-201 (eval = FALSE)
###################################################
## library(Rsamtools)
## dir.create("results") # Note: all output data will be written to directory 'results'
## system("bowtie2-build ./data/tair10chr.fasta ./data/tair10chr.fasta") # Build indexed reference genome
## targets <- read.delim("./data/targets.txt") # Import experiment design information
## targets
## input <- paste("./data/", targets[,3], sep="")
## output <- paste("./results/", targets[,3], ".sam", sep="")
## reference <- "./results/tair10chr.fasta"
## for(i in seq(along=targets[,3])) {
## 	command <- paste("bowtie2 -x ./data/tair10chr.fasta -U", input[i], "-S", output[i])
##         system(command)
## 	asBam(file=output[i], destination=gsub(".sam", "", output[i]), overwrite=TRUE, indexDestination=TRUE)
##         unlink(output[i])
## }


###################################################
### code chunk number 3: Rrnaseq.Rnw:209-217
###################################################
library(rtracklayer); library(GenomicRanges); library(Rsamtools)
gff <- import.gff("./data/TAIR10_GFF3_trunc.gff", asRangedData=FALSE)
seqlengths(gff) <- end(ranges(gff[which(elementMetadata(gff)[,"type"]=="chromosome"),]))
subgene_index <- which(elementMetadata(gff)[,"type"] == "exon")
gffsub <- gff[subgene_index,] # Returns only gene ranges
gffsub[1:4, c(2,5)]
ids <- gsub("Parent=|\\..*", "", elementMetadata(gffsub)$group)
gffsub <- split(gffsub, ids) # Coerce to GRangesList


###################################################
### code chunk number 4: Rrnaseq.Rnw:225-238
###################################################
samples <- as.character(targets$Fastq)
samplespath <- paste("./results/", samples, ".bam", sep="")
names(samplespath) <- samples
countDF <- data.frame(row.names=names(gffsub))
for(i in samplespath) {
        aligns <- readBamGappedAlignments(i) # Substitute next two lines with this one.
        counts <- countOverlaps(gffsub, aligns, ignore.strand=TRUE)
        countDF <- cbind(countDF, counts)
}
colnames(countDF) <- samples
countDF[1:4,]
write.table(countDF, "./results/countDF", quote=FALSE, sep="\t", col.names = NA)
countDF <- read.table("./results/countDF")


###################################################
### code chunk number 5: Rrnaseq.Rnw:246-250
###################################################
bfl <- BamFileList(samplespath, index=character())
countDF2 <- summarizeOverlaps(gffsub, bfl, mode="Union", ignore.strand=TRUE)
countDF2 <- assays(countDF2)$counts
countDF2[1:4,]


###################################################
### code chunk number 6: Rrnaseq.Rnw:258-267
###################################################
returnRPKM <- function(counts, gffsub) {
        geneLengthsInKB <- sum(width(reduce(gffsub)))/1000 # Length of exon union per gene in kbp
        millionsMapped <- sum(counts)/1e+06 # Factor for converting to million of mapped reads.
        rpm <- counts/millionsMapped # RPK: reads per kilobase of exon model.
        rpkm <- rpm/geneLengthsInKB # RPKM: reads per kilobase of exon model per million mapped reads.
        return(rpkm)
}
countDFrpkm <- apply(countDF, 2, function(x) returnRPKM(counts=x, gffsub=gffsub))
countDFrpkm[1:4,]


###################################################
### code chunk number 7: sampletree
###################################################
library(ape) 
d <- cor(countDF, method="spearman")
hc <- hclust(dist(1-d))
plot.phylo(as.phylo(hc), type="p", edge.col=4, edge.width=2, show.node.label=TRUE, no.margin=TRUE)


###################################################
### code chunk number 8: Rrnaseq.Rnw:292-295
###################################################
source("http://faculty.ucr.edu/~tgirke/Documents/R_BioCond/My_R_Scripts/colAg.R")
countDFrpkm_mean <- colAg(myMA=countDFrpkm, group=c(1,1,2,2), myfct=mean)
countDFrpkm_mean[1:4,]


###################################################
### code chunk number 9: Rrnaseq.Rnw:298-304
###################################################
countDFrpkm_mean <- cbind(countDFrpkm_mean, log2ratio=log2(countDFrpkm_mean[,2]/countDFrpkm_mean[,1]))
countDFrpkm_mean <- countDFrpkm_mean[is.finite(countDFrpkm_mean[,3]), ]
degs2fold <- countDFrpkm_mean[countDFrpkm_mean[,3] >= 1 | countDFrpkm_mean[,3] <= -1,]
degs2fold[1:4,]
write.table(degs2fold, "./results/degs2fold", quote=FALSE, sep="\t", col.names = NA)
degs2fold <- read.table("./results/degs2fold")


###################################################
### code chunk number 10: Rrnaseq.Rnw:312-324
###################################################
library(DESeq)
countDF <- read.table("./results/countDF")
conds <- targets$Factor
cds <- newCountDataSet(countDF, conds) # Creates object of class CountDataSet derived from eSet class
counts(cds)[1:4, ] # CountDataSet has similar accessor methods as eSet class.
cds <- estimateSizeFactors(cds) # Estimates library size factors from count data. Alternatively, one can provide here the true library sizes with sizeFactors(cds) <- c(..., ...)
cds <- estimateDispersions(cds) # Estimates the variance within replicates
res <- nbinomTest(cds, "AP3", "TRL") # Calls DEGs with nbinomTest
res <- na.omit(res)
res2fold <- res[res$log2FoldChange >= 1 | res$log2FoldChange <= -1,]
res2foldpadj <- res2fold[res2fold$padj <= 0.05, ]
res2foldpadj[1:4,1:8]


###################################################
### code chunk number 11: Rrnaseq.Rnw:332-342
###################################################
library(edgeR)
countDF <- read.table("./results/countDF")
y <- DGEList(counts=countDF, group=conds) # Constructs DGEList object
y <- estimateCommonDisp(y) # Estimates common dispersion
y <- estimateTagwiseDisp(y) # Estimates tagwise dispersion
et <- exactTest(y, pair=c("AP3", "TRL")) # Computes exact test for the negative binomial distribution.
topTags(et, n=4)
edge <- as.data.frame(topTags(et, n=50000)) 
edge2fold <- edge[edge$logFC >= 1 | edge$logFC <= -1,]
edge2foldpadj <- edge2fold[edge2fold$FDR <= 0.01, ]


###################################################
### code chunk number 12: Rrnaseq.Rnw:349-356
###################################################
bothDF <- merge(res, countDFrpkm_mean, by.x=1, by.y=0, all=TRUE); bothDF <- na.omit(bothDF)
cor(bothDF[,"log2FoldChange"], bothDF[,"log2ratio"], method="spearman")
source("http://faculty.ucr.edu/~tgirke/Documents/R_BioCond/My_R_Scripts/overLapper.R")
setlist <- list(edgeR=rownames(edge2foldpadj), DESeq=as.character(res2foldpadj[,1]), RPKM=rownames(degs2fold))
OLlist <- overLapper(setlist=setlist, sep="_", type="vennsets")
counts <- sapply(OLlist$Venn_List, length)
vennPlot(counts=counts)


###################################################
### code chunk number 13: Rrnaseq.Rnw:364-373
###################################################
library(GOstats); library(GO.db); library(ath1121501.db)
geneUniverse <- rownames(countDF) 
geneSample <- res2foldpadj[,1]
params <- new("GOHyperGParams", geneIds = geneSample, universeGeneIds = geneUniverse, 
        annotation="ath1121501", ontology = "MF", pvalueCutoff = 0.5,
	conditional = FALSE, testDirection = "over")
hgOver <- hyperGTest(params)
summary(hgOver)[1:4,]
htmlReport(hgOver, file = "data/MyhyperGresult.html")


###################################################
### code chunk number 14: Rrnaseq.Rnw:412-425
###################################################
source("data/Fct/gffexonDEXSeq.R")
gffexonDEXSeq <- exons2DEXSeq(gff=gff)
ids <- as.character(elementMetadata(gffexonDEXSeq)[, "ids"])
countDFdex <- data.frame(row.names=ids)
for(i in samplespath) {
        aligns <- readBamGappedAlignments(i) # Substitute next two lines with this one.
        counts <- countOverlaps(gffexonDEXSeq, aligns)
        countDFdex <- cbind(countDFdex, counts)
}
colnames(countDFdex) <- samples
countDFdex[1:4,1:2]
write.table(countDFdex, "./results/countDFdex", quote=FALSE, sep="\t", col.names = NA)
countDFdex <- read.table("./results/countDFdex")


###################################################
### code chunk number 15: Rrnaseq.Rnw:434-453
###################################################
library(DEXSeq)
samples <- as.character(targets$Factor); names(samples) <- targets$Fastq 
countDFdex[is.na(countDFdex)] <- 0
## Construct ExonCountSet from scratch
exset <- newExonCountSet2(countDF=countDFdex) # fData(exset)[1:4,]
## Performs normalization
exset <- estimateSizeFactors(exset) 
## Evaluate variance of the data by estimating dispersion using Cox-Reid (CR) likelihood estimation
exset <- estimateDispersions(exset) 
## Fits dispersion-mean relation to the individual CR dispersion values
exset <- fitDispersionFunction(exset) 
## Performs Chi-squared test on each exon and Benjmini-Hochberg p-value adjustment for mutliple testing
exset <- testForDEU(exset) 
## Estimates fold changes of exons
exset <- estimatelog2FoldChanges(exset) 
## Obtain results in data frame
deuDF <- DEUresultTable(exset) 
## Count number of genes with differential exon usage
table(tapply(deuDF$padjust < 0.01, geneIDs(exset), any)) 


###################################################
### code chunk number 16: dexseq1
###################################################
plotDEXSeq(exset, "Parent=AT1G01100", displayTranscripts=TRUE, expression=TRUE, legend=TRUE)  
## Generate many plots and write them to results directory
mygeneIDs <- unique(as.character(na.omit(deuDF[deuDF$geneID %in% unique(deuDF$geneID),])[,"geneID"]))
DEXSeqHTML(exset, geneIDs=mygeneIDs, path="results", file="DEU.html") 


###################################################
### code chunk number 17: Rrnaseq.Rnw:477-478
###################################################
sessionInfo()