############################################################# ### R Script to Perform goSlim Counts on Vector of GO IDs ### ############################################################# # Author: Thomas Girke, Sept 2005 # Usage: # (1) Assign your query GO IDs to vector # (2) Define 'goSlim_fct' # (3) Execute goSlim_fct' function on 'sample_go' and plot results in pie chart # To demo what the script does, run it like this: # source("http://bioinfo.ucr.edu/~tgirke/Documents/R_BioCond/My_R_Scripts/goSlim.txt") # Load the required packages library(GOstats) # (1) Assign your query GO IDs to vector 'sample_go' # Example with GOs from all three categories: sample_go <- c("GO:0016564", "GO:0003677", "GO:0004345", "GO:0004345", "GO:0004345", "GO:0004345", "GO:0004345", "GO:0008265", "GO:0003841", "GO:0030151", "GO:0006355", "GO:0009664", "GO:0006412", "GO:0006412", "GO:0006412", "GO:0007046", "GO:0015979", "GO:0006457", "GO:0008372", "GO:0005618", "GO:0005622", "GO:0005840", "GO:0015935", "GO:0000311", "GO:0005622", "GO:0009282") # (2) Define goSlim Function goSlim_fct <- function(sample_go=sample_go, gotype="MF") { # Generate GO slim vector containing general GO IDs # This go slim sample is form PDF of goTools package. It covers MF, CC and BP. # slimv <- c("GO:0016209", "GO:0005488", "GO:0005198", "GO:0003774", "GO:0003824", "GO:0005215", "GO:0030188", "GO:0030234", "GO:0005554", "GO:0030528", "GO:0030533", "GO:0045182", "GO:0045735", "GO:0004871", "GO:0031012", "GO:0005576", "GO:0019012", "GO:0005623", "GO:0043226", "GO:0008372", "GO:0043234", "GO:0000004", "GO:0009987", "GO:0007275", "GO:0007582", "GO:0007610", "GO:0050789", "GO:0016032") # My selection from Plant GO Slim from http://www.geneontology.org/GO.slims.shtml. It covers MF, CC and BP. slimv <- c("GO:0030246","GO:0008289","GO:0003676","GO:0000166","GO:0019825","GO:0005515","GO:0003824","GO:0030234","GO:0005554","GO:0003774","GO:0004871","GO:0005198","GO:0030528","GO:0045182","GO:0005215","GO:0000004","GO:0006519","GO:0007154","GO:0007049","GO:0016043","GO:0006412","GO:0006464","GO:0006810","GO:0007275","GO:0007049","GO:0006519","GO:0005975","GO:0006629","GO:0006139","GO:0019748","GO:0015979","GO:0005618","GO:0005829","GO:0005783","GO:0005768","GO:0005794","GO:0005739","GO:0005777","GO:0009536","GO:0005840","GO:0005773","GO:0005764","GO:0005856","GO:0005634","GO:0005886","GO:0008372","GO:0005576") sample_go <- data.frame(test1=sample_go, My_Sample=sample_go) go_df <- data.frame(GOID=unlist(eapply(GOTERM, function(x) x@GOID)), Term=unlist(eapply(GOTERM, function(x) x@Term)), Ont=unlist(eapply(GOTERM, function(x) x@Ontology))) # Retrieve the slim categories for MF, BP or CC slim_mfdf <- go_df[go_df[,1] %in% slimv & go_df[,3]==gotype,] slim_mfv <- as.vector(slim_mfdf[,1]) # Select categories in slimv slim_count_df <- data.frame(Slim=NULL, OffSpr=NULL) for(i in slim_mfv) { slim_count_df <- rbind(slim_count_df, data.frame(Slim=i, OffSpr=i)) # adds slim IDs so that they are included in the counting if (gotype=="MF") { slim_count_df <- rbind(slim_count_df, data.frame(Slim=rep(i, length(as.vector(as.list(GOMFOFFSPRING)[[i]]))), OffSpr=as.vector(as.list(GOMFOFFSPRING)[[i]]))) } if (gotype=="BP") { slim_count_df <- rbind(slim_count_df, data.frame(Slim=rep(i, length(as.vector(as.list(GOBPOFFSPRING)[[i]]))), OffSpr=as.vector(as.list(GOBPOFFSPRING)[[i]]))) } if (gotype=="CC") { slim_count_df <- rbind(slim_count_df, data.frame(Slim=rep(i, length(as.vector(as.list(GOCCOFFSPRING)[[i]]))), OffSpr=as.vector(as.list(GOCCOFFSPRING)[[i]]))) } } # Double-check GO slim vector to show that all GO IDs are covered in offspring list of slim_mfv # zz <- eapply(GOTERM, function(x) x@Ontology) # length(unique(slim_count_df[,2]))-1==as.vector(table(unlist(zz)))[4] # GO slim stats xxx <- merge(slim_count_df, sample_go, by.x="OffSpr", by.y="test1") counter <-data.frame(table(factor(xxx$Slim))) counter <- merge(slim_count_df, counter, by.x="Slim", by.y=1, all.x=T) counter[is.na(counter[,3]),3] <- 0 slim_stat <- counter[!duplicated(counter[,1]),] slim_stat[is.na(slim_stat[,3]),3] <- 0 slim_stat <- slim_stat[,-2]; slim_stat <- data.frame(slim_stat, Percent=round(slim_stat$Freq/sum(slim_stat$Freq)*100,2), Term=as.vector(go_df[as.vector(slim_stat[,1]),2])) cat("GO Slim Table:", gotype, "\n", sep = " ") print(slim_stat) } # (3) Execute goSlim function on 'sample_go' and plot results in pie chart slim_stat_mf <- goSlim_fct(sample_go=sample_go, gotype="MF") slim_stat_bp <- goSlim_fct(sample_go=sample_go, gotype="BP") slim_stat_cc <- goSlim_fct(sample_go=sample_go, gotype="CC") x11(height=10, width=10, pointsize=12) par(mfrow=c(2,2)) pie(slim_stat_mf[slim_stat_mf[,2]>0 ,3], labels=as.vector(slim_stat_mf[slim_stat_mf[,2]>0 ,1]), main="GO MF") pie(slim_stat_bp[slim_stat_bp[,2]>0 ,3], labels=as.vector(slim_stat_bp[slim_stat_bp[,2]>0 ,1]), main="GO BP") pie(slim_stat_cc[slim_stat_cc[,2]>0 ,3], labels=as.vector(slim_stat_cc[slim_stat_cc[,2]>0 ,1]), main="GO CC")