### R code from vignette source 'Cheminfo.Rnw'
### Encoding: UTF-8

###################################################
### code chunk number 1: Cheminfo.Rnw:864-865 (eval = FALSE)
###################################################
download.file(url="http://faculty.ucr.edu/~tgirke/HTML_Presentations/Manuals/ChemmineR/Bioc2013/Cheminfo.R", destfile="Cheminfo.R")


###################################################
### code chunk number 2: Cheminfo.Rnw:869-870
###################################################
library("ChemmineR") # Loads the package


###################################################
### code chunk number 3: Cheminfo.Rnw:873-876 (eval = FALSE)
###################################################
library(help="ChemmineR") # Lists all functions and classes 
vignette("ChemmineR") # Opens PDF manual from R
?MW # Opens help for MW function


###################################################
### code chunk number 4: Cheminfo.Rnw:879-883 (eval = FALSE)
###################################################
sdfset <- read.SDFset("http://faculty.ucr.edu/~tgirke/Documents/R_BioCond/Samples/sdfsample.sdf")
sdfset # Returns summary of SDFset
valid <- validSDF(sdfset) # Identifies invalid SDFs in SDFset objects
sdfset <- sdfset[valid] # Removes invalid SDFs, if there are any


###################################################
### code chunk number 5: Cheminfo.Rnw:886-889
###################################################
data(sdfsample)
sdfset <- sdfsample
sdfset # Returns summary of SDFset


###################################################
### code chunk number 6: Cheminfo.Rnw:899-901
###################################################
write.SDF(sdfset[1:4], file="sub.sdf", sig=TRUE)
list.files(pattern="sub.sdf")


###################################################
### code chunk number 7: Cheminfo.Rnw:905-906
###################################################
write.SDFsplit(x=sdfset, filetag="myfile", nmol=50)


###################################################
### code chunk number 8: Cheminfo.Rnw:910-912
###################################################
sdfsetsub <- read.SDFset("sub.sdf")
sdfsetsub


##################
### Exercise I ###
##################
## Task 1
download.file(url="http://faculty.ucr.edu/~tgirke/HTML_Presentations/Manuals/ChemmineR/Bioc2013/p450.sdf", destfile="p450.sdf")
## Task 2 
## Task 2
## Task 3
## Task 4
## Task 5


###################################################
### code chunk number 10: Cheminfo.Rnw:979-985 (eval = FALSE)
###################################################
view(sdfset[1:4]) # Summary view of several molecules
length(sdfset) # Returns number of molecules
sdfset[[1]] # Returns single molecule from SDFset as SDF object
sdfset[[1]][[2]] # Returns atom block from first compound as matrix
sdfset[[1]][[2]][1:4,] 
c(sdfset[1:4], sdfset[5:8]) # Concatenation of several SDFsets


###################################################
### code chunk number 11: Cheminfo.Rnw:989-991 (eval = FALSE)
###################################################
grepSDFset("650001", sdfset, field="datablock", mode="subset") 
##    # To return index, set mode="index")


###################################################
### code chunk number 12: Cheminfo.Rnw:1001-1007 (eval = FALSE)
###################################################
atomblock(sdf); sdf[[2]]; sdf[["atomblock"]] 
##    # All three methods return the same component
header(sdfset[1:4])
atomblock(sdfset[1:4])
bondblock(sdfset[1:4])
datablock(sdfset[1:4])


###################################################
### code chunk number 13: Cheminfo.Rnw:1011-1018
###################################################
sdfid(sdfset[1:4])
   # Retrieves CMP IDs from Molecule Name field in header block.
cid(sdfset[1:4])
   # Retrieves CMP IDs from ID slot in SDFset.
unique_ids <- makeUnique(sdfid(sdfset)) 
   # Creates unique IDs by appending a counter to duplicates.
cid(sdfset) <- unique_ids # Assigns uniquified IDs to ID slot


###################################################
### Exercise II
###################################################
## Task 1
## Task 2
## Task 3


###################################################
### code chunk number 15: struct1
###################################################
data(sdfsample); sdfset <- sdfsample
plot(sdfset[1:4], print=FALSE) # print=TRUE returns SDF summaries


###################################################
### code chunk number 16: struct2
###################################################
plot(sdfset["CMP1"], atomnum = TRUE,  noHbonds=F , no_print_atoms = "", 
     atomcex=0.8, sub=paste("MW:", MW(sdfsample["CMP1"])), print=FALSE)


###################################################
### code chunk number 17: struct3
###################################################
plot(sdfset[1], print=FALSE, colbonds=c(22,26,25,3,28,27,2,23,21,18,8,19,20,24))


###################################################
### code chunk number 18: Cheminfo.Rnw:1100-1101 (eval = FALSE)
###################################################
sdf.visualize(sdfset[1:4])


###################################################
### Exercise III
###################################################
## Task 1
## Task 2


###################################################
### code chunk number 20: Cheminfo.Rnw:1138-1140
###################################################
propma <- atomcountMA(sdfset, addH=FALSE)
propma[1:4,]


###################################################
### code chunk number 21: Cheminfo.Rnw:1143-1145
###################################################
data(atomprop)
atomprop[1:4,] 


###################################################
### code chunk number 22: Cheminfo.Rnw:1155-1157
###################################################
MW(sdfset[1:4], addH=FALSE)
MF(sdfset[1:4], addH=FALSE)


###################################################
### code chunk number 23: Cheminfo.Rnw:1161-1162
###################################################
groups(sdfset[1:4], groups="fctgroup", type="countMA")


###################################################
### code chunk number 24: Cheminfo.Rnw:1172-1178
###################################################
propma <- data.frame(MF=MF(sdfset, addH=FALSE), MW=MW(sdfset, addH=FALSE), 
                     Ncharges=sapply(bonds(sdfset, type="charge"), length), 
                     atomcountMA(sdfset, addH=FALSE), groups(sdfset, 
		     type="countMA"), rings(sdfset, upper=6, type="count", 
		     arom=TRUE))
propma[1:4,]


###################################################
### code chunk number 25: Cheminfo.Rnw:1188-1190
###################################################
datablock(sdfset) <- propma # Works with all SDF components
datablock(sdfset)[1]


###################################################
### code chunk number 26: Cheminfo.Rnw:1193-1195
###################################################
blockmatrix <- datablock2ma(datablocklist=datablock(sdfset)) 
blockmatrix[1:2,1:12] 


###################################################
### code chunk number 27: Cheminfo.Rnw:1205-1208
###################################################
bonds(sdfset[[1]], type="bonds")[1:4,]
bonds(sdfset[1:2], type="charge")
bonds(sdfset[1:2], type="addNH")


###################################################
### code chunk number 28: Cheminfo.Rnw:1218-1219
###################################################
(ringatoms <- rings(sdfset[1], upper=Inf, type="all", arom=TRUE, inner=FALSE))


###################################################
### code chunk number 29: struct4
###################################################
atomindex <- as.numeric(gsub(".*_", "", unique(unlist(ringatoms))))
plot(sdfset[1], print=FALSE, colbonds=atomindex)


###################################################
### code chunk number 30: Cheminfo.Rnw:1244-1254
###################################################
write.SDF(sdfset, "test.sdf")
desc <- function(sdfset) {
        cbind(SDFID=sdfid(sdfset),
              MW=MW(sdfset),
              groups(sdfset),
              rings(sdfset, type="count", upper=6, arom=TRUE)
        )
}
sdfStream(input="test.sdf", output="matrix.xls", fct=desc, Nlines=1000, silent=TRUE)
read.delim("matrix.xls", row.names=1)[1:3,1:10]

###################################################
### Exercise IV
###################################################
## Task 1
## Task 2
## Task 3


###################################################
### code chunk number 32: Cheminfo.Rnw:1291-1293
###################################################
apset <- sdf2ap(sdfset) 
apset


###################################################
### code chunk number 33: Cheminfo.Rnw:1296-1300 (eval = FALSE)
###################################################
showClass("APset")
cid(apset)
view(apset)
as(apset, "list")


###################################################
### code chunk number 34: Cheminfo.Rnw:1303-1305
###################################################
(fpset <- desc2fp(apset))
view(fpset[1])


###################################################
### code chunk number 35: Cheminfo.Rnw:1315-1316
###################################################
cmp.search(apset, apset["CMP1"], type=3, cutoff = 0.3, quiet=TRUE) 


###################################################
### code chunk number 36: Cheminfo.Rnw:1319-1322
###################################################
fpset1024 <- names(rev(sort(table(unlist(as(apset, "list")))))[1:1024])
fpset <- desc2fp(apset, descnames=fpset1024, type="FPset")
fpSim(fpset["CMP1"], fpset, method="Tanimoto", cutoff=0.2, top=6)


###################################################
### code chunk number 37: Cheminfo.Rnw:1332-1334
###################################################
fpset <- fp2bit(sdfsample, type=3) 
fpSim(fpset[1], fpset, method="Tanimoto", cutoff=0.2, top=6) 


###################################################
### code chunk number 38: Cheminfo.Rnw:1337-1338
###################################################
fpSim(fpset[1], fpset[2])


###################################################
### code chunk number 39: struct5
###################################################
library(fmcsR); data(fmcstest) # Loads library and test sdfset object
test <- fmcs(fmcstest[1], fmcstest[2], au=2, bu=1) # Searches for MCS with mismatches
plotMCS(test) # Plots both query compounds with MCS in color


###################################################
### code chunk number 40: Cheminfo.Rnw:1358-1359
###################################################
fmcsBatch(sdfset[[1]], sdfset)[1:2,]


###################################################
### code chunk number 41: Cheminfo.Rnw:1369-1371 (eval = FALSE)
###################################################
compounds <- searchSim(sdfset[1])
compounds


###################################################
### Exercise V 
###################################################
## Task 1
## Task 2


###################################################
### code chunk number 43: Cheminfo.Rnw:1412-1416
###################################################
c1 <- cmp.cluster(fpset, cutoff=c(0.3, 0.6, 0.9), method="Tanimoto", 
            quiet=TRUE)[1:8,] 
c1[1:8,]
cluster.sizestat(c1, cluster.result=2)


###################################################
### code chunk number 44: Cheminfo.Rnw:1426-1427
###################################################
jarvisPatrick(nearestNeighbors(fpset, numNbrs=6), k=5, mode="a1a2b")[1:20]


###################################################
### code chunk number 45: Cheminfo.Rnw:1430-1432
###################################################
nnm <- nearestNeighbors(fpset,numNbrs=6)
nnm$similarities[1:4,]


###################################################
### code chunk number 46: struct6
###################################################
simMA <- sapply(cid(fpset), function(x) fpSim(fpset[x], fpset, sorted=FALSE)) 
plot(cmdscale(as.dist(1-simMA)))


###################################################
### code chunk number 47: struct7
###################################################
library(gplots)
hc <- hclust(as.dist(1-simMA), method="single")
heatmap.2(1-simMA, Rowv=as.dendrogram(hc), Colv=as.dendrogram(hc), 
	col=colorpanel(40, "darkblue", "yellow", "white"), 
        density.info="none", trace="none")


###################################################
### Exercise VI 
###################################################
## Task 1
## Task 2
## Task 3
## Task 4


###################################################
### code chunk number 49: Cheminfo.Rnw:1503-1504
###################################################
sessionInfo()


###################################################
## Exercise Solutions
###################################################

##################
### Exercise I ###
##################
## Task 1 
download.file(url="http://faculty.ucr.edu/~tgirke/HTML_Presentations/Manuals/ChemmineR/Bioc2013/p450.sdf", destfile="p450.sdf")

## Task 2
p450 <- read.SDFset("p450.sdf")
## Task 3
length(p450)
## Task 4
write.SDF(p450[length(p450):1], file="inverse.sdf", sig=TRUE)
## Task 5
write.SDFsplit(x=p450, filetag="p450", nmol=5)

###################
### Exercise II ###
###################
## Task 1
cid(sdfset)[1:4] <- month.name[1:4]
write.SDF(sdfset[1:4], file="sub.sdf", cid=TRUE)
## Task 2
bondList <- bondblock(sdfset)
bondMA <- do.call("rbind", bondList)
dim(bondMA)
write.table(bondMA, file="bondMA.txt", quote=FALSE, sep="\t")
## Task 3
atomblock(sdfset)[1] <- atomblock(sdfset)[2]
atomblock(sdfset[[1]]) == atomblock(sdfset[[1]])


####################
### Exercise III ###
####################
## Task 1
cid(p450) <- sdfid(p450)
plot(p450[c("42631481", "42631375", "42631371", "42631260")])
## Task 2
sdf.visualize(p450[c("42631481", "42631375", "42631371", "42631260")])


###################
### Exercise IV ###
###################
## Task 1
propma <- data.frame(MF=MF(p450, addH=FALSE), MW=MW(p450, addH=FALSE), 
                     Ncharges=sapply(bonds(p450, type="charge"), length), 
                     atomcountMA(p450, addH=FALSE), groups(p450, 
		     type="countMA"), rings(p450, upper=6, type="count", 
		     arom=TRUE))
## Task 2
datablock(p450) <- propma 
## Task 3
write.SDF(p450, file="p450mod.sdf", sig=TRUE)


#################
### Exercise V ## 
#################
## Task 1
p450 <- read.SDFset("p450.sdf"); cid(p450) <- sdfid(p450)
apset <- sdf2ap(p450)
fpset1024 <- names(rev(sort(table(unlist(as(apset, "list")))))[1:1024])
fpset <- desc2fp(apset, descnames=fpset1024, type="FPset")
pcset <- fp2bit(p450, type=3)
## Task 2
cmp.search(apset, apset[1], type=2, cutoff = 0.3, quiet=TRUE)[1:6]
fpSim(fpset[1], fpset, method="Tanimoto", cutoff=0.2, top=6)
fpSim(pcset[1], pcset, method="Tanimoto", cutoff=0.2, top=6)

###################
### Exercise VI ### 
###################
## Task 1
download.file(url="http://faculty.ucr.edu/~tgirke/HTML_Presentations/Manuals/ChemmineR/Bioc2013/p450.sdf", destfile="p450.sdf")
## Task 2
p450 <- read.SDFset("p450.sdf"); cid(p450) <- sdfid(p450)
apset <- sdf2ap(p450)
fpset1024 <- names(rev(sort(table(unlist(as(apset, "list")))))[1:1024])
fpset <- desc2fp(apset, descnames=fpset1024, type="FPset")
cmp.cluster(fpset, cutoff=c(0.8, 0.85, 0.9), method="Tanimoto", quiet=TRUE) 
## Task 2
jarvisPatrick(nearestNeighbors(fpset, numNbrs=6), k=5, mode="a1a2b")
## Task 3
simMA <- sapply(cid(fpset), function(x) fpSim(fpset[x], fpset, sorted=FALSE)) 
plot(cmdscale(as.dist(1-simMA)))
## Task 4
hc <- hclust(as.dist(1-simMA), method="single")
heatmap.2(1-simMA, Rowv=as.dendrogram(hc), Colv=as.dendrogram(hc), col=colorpanel(40, "darkblue", "yellow", "white"), density.info="none", trace="none")