############################################################
### R Script to Subset Structure Definition Files (SDF) ###
############################################################
# Author: Thomas Girke (thomas.girke@ucr.edu), UC Riverside
# Last update: Mar 22, 2007
# Utility:
#   SDF batch import into list object
#   SDF subsetting
#   Export to file
# How to run the script:
#   source("http://faculty.ucr.edu/~tgirke/Documents/R_BioCond/My_R_Scripts/sdfSubset.R")

# SDF Import Function
#
# Reads an SDF file and splits it into one character vector per record.
# A record ends at a line that begins with the literal delimiter '$$$$';
# the delimiter line is kept as the last line of its record so that
# cat(unlist(x), sep="\n") reproduces a valid SDF file.
#
# Arguments:
#   infile  path of the SDF file to read (passed to readLines()).
#
# Value:
#   A list of character vectors, one per record, named "1", "2", ... in
#   file order. Returns an empty list when the file contains no '$$$$'
#   delimiter line. Lines after the last delimiter are not part of any
#   record and are dropped.
sdfimp <- function(infile = "db.sdf") {
  my_sdf <- readLines(infile) # reads file line-wise into vector

  # Record terminators: lines that start with the four-character '$$$$'
  # marker. A plain string comparison is used because '$' is a regex
  # metacharacter and a pattern such as "[$$$$]" only matches a single '$'.
  ends <- which(substr(my_sdf, 1L, 4L) == "$$$$")

  # No delimiter at all (e.g. empty file): nothing to split.
  if (length(ends) == 0L) {
    return(structure(list(), names = character(0)))
  }

  # Each record starts on the line after the previous record's delimiter.
  starts <- c(1L, ends[-length(ends)] + 1L)

  # lapply() always returns a list; apply() would collapse the result into
  # a matrix whenever all records happen to have the same number of lines.
  my_sdf_list <- lapply(
    seq_along(ends),
    function(i) my_sdf[seq(starts[i], ends[i])]
  )
  names(my_sdf_list) <- as.character(seq_along(my_sdf_list))
  my_sdf_list
}

# Subsetting and export to file
# Usage hints printed when the script is sourced.
cat("\n# (1) Import sdf file: \n\t sdflist <- sdfimp(infile=\"db.sdf\") \n\t\t # infile: input sdf file\n")
cat("\n# (2) Subset sdf list object: \n\t subsdf <- sdflist[c(1,4)] \n\t\t # provide position ids in vector (e.g. c(1,4))\n")
cat("\n# (3) Write returned sdf list to file: \n\t cat(unlist(subsdf), file=\"sub.sdf\", sep=\"\\n\")\n")