## useage: $ R CMD BATCH '--args infile="blast.out.top"' plot_from_blast.R ## PART ONE: read in and store the BLAST table ## # Read in the arguments listed at the command line # args=(commandArgs(TRUE)) args <- list('blast.out.top') # actually, this is set manually here . . not from command line . . . # 'args' is now a list of character vectors # get the command line arguments i.e. the name of the blast table to read if(length(args)==0){ # if arguments are not passed . . . print("Useage: R CMD BATCH '--args infile=' plot_from_blast.R\n") infile <- 'blast.out.top' # guess the input file name } else { for(i in 1:length(args)){ infile <- args[[i]] } } # open the blast table and store it as an array object topblasttablein <- scan(file = infile, what = 'character', sep = "\t") # read the table, store as a vector of type character hits <- (length(topblasttablein)/12) - 1 # calculate the number of hits (= number of rows minus the column titles) topblasttable <- array(topblasttablein[13:length(topblasttablein)],dim=c(12,hits)) # fill a two dimensional array (like a table) of the approriate dimensions with the vector read in above dimnames(topblasttable) <- list(topblasttablein[1:12],c()) # label the columns with the first ## PART TWO: plot the data from the BLAST table ## # this makes some histograms in a PDF (Adobe Portable Document Format, can be viewed in Adobe Reader etc) # <- this symbol makes R ignore the rest of the line. This is useful for adding comments or 'commenting out' R commands so they are silent and do not run # deleting the '#' symbol means R will be read and attempt to execute everything until the next '#' or the next line # open a PDF file to write in pdf(file='blast_plot1.pdf', title='Plots from BLAST table') # plot a histogram of the 3rd column (% identities of the top hit for each protein in the query .faa file) # hist(as.numeric(topblasttable[3,]), main = 'Histogram of % identities of top BLAST hits', nclass = 30, col = 'Red', xlab = '% identity') # plot a histogram of the 11th column (e-values for top hits) # hist(as.numeric(topblasttable[11,]), main = 'Histogram of e-values of top BLAST hits', nclass = 30, col = 'Blue', xlab = 'e-value') # plot a histogram of the 12th column (bit scores of the top hits) # hist(as.numeric(topblasttable[12,]), main = 'Histogram of bit scores of top BLAST hits', nclass = 30, col = 'Green', xlab = 'bit score') # plot a histogram of the 3rd column (% identities of the top hit for each protein in the query .faa file) # 'uncomment' both both of the next lines to plot this histogram threshold <- 0.0001 hist(as.numeric(topblasttable[3, which(as.numeric(topblasttable[11,])