#! /usr/bin/env Rscript # -*- mode: R -*- # Above tells emacs to go into R mode for editing (must be second line). ## ## chromCor: ## ## Compute correlation of two jarched bed files along a chromosome (default chr19). ## ## To run this script, turn the execute bit on and run just as any other script file. The Rscript ## command must be in your path. Rscript comes with R distributions 2.8 (?) and higher, and is in the ## same directory as the R script itself (eg, /usr/local/R/bin). ## ## Author: RET ## Date: 20 Aug 2009 usage <- paste('Usage: chromCor [-c ] [-n ] [-z] [-u] ', '', 'Compute correlation of two bed files along a chromosome (default chr19).', '', 'By default, and are jarched 5-column bed files, with score to be correlated in the', 'fifth column. There must be the same number of entries for the desired chromosome in each bed file.', '', ' is a chromosome name; by default chr19, or "all", for all entries in the bed file.', '', ' is the column in each file containing the data to correlate; by default, 5.', '', '-s specifies that bed files are starched instead of jarched.', '', '-z specifies that bed files are gzipped instead of jarched.', '', '-u specifies that bed files are uncompressed (not jarched or gzipped).', '', 'Expects gchr and Rscript to be in the user\'s path.', '', 'Writes results to stdout.', '', '\n', sep = '\n') args <- commandArgs(T) if(length(args) <= 1){ cat(usage) quit('no') } chr <- 'chr19' col <- 5 uncompr <- c('', '') i <- 1 while(substr(args[i], 1, 1) == '-'){ opt <- substr(args[i], 2, 2) if(opt == 'c'){ chr <- args[i+1] if(chr == 'all') chr = '' i <- i + 2 }else if(opt == 'n'){ col <- as.integer(args[i+1]) i <- i + 2 }else if(opt == 'z'){ uncompr <- c('zcat', 'zcat') i <- i + 1 }else if(opt == 's'){ uncompr <- c('unstarch', 'unstarch') i <- i + 1 }else if(opt == 'u'){ uncompr <- c('cat', 'cat') i <- i + 1 }else{ cat('Unrecognized option:', args[i], '\n', file = stderr()) cat(usage, file = stderr()) quit('no') } } bed <- args[c(i, i+1)] rds <- list() for(i in 1:2){ if(uncompr[i] == ''){ ext <- gsub('.*\\.', '', bed[i]) if( ext == "bed"){ uncompr[i] <- 'cat' } else if( ext == "jarch"){ uncompr[i] <- 'gchr' } else if( ext == "starch"){ uncompr[i] <- 'unstarch' } else if( ext == "gz" || ext == "zip"){ uncompr[i] <- 'zcat' } else{ cat('Unrecognized file extension:', bed[i], '\n', file = stderr()) quit('no') } } if(chr == '') cmd <- paste(uncompr[i], '%BED') else{ if(uncompr[i] == "cat" || uncompr[i] == "zcat"){ cmd <- paste(uncompr[i], ' %BED | grep "^', chr, '" -', sep = '') }else if(uncompr[i] == "unstarch" || uncompr[i] == "gchr"){ cmd <- paste(uncompr[i], chr, '%BED') } } cmd <- paste(cmd, ' | cut -f', col, sep = '') cat('Reading', bed[i], '...\n', file = stderr()) con <- pipe(gsub('%BED', bed[i], cmd)) rds[[i]] <- scan(con, what = numeric(0)) close(con) } cv <- cor(rds[[1]], rds[[2]]) cat(cv, '\n')