miércoles, abril 24, 2013

get.orf <- function(dna.str) {
# Find positions of codons
start.codon.pos <- start(matchPattern('ATG', dna.str))
end.codon.pos <- c(start(matchPattern('TAA', dna.str)),
start(matchPattern('TAG', dna.str)),
start(matchPattern('TGA', dna.str)))
# Get reading frames for codons
start.codon.frame <- start.codon.pos %% 3
end.codon.frame <- end.codon.pos %% 3
# Find the corresponding end for a given start codon
# Add two to the end position to include end codon
orf.ends <-
sapply(1:length(start.codon.pos),
function(xx) min(end.codon.pos[end.codon.frame == start.codon.frame[xx]
& end.codon.pos > start.codon.pos[xx]])) + 2
# Build IRanges output and delete invalid elements
IRanges(start.codon.pos[orf.ends != Inf], orf.ends[orf.ends != Inf])
}
view raw gistfile1.r hosted with ❤ by GitHub