sub extract_open_reading_frames {
my $seq = shift;
for my $r (0,1,2) {
for (my $i = $r; $i <= length($seq)-3; $i += 3) {
if (substr($seq,$i,3) eq "ATG") {
my $j = $i+3;
while ($j <= length($seq)-3 &&
substr($seq,$j,3) ne "TAA" &&
substr($seq,$j,3) ne "TAG" &&
substr($seq,$j,3) ne "TGA") {
$j += 3;
}
if ($j <= length($seq)-3) {
my $len = $j+2-$i+1;
if ($len >= 30) {
print substr($seq,$i,$j+2-$i+1),"\n";
}
}
}
}
}
}
|
extract.open.reading.frames <- function (seq) {
for (i in 1:3) {
while (i+2 <= nchar(seq)) {
if (substr(seq,i,i+2) == "ATG") {
j <- i + 3
while (j+2 <= nchar(seq) &&
substr(seq,j,j+2) != "TAA" &&
substr(seq,j,j+2) != "TAG" &&
substr(seq,j,j+2) != "TGA") {
j <- j + 3
}
if (j+2 <= nchar(seq)) {
if (j+2-i+1 >= 30) {
print(c(i,j+2,substr(seq,i,j+2)))
}
}
}
i <- i + 3
}
}
}
|
sub longest_open_reading_frame {
my $seq = shift;
my ($ii,$jj) = (0,0);
for my $r (0,1,2) {
for (my $i = $r; $i <= length($seq)-3; $i += 3) {
if (substr($seq,$i,3) eq "ATG") {
my $j = $i+3;
while ($j <= length($seq)-3 &&
substr($seq,$j,3) ne "TAA" &&
substr($seq,$j,3) ne "TAG" &&
substr($seq,$j,3) ne "TGA") {
$j += 3;
}
if ($j <= length($seq)-3) {
my $len = $j+2-$i+1;
if ($j+2-$i+1 > $jj+2-$ii+1) {
$ii = $i;
$jj = $j;
}
}
}
}
}
return [$ii,$jj+2];
}
|
longest.open.reading.frame <- function (seq) {
ii <- jj <- 0
for (i in 1:3) {
while (i+2 <= nchar(seq)) {
if (substr(seq,i,i+2) == "ATG") {
j <- i + 3
while (j+2 <= nchar(seq) &&
substr(seq,j,j+2) != "TAA" &&
substr(seq,j,j+2) != "TAG" &&
substr(seq,j,j+2) != "TGA") {
j <- j + 3
}
if (j+2 <= nchar(seq)) {
if (j+2-i+1 > jj+2-ii+1) {
ii <- i
jj <- j
}
}
}
i <- i + 3
}
}
c(ii,jj+2)
}
|
sub seq_to_labeled_seq {
my $seq = shift;
my %seq;
for (my $i = 0; $i < length($seq); $i++) {
my $elem = substr($seq,$i,1);
$seq{$elem}++;
}
return \%seq;
}
|
seq.to.labeled.seq <- function (seq) {
alphabet <- sort(unique(strsplit(seq,"")[[1]]))
lab <- matrix(0,nrow=length(alphabet),ncol=1)
dimnames(lab) <- list(alphabet,"count")
for (i in 1:nchar(seq))
lab[substr(seq,i,i),] <-
lab[substr(seq,i,i),] + 1
lab
}
|
sub symmetric_difference {
my $seq1 = shift;
my $seq2 = shift;
my %seq1 = %{ seq_to_labeled_seq($seq1) };
my %seq2 = %{ seq_to_labeled_seq($seq2) };
for my $elem (keys %seq2) {
$seq1{$elem} = abs($seq1{$elem}-$seq2{$elem});
}
return \%seq1;
}
|
symmetric.difference <- function (seq1,seq2) {
lab1 <- seq.to.labeled.seq(seq1)
lab2 <- seq.to.labeled.seq(seq2)
for (elem in row.names(lab2))
lab1[elem,] <- abs(lab1[elem,] - lab2[elem,])
lab1
}
|
sub reverse_complement {
my $seq = shift;
$seq = reverse $seq;
$seq =~ tr/ACGT/TGCA/;
return $seq;
}
|
reverse.complement <- function (seq) {
rev <- paste(rev(unlist(strsplit(seq,split=""))),sep="",collapse="")
chartr("ACGT","TGCA",rev)
}
|
sub count {
my $n = shift;
my @R;
$R[0] = $R[1] = $R[2] = 1;
for (my $i = 2; $i < $n; $i++) {
$R[$i+1] = $R[$i];
for (my $j = 1; $j < $i; $j++) {
$R[$i+1] += $R[$j-1] * $R[$i-$j]
}
}
return $R[$n];
}
|
count <- function (n) {
R <- rep(0,n)
R[1:3] <- 1
if (n > 2) {
for (i in 2:(n-1)) {
R[i+2] <- R[i+1]
for (j in 1:(i-1)) {
R[i+2] <- R[i+2] + R[j] * R[i-j+1]
}
}
}
R[n+1]
}
|
sub words {
my $n = shift;
my $alphabet = shift;
my @n = split "", $alphabet;
if ($n == 1) {
return \@n;
} else {
my $short = words($n - 1, $alphabet);
my @long;
for my $seq (@$short) {
for my $n (@n) {
push @long, $seq . $n;
}
}
return \@long;
}
}
|
library(seqinr)
words(length=3,alphabet=s2c("ACGT"))
|
sub word_composition {
my $seq = shift;
my $k = shift;
my $alphabet = shift;
my $words = words($k,$alphabet);
my %freq;
for my $word (@$words) {
$freq{$word} = 0;
}
for my $i (0 .. length($seq)-$k) {
my $word = substr($seq,$i,$k);
$freq{$word}++;
}
return \%freq;
}
|
library(seqinr)
count(s2c(seq),word=3,alphabet=s2c("ACGT"))
|
|
|
library(seqinr)
alignment.free.distance <- function (seq1,seq2,k,sigma) {
freq1 <- count(seq1,word=k,alphabet=s2c(sigma))
freq2 <- count(seq2,word=k,alphabet=s2c(sigma))
cor(freq1,freq2)
}
|
sub mean {
my $a = shift;
my $res;
foreach (@$a) { $res += $_ }
$res /= @$a;
}
|
|
sub sd {
my $a = shift;
my $mean = mean($a);
return sqrt( mean( [map $_ ** 2, @$a] ) -
($mean ** 2) );
}
|
|
sub cov {
my $a1 = shift;
my $a2 = shift;
my $res;
foreach (0 .. @$a1 - 1) {
$res += $a1->[$_] * $a2->[$_];
}
$res /= @$a1;
$res -= mean($a1) * mean($a2);
}
|
|
sub cor {
my $a1 = shift;
my $a2 = shift;
return cov($a1,$a2) / (sd($a1) * sd($a2));
}
|
|
sub occurrences {
my $p = shift;
my $t = shift;
my $m = length $p;
my $n = length $t;
my @L;
for (my $i = 0; $i < $n-$m+1; $i++) {
if ($p eq substr($t,$i,$m)) {
push @L, $i;
}
}
return \@L;
}
|
occurrences <- function (p,t) {
L <- c()
for (i in 1:nchar(t)-nchar(p)+1)
if (p == substr(t,i,i+nchar(p)-1))
L <- c(L,i)
L
}
|
sub suffix_array {
my $s = shift;
my @sa = (0 .. length($s)-1);
@sa = sort {substr($s,$a) cmp substr($s,$b)} @sa;
return \@sa;
}
|
suffix.array <- function (seq) {
sa <- apply(matrix(1:nchar(seq)),1,function(i)substr(seq,i,nchar(seq)))
order(sa)
}
|
sub occurrences {
my $p = shift;
my $t = shift;
my $sa = shift;
my ($l, $r) = (0, $#sa);
while ($l <= $r) {
my $i = int(($l + $r)/2);
my $c = $p cmp substr($t, $sa[$i], length $p);
if ($c < 0) {
$r = $i - 1;
} elsif ($c > 0) {
$l = $i + 1;
} else {
my $j = $i;
while ($i > 0 &&
$p eq substr($t, $sa[$i-1], length $p)) {
$i--;
}
while ($j < $#sa-1 &&
$p eq substr($t, $sa[$j+1], length $p)) {
$j++;
}
return ($i, $j);
}
}
return (-1, -1);
}
|
occurrences <- function (p,t,sa) {
ell <- 1
r <- nchar(t)
while (ell <= r) {
i <- as.integer((ell + r) / 2)
s <- substr(t,sa[i],sa[i]+nchar(p)-1)
if (p < s) {
r <- i - 1
} else if (p > s) {
ell <- i + 1
} else {
j <- i
while (i > 1 &&
p==substr(t,sa[i-1],sa[i-1]+nchar(p)-1)) {
i <- i - 1
}
while (j < nchar(t) &&
p==substr(t,sa[j+1],sa[j+1]+nchar(p)-1)) {
j <- j + 1
}
return(c(i,j))
}
}
return(c(-1,-1))
}
|
sub lcp {
my ($p, $i, $t, $j) = @_;
my $c = 0;
while ($i+$c < length $p && $j+$c < length $t &&
substr($p,$i+$c,1) eq substr($t,$j+$c,1)) {
$c++;
}
return $c;
}
|
lcp <- function (seq1,pos1,seq2,pos2) {
n1 <- nchar(seq1)
n2 <- nchar(seq2)
c <- 0
while (pos1+c <= n1 && pos2+c <= n2 &&
substr(seq1,pos1+c,pos1+c) ==
substr(seq2,pos2+c,pos2+c)) {
c <- c + 1
}
c
}
|
sub occurrences {
my $p = shift;
my $t = shift;
my $sa = shift;
my ($l, $r) = (0, $#sa);
my ($llcp, $rlcp) = (0, 0);
while ($l <= $r) {
my $i = int(($l + $r)/2);
my $h = $llcp; $h = $rlcp if $rlcp < $h;
my $c = lcp($p,$h,$t,$sa[$i]+$h);
if ($h+$c == length $p) {
my $j = $i;
while ($i > 0 &&
$p eq substr($t, $sa[$i-1], length $p)) {
$i--;
}
while ($j < $#sa-1 &&
$p eq substr($t, $sa[$j+1], length $p)) {
$j++;
}
return ($i, $j);
} elsif (substr($p,$h+$c,1) lt
substr($t,$sa[$i]+$h+$c,1)) {
$r = $i - 1;
$rlcp = $h + $c;
} else {
$l = $i + 1;
$llcp = $h + $c;
}
}
return (-1, -1);
}
|
occurrences <- function (p,t,sa) {
ell <- 1
r <- nchar(t)
llcp <- 0
rlcp <- 0
while (ell <= r) {
i <- as.integer((ell + r) / 2)
h <- min(llcp,rlcp)
c <- lcp(p,h+1,t,sa[i]+h)
if (h+c == nchar(p)) {
j <- i
while (i > 1 &&
p==substr(t,sa[i-1],sa[i-1]+nchar(p)-1)) {
i <- i - 1
}
while (j < nchar(t) &&
p==substr(t,sa[j+1],sa[j+1]+nchar(p)-1)) {
j <- j + 1
}
return(c(i,j))
} else if (substr(p,h+c+1,h+c+1) <
substr(t,sa[i]+h+c,sa[i]+h+c)) {
r <- i - 1
rlcp <- h + c
} else {
ell <- i + 1
llcp <- h + c
}
}
return(c(-1,-1))
}
|
sub common_occurrences {
my $p = shift;
my $t1 = shift;
my $t2 = shift;
my $m = length $p;
my $n1 = length $t1;
my $n2 = length $t2;
my @L;
for (my $i = 0; $i < $n1-$m+1; $i++) {
if ($p eq substr($t1,$i,$m)) {
for (my $j = 0; $j < $n2-$m+1; $j++) {
if ($p eq substr($t2,$j,$m)) {
push @L, $i, $j;
}
}
}
}
return \@L;
}
|
common.occurrences <- function (p,t1,t2) {
L <- c()
for (i in 1:nchar(t1)-nchar(p)+1)
if (p == substr(t1,i,i+nchar(p)-1))
for (j in 1:nchar(t2)-nchar(p)+1)
if (p == substr(t2,j,j+nchar(p)-1))
L <- c(L,i,j)
L
}
|
sub longest_common_subsequences {
my $s1 = shift;
my $s2 = shift;
my $n1 = length($s1);
my $n2 = length($s2);
my @L;
my $l = 0;
for (my $i = 0; $i < $n1; $i++) {
for (my $j = $i; $j < $n1; $j++) {
my $seq = substr($s1,$i,$j-$i+1);
for (my $k = 0; $k < $n2-$j+$i; $k++) {
if ($seq eq substr($s2,$k,$j-$i+1)) {
if (length($seq) == $l) {
push @L, $seq;
} elsif (length($seq) > $l) {
@L = $seq;
$l = length($seq);
}
}
}
}
}
my %count;
for my $seq (@L) { $count{$seq}++; }
@L = sort keys %count;
return \@L;
}
|
longest.common.subsequences <- function (s1,s2) {
L <- c()
ell <- 0
for (i in 1:nchar(s1)) {
for (j in i:nchar(s1)) {
seq <- substr(s1,i,j)
for (k in 1:(nchar(s2)-j+i)) {
if (seq == substr(s2,k,k+j-i)) {
if (nchar(seq) == ell) {
L <- c(L,seq)
} else if (nchar(seq) > ell) {
L <- c(seq)
ell <- nchar(seq)
}
}
}
}
}
unique(L)
}
|
sub longest_common_subsequences {
my $s1 = shift;
my $s2 = shift;
my @L;
my $l = 0;
my @LCS;
for (my $i = 0; $i < length $s1; $i++ ) {
for (my $j = 0; $j < length $s2; $j++ ) {
$LCS[$i][$j] = 0;
if (substr($s1,$i,1) eq substr($s2,$j,1)) {
if ($i == 0 || $j == 0) {
$LCS[$i][$j] = 1;
} else {
$LCS[$i][$j] = $LCS[$i-1][$j-1] + 1;
}
if ($LCS[$i][$j] > $l) {
$l = $LCS[$i][$j];
@L = ();
}
if ($LCS[$i][$j] == $l) {
push @L, substr($s1,$i-$l+1,$l);
}
}
}
}
return @L;
}
|
longest.common.subsequences <- function (s1,s2) {
L <- c()
ell <- 0
LCS <- matrix(0,nrow=nchar(s1),ncol=nchar(s2))
for (i in 1:nchar(s1)) {
for (j in 1:nchar(s2)) {
if (substr(s1,i,i) == substr(s2,j,j)) {
if (i == 1 || j == 1) {
LCS[i,j] <- 1
} else {
LCS[i,j] <- LCS[i-1,j-1] + 1
}
if (LCS[i,j] > ell) {
ell <- LCS[i,j]
L <- c()
}
if (LCS[i,j] == ell) {
L <- c(L,substr(s1,i-ell+1,i))
}
}
}
}
L
}
|
sub generalized_suffix_array {
my $seq = shift;
my %seq = %{$seq};
my @gsa;
for my $id (keys %seq) {
for my $i (0 .. length($seq{$id})-1) {
push @gsa, [$i, $id];
}
}
@gsa = sort {substr($seq{@{$a}[1]}, @{$a}[0])
cmp substr($seq{@{$b}[1]}, @{$b}[0])} @gsa;
return \@gsa;
}
|
generalized.suffix.array <- function (s1,s2) {
sa1 <- apply(matrix(1:nchar(s1)),1,
function (i) substr(s1,i,nchar(s1)))
sa2 <- apply(matrix(1:nchar(s2)),1,
function (i) substr(s2,i,nchar(s2)))
arr <- order(c(sa1,sa2))
tag <- sapply(arr,function (i)
if (i <= nchar(s1)) { 1 } else { 2 } )
arr <- sapply(arr,function (i)
if (i <= nchar(s1)) { i } else { i-nchar(s1) } )
cbind(arr,tag)
}
|
sub common_occurrences {
my $p = shift;
my $seq = shift;
my %seq = %{$seq};
my $gsa = shift;
my @gsa = @{$gsa};
my ($l, $r) = (0, $#gsa);
while ($l <= $r) {
my $i = int(($l + $r)/2);
my $c = $p cmp substr($seq{$gsa[$i][1]}, $gsa[$i][0], length $p);
if ($c < 0) {
$r = $i - 1;
} elsif ($c > 0) {
$l = $i + 1;
} else {
my $j = $i;
while ($i > 0 &&
$p eq substr($seq{$gsa[$i-1][1]}, $gsa[$i-1][0], length $p)) {
$i--;
}
while ($j < $#gsa &&
$p eq substr($seq{$gsa[$j+1][1]}, $gsa[$j+1][0], length $p)) {
$j++;
}
return ($i, $j);
}
}
return (-1, -1);
}
|
common.occurrences <- function (p,seq,gsa) {
ell <- 1
r <- n <- nchar(seq[1]) + nchar(seq[2])
while (ell <= r) {
i <- as.integer((ell + r) / 2)
s <- substr(seq[gsa[i,2]],
gsa[i,1],gsa[i,1]+nchar(p)-1)
if (p < s) {
r <- i - 1
} else if (p > s) {
ell <- i + 1
} else {
j <- i
while (i > 1 && p == s) {
s <- substr(seq[gsa[i-1,2]],
gsa[i-1,1],gsa[i-1,1]+nchar(p)-1)
if (p == s) i <- i - 1
}
s <- p
while (j < n && p == s) {
s <- substr(seq[gsa[j+1,2]],
gsa[j+1,1],gsa[j+1,1]+nchar(p)-1)
if (p == s) j <- j + 1
}
return(c(i,j))
}
}
return(c(-1,-1))
}
|
sub common_occurrences {
my $p = shift;
my $seq = shift;
my %seq = %{$seq};
my $gsa = shift;
my @gsa = @{$gsa};
my ($l, $r) = (0, $#gsa);
my ($llcp, $rlcp) = (0, 0);
while ($l <= $r) {
my $i = int(($l + $r)/2);
my $h = $llcp; $h = $rlcp if $rlcp < $h;
my $c = lcp($p,$h,$seq{$gsa[$i][1]},$gsa[$i][0]+$h);
if ($h+$c == length $p) {
my $j = $i;
while ($i > 0 &&
$p eq substr($seq{$gsa[$i-1][1]}, $gsa[$i-1][0], length $p)) {
$i--;
}
while ($j < $#gsa-1 &&
$p eq substr($seq{$gsa[$j+1][1]}, $gsa[$j+1][0], length $p)) {
$j++;
}
return ($i, $j);
} elsif (substr($p,$h+$c,1) lt
substr($seq{$gsa[$i][1]},$gsa[$i][0]+$h+$c,1)) {
$r = $i - 1;
$rlcp = $h + $c;
} else {
$l = $i + 1;
$llcp = $h + $c;
}
}
return (-1, -1);
}
|
common.occurrences <- function (p,seq,gsa) {
ell <- 1
r <- n <- nchar(seq[1]) + nchar(seq[2])
llcp <- 0
rlcp <- 0
while (ell <= r) {
i <- as.integer((ell + r) / 2)
s <- substr(seq[gsa[i,2]],gsa[i,1],
gsa[i,1]+nchar(p)-1)
h <- min(llcp,rlcp)
c <- lcp(p,h+1,s,1+h)
if (h+c == nchar(p)) {
j <- i
while (i > 1 && p == s) {
s <- substr(seq[gsa[i-1,2]],gsa[i-1,1],
gsa[i-1,1]+nchar(p)-1)
if (p == s) i <- i - 1
}
s <- p
while (j < n && p == s) {
s <- substr(seq[gsa[j+1,2]],gsa[j+1,1],
gsa[j+1,1]+nchar(p)-1)
if (p == s) j <- j + 1
}
return(c(i,j))
} else {
s <- substr(seq[gsa[i,2]],gsa[i,1]+h+c,
gsa[i,1]+h+c)
if (substr(p,h+c+1,h+c+1) < s) {
r <- i - 1
rlcp <- h + c
} else {
ell <- i + 1
llcp <- h + c
}
}
}
return(c(-1,-1))
}
|
sub longest_common_subsequences {
my $seq = shift;
my %seq = %{$seq};
my $gsa = shift;
my @gsa = @{$gsa};
my @L;
my $l = 0;
my $f2 = substr($seq{$gsa[0][1]}, $gsa[0][0], length($seq{$gsa[0][1]}));
for (my $i = 1; $i < length($seq{1})+length($seq{2}); $i++) {
my $f1 = $f2;
$f2 = substr($seq{$gsa[$i][1]}, $gsa[$i][0], length($seq{$gsa[$i][1]}));
if ($gsa[$i][1] != $gsa[$i-1][1]) {
my $p = lcp($f1,0,$f2,0);
if ($p > $l) {
$l = $p;
@L = ();
}
if ($p == $l) {
push @L,
substr($seq{$gsa[$i][1]},$gsa[$i][0],$p);
}
}
}
return \@L;
}
|
longest.common.subsequences <- function (seq,gsa) {
L <- c()
ell <- 0
suf2 <- substr(seq[gsa[1,2]],gsa[1,1],
nchar(seq[gsa[1,2]]))
for (i in 2:(nrow(gsa))) {
suf1 <- suf2
suf2 <- substr(seq[gsa[i,2]],gsa[i,1],
nchar(seq[gsa[i,2]]))
if (gsa[i-1,2] != gsa[i,2]) {
pref <- substr(suf1,1,lcp(suf1,1,suf2,1))
if (nchar(pref) > ell) {
ell <- nchar(pref)
L <- c()
}
if (nchar(pref) == ell) {
L <- c(L,pref)
}
}
}
L
}
|
sub hamming_distance {
my $s1 = shift;
my $s2 = shift;
my $d = -1;
if (length $s1 == length $s2) {
$d = 0;
for (my $i = 0; $i < length $s1; $i++) {
$d++ if substr($s1,$i,1) ne substr($s2,$i,1);
}
}
return $d;
}
|
hamming.distance <- function (s1,s2) {
if (nchar(s1) != nchar(s2))
-1
else
sum(strsplit(s1,"")[[1]] != strsplit(s2,"")[[1]])
}
|
sub levenshtein_distance {
my $s1 = shift;
my $s2 = shift;
my ($n1, $n2) = (length $s1, length $s2);
my @d;
for ( my $i = 0; $i <= $n1; $i++ ) {
$d[$i][0] = $i;
}
for ( my $j = 0; $j <= $n2; $j++ ) {
$d[0][$j] = $j;
}
my @t1 = split //, $s1;
my @t2 = split //, $s2;
for ( my $i = 1; $i <= $n1; $i++ ) {
for ( my $j = 1; $j <= $n2; $j++ ) {
if ($t1[$i-1] eq $t2[$j-1]) {
$d[$i][$j] = $d[$i-1][$j-1];
} else {
$d[$i][$j] = $d[$i-1][$j] + 1;
if ($d[$i][$j-1] + 1 < $d[$i][$j]) {
$d[$i][$j] = $d[$i][$j-1] + 1;
}
}
}
}
return \@d;
}
|
levenshtein.distance <- function(s1,s2) {
t1 <- strsplit(s1,split="")[[1]]
t2 <- strsplit(s2,split="")[[1]]
n1 <- length(t1)
n2 <- length(t2)
d <- array(0,dim=c(n1+1,n2+1))
d[,1] <- 0:n1
d[1,] <- 0:n2
for (i in 2:(n1+1))
for (j in 2:(n2+1))
if (t1[i-1] == t2[j-1])
d[i,j] <- d[i-1,j-1]
else
d[i,j] <- min(d[i-1,j]+1, d[i,j-1]+1)
d
}
|
sub edit_distance {
my $s1 = shift;
my $s2 = shift;
my ($n1, $n2) = (length $s1, length $s2);
my @d;
for ( my $i = 0; $i <= $n1; $i++ ) {
$d[$i][0] = $i;
}
for ( my $j = 0; $j <= $n2; $j++ ) {
$d[0][$j] = $j;
}
my @t1 = split //, $s1;
my @t2 = split //, $s2;
for ( my $i = 1; $i <= $n1; $i++ ) {
for ( my $j = 1; $j <= $n2; $j++ ) {
$d[$i][$j] = $d[$i-1][$j] + 1;
if ($d[$i][$j-1] + 1 < $d[$i][$j]) {
$d[$i][$j] = $d[$i][$j-1] + 1;
}
if ($t1[$i-1] eq $t2[$j-1]) {
if ($d[$i-1][$j-1] < $d[$i][$j]) {
$d[$i][$j] = $d[$i-1][$j-1];
}
} else {
if ($d[$i-1][$j-1] + 1 < $d[$i][$j]) {
$d[$i][$j] = $d[$i-1][$j-1] + 1;
}
}
}
}
return \@d;
}
|
edit.distance <- function (s1,s2) {
t1 <- strsplit(s1,split="")[[1]]
t2 <- strsplit(s2,split="")[[1]]
n1 <- length(t1)
n2 <- length(t2)
d <- array(0,dim=c(n1+1,n2+1))
d[,1] <- 0:n1
d[1,] <- 0:n2
for (i in 2:(n1+1)) {
for (j in 2:(n2+1)) {
d[i,j] <- min(d[i-1,j]+1, d[i,j-1]+1)
if (t1[i-1] == t2[j-1])
d[i,j] <- min(d[i,j], d[i-1,j-1])
else
d[i,j] <- min(d[i,j], d[i-1,j-1]+1)
}
}
d
}
|
sub alignment {
my $s1 = shift;
my $s2 = shift;
my $d = shift;
my @d = @{$d};
my ($t1, $t2) = ("", "");
my @t1 = split //, $s1;
my @t2 = split //, $s2;
my ($n1, $n2) = (length $s1, length $s2);
my ($i, $j) = ($n1, $n2);
while ($i > 0 && $j > 0) {
if ($d[$i][$j] eq $d[$i][$j-1]+1) {
$t1 = "-" . $t1;
$t2 = $t2[$j-1] . $t2;
$j--;
} elsif ($d[$i][$j] eq $d[$i-1][$j]+1) {
$t1 = $t1[$i-1] . $t1;
$t2 = "-" . $t2;
$i--;
} else {
$t1 = $t1[$i-1] . $t1;
$t2 = $t2[$j-1] . $t2;
$i--;
$j--;
}
}
while ($j > 0) {
$t1 = "-" . $t1;
$t2 = $t2[$j-1] . $t2;
$j--;
}
while ($i > 0) {
$t1 = $t1[$i-1] . $t1;
$t2 = "-" . $t2;
$i--;
}
print "$t1\n$t2\n";
}
|
alignment <- function (s1,s2,d) {
t1 <- strsplit(s1,split="")[[1]]
t2 <- strsplit(s2,split="")[[1]]
a1 <- a2 <- c()
i <- length(t1)+1
j <- length(t2)+1
while (i > 1 && j > 1) {
if (d[i,j] == d[i,j-1]+1) {
a1 <- c("-",a1)
a2 <- c(t2[j-1],a2)
j <- j-1
} else if (d[i,j] == d[i-1,j]+1) {
a1 <- c(t1[i-1],a1)
a2 <- c("-",a2)
i <- i-1
} else {
a1 <- c(t1[i-1],a1)
a2 <- c(t2[j-1],a2)
i <- i-1
j <- j-1
}
}
while (j > 1) {
a1 <- c("-",a1)
a2 <- c(t2[j-1],a2)
j <- j-1
}
while (i > 1) {
a1 <- c(t1[i-1],a1)
a2 <- c("-",a2)
i <- i-1
}
print(paste(a1,collapse=""))
print(paste(a2,collapse=""))
}
|
sub local_alignment {
my ($s1, $s2, $match, $mismatch, $gap) = @_;
my ($n1, $n2) = ( length $s1, length $s2);
my @s;
for (my $i = 0; $i <= $n1; $i++) { $s[$i][0] = 0; }
for (my $j = 0; $j <= $n2; $j++) { $s[0][$j] = 0; }
my @t1 = split //, $s1;
my @t2 = split //, $s2;
for ( my $i = 1; $i <= $n1; $i++ ) {
for ( my $j = 1; $j <= $n2; $j++ ) {
if ($t1[$i-1] eq $t2[$j-1]) {
$s[$i][$j] = $s[$i-1][$j-1] + $match;
} else {
$s[$i][$j] = $s[$i-1][$j-1] + $mismatch;
}
$s[$i][$j] = $s[$i-1][$j] + $gap
if $s[$i-1][$j] + $gap > $s[$i][$j];
$s[$i][$j] = $s[$i][$j-1] + $gap
if $s[$i][$j-1] + $gap > $s[$i][$j];
$s[$i][$j] = 0 if 0 > $s[$i][$j];
}
}
my $max = 0;
my @opt;
for ( my $i = 1; $i <= $n1; $i++ ) {
for ( my $j = 1; $j <= $n2; $j++ ) {
if ($s[$i][$j] > $max) {
$max = $s[$i][$j];
@opt = ();
}
if ($s[$i][$j] == $max) {
$max = $s[$i][$j];
push @opt, $i, $j;
}
}
}
while (@opt) {
my $i = shift @opt;
my $j = shift @opt;
my ($t1, $t2) = ("", "");
while ($s[$i][$j] != 0) {
if (($t1[$i-1] eq $t2[$j-1] && $s[$i][$j] eq
$s[$i-1][$j-1] + $match) || ($t1[$i-1] ne $t2[$j-1] &&
$s[$i][$j] eq $s[$i-1][$j-1] + $mismatch)) {
$t1 = $t1[$i-1] . $t1;
$t2 = $t2[$j-1] . $t2;
$i--;
$j--;
} elsif ($s[$i][$j] eq $s[$i-1][$j] + $gap) {
$t1 = $t1[$i-1] . $t1;
$t2 = "-" . $t2;
$i--;
} elsif ($s[$i][$j] eq $s[$i][$j-1] + $gap) {
$t1 = "-" . $t1;
$t2 = $t2[$j-1] . $t2;
$j--;
}
}
print "$t1\n$t2\n";
}
}
|
local.alignment <- function
(s1,s2,match,mismatch,gap) {
t1 <- strsplit(s1,split="")[[1]]
t2 <- strsplit(s2,split="")[[1]]
n1 <- length(t1)
n2 <- length(t2)
s <- array(0,dim=c(n1+1,n2+1))
s[,1] <- 0
s[1,] <- 0
for (i in 2:(n1+1)) {
for (j in 2:(n2+1)) {
if (t1[i-1] == t2[j-1])
s[i,j] <- s[i-1,j-1] + match
else
s[i,j] <- s[i-1,j-1] + mismatch
s[i,j] <- max(
s[i,j],
s[i-1,j] + gap,
s[i,j-1] + gap,
0)
}
}
opt <- which(s==max(s),arr.ind=TRUE)
for (sol in 1:nrow(opt)) {
i <- opt[sol,1]
j <- opt[sol,2]
m1 <- c()
m2 <- c()
while (s[i,j] != 0) {
if ((t1[i-1] == t2[j-1] && s[i,j] ==
s[i-1,j-1] + match) ||
(t1[i-1] != t2[j-1] && s[i,j] ==
s[i-1,j-1] + mismatch)) {
m1 <- c(t1[i-1],m1)
m2 <- c(t2[j-1],m2)
i <- i - 1
j <- j - 1
} else if (s[i,j] == s[i-1,j] + gap) {
m1 <- c(t1[i-1],m1)
m2 <- c("-",m2)
i <- i - 1
} else if (s[i,j] == s[i,j-1] + gap) {
m1 <- c("-",m1)
m2 <- c(t2[j-1],m2)
j <- j - 1
}
}
print(paste(m1,collapse=""))
print(paste(m2,collapse=""))
}
}
|
sub partition {
my $tree = shift;
my %partition = ();
for my $node ($tree->get_nodes) {
next if $node->is_Leaf; # discard terminal nodes
next unless $node->ancestor; # discard root
my @a = sort { $a cmp $b } map { $_->id }
grep {$_->is_Leaf } $node->get_all_Descendents;
my $aa = join ",", @a;
my @b = ();
my %count = ();
foreach my $label (@a, map { $_->id } $tree->get_leaf_nodes) {
$count{$label}++
}
foreach my $label (keys %count) {
push @b, $label if $count{$label} == 1;
}
my $bb = join ",", sort { $a cmp $b } @b;
my $p = $aa le $bb ? "$aa:$bb" : "$bb:$aa";
$partition{$p} = 1;
}
my @partition = sort keys %partition;
return \@partition;
}
|
|
sub partition_distance {
my $tree1 = shift;
my $tree2 = shift;
my @partition1 = @{ partition($tree1) };
my @partition2 = @{ partition($tree2) };
my $dist = 0;
my %count = ();
foreach my $p (@partition1, @partition2) {
$count{$p}++
}
foreach my $p (keys %count) {
$dist++ if $count{$p} == 1;
}
return $dist;
}
|
library(ape) dist.topo(t1,t2) |
sub distances {
my $tree = shift;
my @leaves = sort {$a->id cmp $b->id} $tree->get_leaf_nodes;
my @labels = map { $_->id } @leaves;
my $n = scalar @labels;
my @dist;
for my $i (1..$n-1) {
my @nodes = $tree->find_node(-id => $labels[$i-1]);
for my $j ($i+1..$n) {
push @nodes, $tree->find_node(-id => $labels[$j-1]);
push @dist, $tree->distance(-nodes => \@nodes);
pop @nodes;
}
}
return \@dist;
}
|
|
sub nodal_distance {
my $tree1 = shift;
my $tree2 = shift;
my @dist1 = @{ distances($tree1) };
my @dist2 = @{ distances($tree2) };
my ($dist, $d1, $d2);
while (@dist1 and @dist2) {
$d1 = pop @dist1;
$d2 = pop @dist2;
$dist += abs($d1-$d2);
}
return $dist;
}
|
library(ape)
nodal.distance <- function (t1,t2) {
m1<-cophenetic.phylo(t1)
m1<-m1[order(rownames(m1)),order(colnames(m1))]
n1<-m1[upper.tri(m1)]
m2<-cophenetic.phylo(t2)
m2<-m2[order(rownames(m2)),order(colnames(m2))]
n2<-m2[upper.tri(m2)]
sum(abs(n1-n2))
}
|
sub triplet {
my $tree = shift;
my $i = shift;
my $j = shift;
my $k = shift;
my @ij = grep { $_->id =~ /$i|$j/ } $tree->get_leaf_nodes;
my @ik = grep { $_->id =~ /$i|$k/ } $tree->get_leaf_nodes;
my @jk = grep { $_->id =~ /$j|$k/ } $tree->get_leaf_nodes;
my $ij = $tree->get_lca(-nodes => \@ij);
my $ik = $tree->get_lca(-nodes => \@ik);
my $jk = $tree->get_lca(-nodes => \@jk);
my $str;
if ($ik == $jk) {
$str = "(($i,$j),$k);";
} else {
if ($ij == $jk) {
$str = "(($i,$k),$j);";
} else {
$str = "(($j,$k),$i);";
}
}
return $str;
}
|
library(ape)
triplet <- function (t,i,j,k) {
ij <- mrca(t)[i,j]
ik <- mrca(t)[i,k]
jk <- mrca(t)[j,k]
if (ik == jk)
paste("((",i,",",j,"),",k,");",sep="")
else
if (ij == jk)
paste("((",i,",",k,"),",j,");",sep="")
else
paste("((",j,",",k,"),",i,");",sep="")
}
|
use Clone qw(clone);
sub quartet {
my ($tree,$i,$j,$k,$l) = @_;
my $copy = clone $tree;
map { $copy->remove_Node($_) }
grep { !($_->id =~ /$i|$j|$k|$l/) }
$copy->get_leaf_nodes;
return $copy;
}
|
library(ape)
quartet <- function (t,i,j,k,l) {
unroot(drop.tip(t,setdiff(t$tip.label,c(i,j,k,l))))
}
|
|
|
parent <- function (tree,node) tree$edge[which(tree$edge[,2]==node),1] |
|
|
children <- function (tree,node) tree$edge[which(tree$edge[,1]==node),2] |
|
|
is.root <- function (tree,node) length(tree$tip.label)+1==node |
|
|
is.ancestor <- function(tree,anc,des) {
if (anc==des) return(TRUE)
if (is.root(tree,des)) return(FALSE)
else return(is.ancestor(tree,anc,parent(tree,des)))
}
|
|
|
postorder <- function (tree) {
r <- length(tree$tip.label)+1
postorder.traversal(tree,r,c())
}
|
|
|
postorder.traversal <- function (tree,v,res) {
for (w in t1$edge[which(tree$edge[,1]==v),2])
res <- postorder.traversal(tree,w,res)
c(res,v)
}
|
sub mast_size {
my $tree1 = shift;
my $tree2 = shift;
my (@m, $m);
for my $node1 (@{ $tree1->Bio::Tree::Compatible::postorder_traversal }) {
my $n1 = $node1->internal_id;
for my $node2 (@{ $tree2->Bio::Tree::Compatible::postorder_traversal }) {
my $n2 = $node2->internal_id;
if ($node1->is_Leaf) {
if ($node2->is_Leaf) {
$m = ($node1->id eq $node2->id ? 1 : 0);
} else {
my $label1 = $node1->id;
$m = (grep { /$label1/ } map { $_->id }
grep { $_->is_Leaf } $node2->get_all_Descendents) ? 1 : 0;
}
} else {
if ($node2->is_Leaf) {
my $label2 = $node2->id;
$m = (grep { /$label2/ } map { $_->id }
grep { $_->is_Leaf } $node1->get_all_Descendents) ? 1 : 0;
} else {
my ($l1,$r1) = map { $_->internal_id } $node1->each_Descendent;
my ($l2,$r2) = map { $_->internal_id } $node2->each_Descendent;
$m = $m[$l1][$l2] + $m[$r1][$r2];
if ($m[$l1][$r2] + $m[$r1][$l2] > $m) {
$m = $m[$l1][$r2] + $m[$r1][$l2];
}
if ($m[$n1][$l2] > $m) {
$m = $m[$n1][$l2];
}
if ($m[$n1][$r2] > $m) {
$m = $m[$n1][$r2];
}
if ($m[$l1][$n2] > $m) {
$m = $m[$l1][$n2];
}
if ($m[$r1][$n2] > $m) {
$m = $m[$r1][$n2];
}
}
}
$m[$n1][$n2] = $m;
}
}
return $m;
}
|
mast.size <- function (t1,t2) {
po1 <- postorder(t1)
po2 <- postorder(t2)
m <- matrix(0,nrow=length(po1),ncol=length(po2),dimnames=list(po1,po2))
for (i in po1) {
for (j in po2) {
if (length(t1$edge[which(t1$edge[,1]==i),1])==0) {
if (length(t2$edge[which(t2$edge[,1]==j),1])==0) {
if (t1$tip.label[i]==t2$tip.label[j]) m[i,j] <- 1
} else {
jj <- which(t2$tip.label==t1$tip.label[i])
if (is.ancestor(t2,j,jj)) m[i,j] <- 1
}
} else {
if (length(t2$edge[which(t2$edge[,1]==j),1])==0) {
ii <- which(t1$tip.label==t2$tip.label[j])
if (is.ancestor(t1,i,ii)) m[i,j] <- 1
} else {
l1 <- children(t1,i)[1]
r1 <- children(t1,i)[2]
l2 <- children(t2,j)[1]
r2 <- children(t2,j)[2]
m[i,j] <- max(m[l1,l2]+m[r1,r2],m[l1,r2]+m[r1,l2],
m[i,l2],m[i,r2],m[l1,j],m[r1,j])
}
}
}
}
m <- m[po1,po2]
dimnames(m) <- list(po1,po2)
m
}
|
sub mast {
my $tree1 = shift;
my $tree2 = shift;
my @m;
for my $node1 (@{ $tree1->Bio::Tree::Compatible::postorder_traversal }) {
my $n1 = $node1->internal_id;
for my $node2 (@{ $tree2->Bio::Tree::Compatible::postorder_traversal }) {
my $n2 = $node2->internal_id;
my @mm = ();
if ($node1->is_Leaf) {
if ($node2->is_Leaf) {
if ($node1->id eq $node2->id) {
@mm = ($node1->id);
}
} else {
my $label1 = $node1->id;
if (grep { /$label1/ } map { $_->id }
grep { $_->is_Leaf } $node2->get_all_Descendents) {
@mm = ($label1);
}
}
} else {
if ($node2->is_Leaf) {
my $label2 = $node2->id;
if (grep { /$label2/ } map { $_->id }
grep { $_->is_Leaf } $node1->get_all_Descendents) {
@mm = ($label2);
}
} else {
my ($l1,$r1) = map { $_->internal_id } $node1->each_Descendent;
my ($l2,$r2) = map { $_->internal_id } $node2->each_Descendent;
@mm = @{$m[$l1][$l2]};
push @mm, @{$m[$r1][$r2]};
if (scalar(@{$m[$l1][$r2]})+scalar(@{$m[$r1][$l2]})>scalar(@mm)) {
@mm = @{$m[$l1][$r2]};
push @mm, @{$m[$r1][$l2]};
}
if (scalar(@{$m[$n1][$l2]})>scalar(@mm)) {
@mm = @{$m[$n1][$l2]};
}
if (scalar(@{$m[$n1][$r2]})>scalar(@mm)) {
@mm = @{$m[$n1][$r2]};
}
if (scalar(@{$m[$l1][$n2]})>scalar(@mm)) {
@mm = @{$m[$l1][$n2]};
}
if (scalar(@{$m[$r1][$n2]})>scalar(@mm)) {
@mm = @{$m[$r1][$n2]};
}
}
}
$m[$n1][$n2] = \@mm;
}
}
return \@m;
}
|
mast <- function (t1,t2) {
po1 <- postorder(t1)
po2 <- postorder(t2)
m <- matrix(rep(list(),length(po1)),nrow=length(po1),ncol=length(po2),
dimnames=list(po1,po2))
for (i in po1) {
for (j in po2) {
mm <- list()
if (length(t1$edge[which(t1$edge[,1]==i),1])==0) {
if (length(t2$edge[which(t2$edge[,1]==j),1])==0) {
if (t1$tip.label[i]==t2$tip.label[j])
mm <- c(t1$tip.label[i])
} else {
jj <- which(t2$tip.label==t1$tip.label[i])
if (is.ancestor(t2,j,jj))
mm <- c(t1$tip.label[i])
}
} else {
if (length(t2$edge[which(t2$edge[,1]==j),1])==0) {
ii <- which(t1$tip.label==t2$tip.label[j])
if (is.ancestor(t1,i,ii))
mm <- c(t2$tip.label[j])
} else {
l1 <- children(t1,i)[1]
r1 <- children(t1,i)[2]
l2 <- children(t2,j)[1]
r2 <- children(t2,j)[2]
mm <- c(m[[l1,l2]],m[[r1,r2]])
if ( length(m[[l1,r2]]) + length(m[[r1,l2]]) > length(mm) )
mm <- c(m[[l1,r2]],m[[r1,l2]])
if ( length(m[[i,l2]]) > length(mm) )
mm <- m[[i,l2]]
if ( length(m[[i,r2]]) > length(mm) )
mm <- m[[i,r2]]
if ( length(m[[l1,j]]) > length(mm) )
mm <- m[[l1,j]]
if ( length(m[[r1,j]]) > length(mm) )
mm <- m[[r1,j]]
}
}
m[[i,j]] <- mm
}
}
unlist(m[[length(t1$tip.label)+1,length(t2$tip.label)+1]])
}
|
sub triplets_distance {
my $tree1 = shift;
my $tree2 = shift;
my @leaves = sort {$a->id cmp $b->id} $tree1->get_leaf_nodes;
my @labels = map { $_->id } @leaves;
my $dist = 0;
for (my $i = 0; $i < @leaves; $i++) {
for (my $j = $i+1; $j < @leaves; $j++) {
for (my $k = $j+1; $k < @leaves; $k++) {
my $t1 = triplet($tree1,$labels[$i],$labels[$j],$labels[$k]);
my $t2 = triplet($tree2,$labels[$i],$labels[$j],$labels[$k]);
$dist += 2 unless ($t1 eq $t2);
}
}
}
return $dist;
}
|
triplets.distance <- function (t1,t2) {
L <- sort(t1$tip.label)
d <- 0
for (i in L[1:(length(L)-2)]) {
for (j in L[(match(i,L)+1):(length(L)-1)]) {
for (k in L[(match(j,L)+1):length(L)]) {
str1 <- triplet(t1,i,j,k)
str2 <- triplet(t2,i,j,k)
if (str1 != str2) { d <- d + 2 }
}
}
}
d
}
|
sub quartets_distance {
my $tree1 = shift;
my $tree2 = shift;
my @leaves = sort {$a->id cmp $b->id} $tree1->get_leaf_nodes;
my @labels = map { $_->id } @leaves;
my $dist = 0;
for (my $i = 0; $i < @leaves; $i++) {
for (my $j = $i+1; $j < @leaves; $j++) {
for (my $k = $j+1; $k < @leaves; $k++) {
for (my $l = $k+1; $l < @leaves; $l++) {
my $q1 = quartet($tree1,
$labels[$i],$labels[$j],$labels[$k],$labels[$l]);
my $q2 = quartet($tree2,
$labels[$i],$labels[$j],$labels[$k],$labels[$l]);
if (partition_distance($q1,$q2) != 0) {
$dist += 2;
}
}
}
}
}
return $dist;
}
|
quartets.distance <- function (t1,t2) {
L <- sort(t1$tip.label)
d <- 0
for (i in L[1:(length(L)-3)]) {
for (j in L[(match(i,L)+1):(length(L)-2)]) {
for (k in L[(match(j,L)+1):(length(L)-1)]) {
for (l in L[(match(k,L)+1):length(L)]) {
q1 <- quartet(t1,i,j,k,l)
q2 <- quartet(t2,i,j,k,l)
if (dist.topo(q1,q2) != 0) { d <- d + 2 }
}
}
}
}
d
}
|
|
|
network.height <- function (net) {
V(net)$height <- 0
V(net)$visited <- FALSE
Q <- as.vector(V(net)[which(degree(net,mode="out")==0)-1])
while (length(Q)>0) {
j <- Q[1]
Q <- Q[-1]
V(net)[j]$visited <- TRUE
I <- as.vector(V(net)[nei(j,mode="in")])
for (i in I) {
V(net)[i]$height <- max(V(net)[i]$height,V(net)[j]$height+1)
if (all(V(net)[nei(i,mode="out")]$visited)) Q <- c(Q,i)
}
}
V(net)$height
}
|
use Bio::PhyloNetwork;
sub is_strict_ancestor {
my $net = shift;
my $u = shift;
my $v = shift;
my $dag = $net->graph->copy_graph;
my @roots = $dag->source_vertices;
my $root = shift @roots;
$dag->delete_vertex($u);
return !$dag->is_reachable($root,$v);
}
|
library(igraph)
is.strict.ancestor <- function (net,i,j) {
if (length(get.all.shortest.paths(net,
V(net)[i],V(net)[j],mode="out")) == 0) return(FALSE)
r <- V(net)[which(degree(net,mode="in")==0)-1]
if (i == r || i == j) return(TRUE)
net <- delete.vertices(net,i)
if (i < j) j <- j - 1 # account for deleted node
length(get.all.shortest.paths(net,r,V(net)[j],mode="out")) == 0
}
|
|
|
is.ancestor <- function (net,i,j) {
length(get.all.shortest.paths(net,V(net)[i],V(net)[j],mode="out")) != 0
}
|
sub CSA {
my $net = shift;
my $v = shift;
my $w = shift;
my $dag = $net->graph;
my @csa = ();
foreach my $u ( $net->nodes ) {
if ( $dag->is_reachable($u,$v) and $dag->is_reachable($u,$w) ) {
if (is_strict_ancestor($net,$u,$v)
or is_strict_ancestor($net,$u,$w)) {
push @csa, $u;
}
}
}
return \@csa;
}
|
CSA <- function (net,i,j) {
strict <- lapply(V(net),function (u) is.ancestor(net,u,i)
&& is.ancestor(net,u,j) && (is.strict.ancestor(net,u,i)
|| is.strict.ancestor(net,u,j)))
V(net)[unlist(strict)]
}
|
sub LCSA {
my $net = shift;
my $v = shift;
my $w = shift;
my %height = $net->heights;
my @CSA = @{ CSA($net,$v,$w) };
my $lcsa = shift @CSA;
for my $node ( @CSA ) {
if ( $height{$node} < $height{$lcsa} ) {
$lcsa = $node;
}
}
return $lcsa;
}
|
LCSA <- function (net,i,j) {
csa <- match(CSA(net,i,j),V(net))
V(net)[csa[which.min(network.height(net)[csa])]-1]
}
|
|
|
path.multiplicity <- function (net) {
leaves <- V(net)[which(degree(net,mode="out")==0)-1]
mu <- matrix(0,nrow=vcount(net),ncol=length(leaves),
dimnames=list(sort(V(net)$name),sort(leaves$name)))
for (v in sort(V(net)[leaves]$name)) mu[v,v] <- 1
V(net)$visited <- FALSE
Q <- as.vector(V(net)[which(degree(net,mode="out")==0)-1])
while (length(Q)>0) {
j <- Q[1]
Q <- Q[-1]
V(net)[j]$visited <- TRUE
I <- as.vector(V(net)[nei(j,mode="in")])
for (i in I) {
mu[V(net)[i]$name,] <- mu[V(net)[i]$name,] + mu[V(net)[j]$name,]
if (all(V(net)[nei(i,mode="out")]$visited)) Q <- c(Q,i)
}
}
mu
}
|
|
|
lex.cmp <- function(vec1,vec2) {
index <- which.min(vec1 == vec2)
as.numeric(sign(vec1[index] - vec2[index]))
}
path.multiplicity.distance <- function (net1,net2) {
mu1 <- path.multiplicity(net1)
mu1 <- mu1[do.call("order",lapply(1:ncol(mu1),function (j) mu1[,j])),]
mu2 <- path.multiplicity(net2)
mu2 <- mu2[do.call("order",lapply(1:ncol(mu2),function (j) mu2[,j])),]
i1 <- 1
i2 <- 1
c <- 0
while (i1 <= nrow(mu1) && i2 <= nrow(mu2)) {
lex <- lex.cmp(mu1[i1,],mu2[i2,])
if ( lex == -1 ) {
i1 <- i1 + 1
} else if ( lex == 1 ) {
i2 <- i2 + 1
} else {
i1 <- i1 + 1
i2 <- i2 + 1
c <- c + 1
}
}
nrow(mu1)+nrow(mu2)-2*c
}
|
|
|
tripartitions <- function (net) {
leaves <- V(net)[which(degree(net,mode="out")==0)-1]
theta <- matrix("",nrow=vcount(net),ncol=length(leaves),
dimnames=list(sort(V(net)$name),sort(leaves$name)))
for (i in V(net)) {
for (j in leaves) {
if ( is.ancestor(net,i,j) ) {
if ( is.strict.ancestor(net,i,j) ) {
theta[V(net)[i]$name,V(net)[j]$name] <- "A"
} else {
theta[V(net)[i]$name,V(net)[j]$name] <- "B"
}
} else {
theta[V(net)[i]$name,V(net)[j]$name] <- "C"
}
}
}
theta
}
|
use Bio::PhyloNetwork;
sub tripartition_distance {
my $net1 = shift;
my $net2 = shift;
$net1->compute_tripartitions() unless defined $net1->{tripartitions};
$net2->compute_tripartitions() unless defined $net2->{tripartitions};
my @tri1 = sort map {$net1->{tripartitions}->{$_}} $net1->nodes;
my @tri2 = sort map {$net2->{tripartitions}->{$_}} $net2->nodes;
my $i1 = 0;
my $i2 = 0;
my $c = 0;
while ($i1 < scalar @tri1 && $i2 < scalar @tri2) {
if ($tri1[$i1] lt $tri2[$i2]) {
$i1++;
} elsif ($tri1[$i1] gt $tri2[$i2]) {
$i2++;
} else {
$i1++;
$i2++;
$c++;
}
}
return scalar @tri1+scalar @tri2-2*$c;
}
|
library(igraph)
lex.cmp.char <- function (vec1,vec2) {
for (j in 1:length(vec1)) {
if (vec1[j] < vec2[j]) { return(-1) }
if (vec1[j] > vec2[j]) { return(1) }
}
return(0)
}
tripartition.distance <- function (net1,net2) {
theta1 <- tripartitions(net1)
theta1 <- theta1[do.call("order",lapply(1:ncol(theta1),
function (j) theta1[,j])),]
theta2 <- tripartitions(net2)
theta2 <- theta2[do.call("order",lapply(1:ncol(theta2),
function (j) theta2[,j])),]
i1 <- 1
i2 <- 1
c <- 0
while (i1 <= nrow(theta1) && i2 <= nrow(theta2)) {
lex <- lex.cmp.char(theta1[i1,],theta2[i2,])
if ( lex == -1 ) {
i1 <- i1 + 1
} else if ( lex == 1 ) {
i2 <- i2 + 1
} else {
i1 <- i1 + 1
i2 <- i2 + 1
c <- c + 1
}
}
nrow(theta1)+nrow(theta2)-2*c
}
|
sub nodal_distance {
my $net1 = shift;
my $net2 = shift;
my @L = $net1->leaves;
my $dist = 0;
for (my $i = 0; $i < @L; $i++) {
for (my $j = $i+1; $j < @L; $j++) {
my $lcsa1 = LCSA($net1,$L[$i],$L[$j]);
my $lcsa2 = LCSA($net2,$L[$i],$L[$j]);
my @p1 = $net1->graph->SP_Dijkstra($lcsa1,$L[$i]);
my @p2 = $net2->graph->SP_Dijkstra($lcsa2,$L[$i]);
my $d1 = @p1 - 1;
my $d2 = @p2 - 1;
$dist += abs($d1 - $d2);
@p1 = $net1->graph->SP_Dijkstra($lcsa1,$L[$j]);
@p2 = $net2->graph->SP_Dijkstra($lcsa2,$L[$j]);
$d1 = @p1 - 1;
$d2 = @p2 - 1;
$dist += abs($d1 - $d2);
}
}
return $dist;
}
|
library(igraph)
nodal.distance <- function (net1,net2) {
l1 <- V(net1)[which(degree(net1,mode="out")==0)-1]
n1 <- matrix(0,nrow=length(l1),ncol=length(l1),
dimnames=list(sort(l1$name),sort(l1$name)))
l2 <- V(net2)[which(degree(net2,mode="out")==0)-1]
n2 <- matrix(0,nrow=length(l2),ncol=length(l2),
dimnames=list(sort(l2$name),sort(l2$name)))
for (i in l1) {
for (j in c(l1)[match(i,l1):length(l1)]) {
lcsa <- LCSA(net1,i,j)
paths <- get.shortest.paths(net1,lcsa,i,mode="out")
n1[V(net1)[i]$name,V(net1)[j]$name] <- length(paths[[1]])-1
paths <- get.shortest.paths(net1,lcsa,j,mode="out")
n1[V(net1)[j]$name,V(net1)[i]$name] <- length(paths[[1]])-1
}
}
for (i in l2) {
for (j in c(l2)[match(i,l2):length(l2)]) {
lcsa <- LCSA(net2,i,j)
paths <- get.shortest.paths(net2,lcsa,i,mode="out")
n2[V(net2)[i]$name,V(net2)[j]$name] <- length(paths[[1]])-1
paths <- get.shortest.paths(net2,lcsa,j,mode="out")
n2[V(net2)[j]$name,V(net2)[i]$name] <- length(paths[[1]])-1
}
}
sum(abs(n1-n2))
}
|
sub explode_rec {
my ($self,$trees) = @_;
my @h = $self->hybrid_nodes;
if (scalar @h) {
my $v = shift @h;
for my $u ($self->{graph}->predecessors($v)) {
$self->{graph}->delete_edge($u,$v);
$self->explode_rec($trees);
$self->{graph}->add_edge($u,$v);
}
} else {
my $io = IO::String->new($self->eNewick);
my $treeio = Bio::TreeIO->new(-format => 'newick', -fh => $io);
my $tree = $treeio->next_tree;
$tree->contract_linear_paths;
push @{$trees}, $tree;
}
}
sub explode {
my ($self) = @_;
my @trees;
$self->explode_rec(\@trees);
return @trees;
}
|
network.to.newick <- function (net) {
r <- V(net)[which(degree(net,mode="in")==0)-1]
newick <- obtain.newick(net,r,c())
paste(c(newick,";"),collapse="")
}
obtain.newick <- function (net,v,newick) {
if (length(V(net)[nei(v,"out")]) != 0) {
if ( length(V(net)[nei(v,"in")]) != 0
&& match(v,V(net)[nei(V(net)[nei(v,"in")],"out")]) != 1 )
newick <- c(newick,",")
newick <- c(newick,"(")
}
for (w in V(net)[nei(v,"out")]) {
newick <- obtain.newick(net,w,newick)
}
if (length(V(net)[nei(v,"out")]) != 0)
newick <- c(newick,")")
if ( length(V(net)[nei(v,"in")]) != 0
&& length(V(net)[nei(v,"out")]) == 0
&& match(v,V(net)[nei(V(net)[nei(v,"in")],"out")]) != 1 )
newick <- c(newick,",")
c(newick,V(net)[v]$name)
}
network.to.tree <- function (net) {
V(net)$del <- FALSE
E(net)$del <- FALSE
r <- V(net)[which(degree(net,mode="in")==0)-1]
net <- postorder.traversal(net,r)
net <- delete.vertices(net,V(net)[del])
net <- delete.edges(net,E(net)[del])
network.to.newick(net)
}
postorder.traversal <- function (net,v) {
for (w in V(net)[nei(v,"out")]) {
net <- postorder.traversal(net,w)
}
VW <- E(net)[from == v & del==FALSE]
if (length(c(VW)) == 1) {
u <- V(net)[nei(v,"in")]
w <- get.edge(net,VW)[2]
E(net,path=c(V(net)[u],V(net)[v]))$del <- TRUE
E(net,path=c(V(net)[v],V(net)[w]))$del <- TRUE
V(net)[v]$del <- TRUE
net <- add.edges(net,c(V(net)[u],V(net)[w]))
E(net,path=c(V(net)[u],V(net)[w]))$del <- FALSE
}
net
}
explode <- function (net) {
trees <- c()
H <- V(net)[which(degree(net,mode="in")>1)-1]
if (length(H) == 0) {
trees <- c(trees,network.to.tree(net))
} else {
v <- c(H)[1]
for (u in V(net)[nei(v,"in")]) {
new <- delete.edges(net,E(net,P=c(u,v)))
trees <- c(trees,explode(new))
}
}
trees
}
|
sub error_ratio {
my $net1 = shift;
my $net2 = shift;
my @trees1 = $net1->explode;
my @trees2 = $net2->explode;
my $FN = 0;
my $FP = 0;
for my $tree1 (@trees1) {
my $found = 0; # false
for my $tree2 (@trees2) {
if (partition_distance($tree1,$tree2) == 0) {
$found = 1; # true
}
}
if (!$found) { $FN++; }
}
for my $tree2 (@trees2) {
my $found = 0; # false
for my $tree1 (@trees1) {
if (partition_distance($tree1,$tree2) == 0) {
$found = 1; # true
}
}
if (!$found) { $FP++; }
}
return (($FN/@trees1)+($FP/@trees2))/2;
}
|
error.rate <- function (net1,net2) {
trees1 <- explode(net1)
trees2 <- explode(net2)
FN <- 0
FP <- 0
for (t1 in trees1) {
tree1 <- read.tree(text=t1)
found <- FALSE
for (t2 in trees2) {
tree2 <- read.tree(text=t2)
if (dist.topo(tree1,tree2) == 0) {
found <- TRUE
break
}
}
if (!found) FN <- FN+1
}
for (t2 in trees2) {
tree2 <- read.tree(text=t2)
found <- FALSE
for (t1 in trees1) {
tree1 <- read.tree(text=t1)
if (dist.topo(tree1,tree2) == 0) {
found <- TRUE
break
}
}
if (!found) FP <- FP+1
}
(FN/length(trees1)+FP/length(trees2))/2
}
|
sub triplet {
my ($net,@triple) = @_;
my ($i,$j,$k) = @triple;
my @L = $net->leaves;
my $ij = LCSA($net,$L[$i],$L[$j]);
my $ik = LCSA($net,$L[$i],$L[$k]);
my $jk = LCSA($net,$L[$j],$L[$k]);
my $t;
if ($net->graph->SP_Dijkstra($ij,$ik)) {
if ($net->graph->SP_Dijkstra($ik,$ij)) {
$t = "(($L[$j],$L[$k]),$L[$i]);";
} else {
if ($net->graph->SP_Dijkstra($jk,$ij)) {
$t = "(($L[$i],$L[$k]),$L[$j]);";
} else {
$t = "(($L[$i],($L[$k])#H1),(#H1,$L[$j]));";
}
}
} else {
if ($net->graph->SP_Dijkstra($ik,$ij)) {
if ($net->graph->SP_Dijkstra($jk,$ij)) {
$t = "(($L[$i],$L[$j]),$L[$k]);";
} else {
$t = "(($L[$i],($L[$j])#H1),(#H1,$L[$k]));";
}
} else {
$t = "(($L[$j],($L[$i])#H1),(#H1,$L[$k]));";
}
}
return $t;
}
|
triplet <- function (net,i,j,k) {
ij <- LCSA(net,i,j)
ik <- LCSA(net,i,k)
jk <- LCSA(net,j,k)
ij.ancestor.of.ik <- length(get.all.shortest.paths(net,
ij,ik,mode="out")) != 0
ik.ancestor.of.ij <- length(get.all.shortest.paths(net,
ik,ij,mode="out")) != 0
ij.ancestor.of.jk <- length(get.all.shortest.paths(net,
ij,jk,mode="out")) != 0
jk.ancestor.of.ij <- length(get.all.shortest.paths(net,
jk,ij,mode="out")) != 0
if (ij.ancestor.of.ik)
if (ik.ancestor.of.ij)
paste("((",V(net)[j]$name,",",V(net)[k]$name,"),",
V(net)[i]$name,");",sep="")
else
if (jk.ancestor.of.ij)
paste("((",V(net)[i]$name,",",V(net)[k]$name,"),",
V(net)[j]$name,");",sep="")
else
paste("((",V(net)[i]$name,",(",V(net)[k]$name,"))#H,(#H,",
V(net)[j]$name,"));",sep="")
else
if (ik.ancestor.of.ij)
if (jk.ancestor.of.ij)
paste("((",V(net)[i]$name,",",V(net)[j]$name,"),",
V(net)[k]$name,");",sep="")
else
paste("((",V(net)[i]$name,",(",V(net)[j]$name,"))#H,(#H,",
V(net)[k]$name,"));",sep="")
else
paste("((",V(net)[j]$name,",(",V(net)[i]$name,"))#H,(#H,",
V(net)[k]$name,"));",sep="")
}
|
sub triplet {
my ($net,@triple) = @_;
my ($i,$j,$k) = @triple;
my @L = $net->leaves;
my $ij = LCSA($net,$L[$i],$L[$j]);
my $ik = LCSA($net,$L[$i],$L[$k]);
my $jk = LCSA($net,$L[$j],$L[$k]);
my @R;
$R[0] = 1;
$R[1] = $net->graph->SP_Dijkstra($ij,$ik) ? 1 : 0;
$R[2] = $net->graph->SP_Dijkstra($ij,$jk) ? 1 : 0;
$R[3] = $net->graph->SP_Dijkstra($ik,$ij) ? 1 : 0;
$R[4] = 1;
$R[5] = $net->graph->SP_Dijkstra($ik,$jk) ? 1 : 0;
$R[6] = $net->graph->SP_Dijkstra($jk,$ij) ? 1 : 0;
$R[7] = $net->graph->SP_Dijkstra($jk,$ik) ? 1 : 0;
$R[8] = 1;
return join "", @R;
}
|
triplet <- function (net,i,j,k) {
ij <- LCSA(net,i,j)
ik <- LCSA(net,i,k)
jk <- LCSA(net,j,k)
L <- c(V(net)[i]$name,V(net)[j]$name,V(net)[k]$name)
R <- matrix(rep(NA,9),nrow=3,dimnames=list(L,L))
R[1,1] <- TRUE
R[1,2] <- length(get.all.shortest.paths(net,ij,ik,mode="out")) != 0
R[1,3] <- length(get.all.shortest.paths(net,ij,jk,mode="out")) != 0
R[2,1] <- length(get.all.shortest.paths(net,ik,ij,mode="out")) != 0
R[2,2] <- TRUE
R[2,3] <- length(get.all.shortest.paths(net,ik,jk,mode="out")) != 0
R[3,1] <- length(get.all.shortest.paths(net,jk,ij,mode="out")) != 0
R[3,2] <- length(get.all.shortest.paths(net,jk,ik,mode="out")) != 0
R[3,3] <- TRUE
R
}
|
sub bottom_up_subgraph {
my $graph = shift;
my $node = shift;
my $sub = Graph->new;
my @Q = ($node);
while (@Q) {
$node = shift @Q;
$sub->add_vertex($node);
push @Q, $graph->successors($node);
}
for my $edge ($graph->edges) {
if ($sub->has_vertex(@$edge[0]) &&
$sub->has_vertex(@$edge[1])) {
$sub->add_edge(@$edge[0],@$edge[1]);
}
}
while (my @V = grep {
$sub->in_degree($_)==1 &&
$sub->out_degree($_)==1 }
$sub->interior_vertices) {
my $v = shift @V;
my @U = $sub->predecessors($v);
my $u = shift @U;
my @W = $sub->successors($v);
my $w = shift @W;
$sub->add_edge($u,$w);
$sub->delete_vertex($v);
}
return $sub;
}
|
bottom.up.subgraph <- function (net,i) {
Q <- c(i)
S <- c()
while (length(Q)>0) {
i <- Q[1]
Q <- Q[-1]
S <- c(S,i)
Q <- c(Q,as.vector(V(net)[nei(i,mode="out")]))
}
g <- subgraph(net,unique(S))
while (length(V(g)[which(degree(g,mode="in")==1 &
degree(g,mode="out")==1)-1])>0) {
D <- V(g)[which(degree(g,mode="in")==1 &
degree(g,mode="out")==1)-1]
v <- head(c(D),n=1) # elementary node
u <- V(g)[nei(v,"in")]
w <- V(g)[nei(v,"out")]
g <- add.edges(g,c(V(g)[u],V(g)[w]))
g <- delete.vertices(g,V(g)[v])
}
g
}
|
sub graph_isomorphic {
my $g1 = shift;
my $g2 = shift;
my $found = 0;
if ($g1->could_be_isomorphic($g2)) {
my $C1 = cluster_map($g1);
my $C2 = cluster_map($g2);
my %X;
for my $node1 ($g1->vertices) {
for my $node2 ($g2->vertices) {
my $lc = List::Compare->new(@{$C1}{$node1},
@{$C2}{$node2});
if ($lc->is_LequivalentR) {
push @{$X{$node1}}, $node2;
}
}
}
my $node1 = undef;
my %M;
my @V1 = sort $g1->vertices;
backtrack($g1,$g2,\%X,$node1,\%M,\@V1,\$found);
}
return $found;
}
|
|
sub backtrack {
my ($g1,$g2,$X,$node1,$M,$V1,$found) = @_;
my %X = %{$X};
my %M = %{$M};
my @V1 = @{$V1};
unless ($$found) {
for my $prev1 (keys %M) {
if ($g1->has_edge($prev1,$node1) &&
!$g2->has_edge($M{$prev1},$M{$node1}) ||
$g1->has_edge($node1,$prev1) &&
!$g2->has_edge($M{$node1},$M{$prev1}) ||
!$g1->has_edge($prev1,$node1) &&
$g2->has_edge($M{$prev1},$M{$node1}) ||
!$g1->has_edge($node1,$prev1) &&
$g2->has_edge($M{$node1},$M{$prev1})) {
return;
}
}
if (scalar @V1) {
$node1 = shift @V1;
for my $node2 (@{$X{$node1}}) {
$M{$node1} = $node2;
backtrack($g1,$g2,$X,$node1,\%M,\@V1,$found);
$M{$node1} = undef;
}
} else {
$$found = 1;
}
}
}
|
|
sub cluster_map {
my $graph = shift;
my %C;
for my $node ($graph->vertices) {
for my $leaf ($graph->successorless_vertices) {
if ($graph->is_reachable($node,$leaf)) {
push @{$C{$node}}, $leaf;
}
}
}
return \%C;
}
|
|
sub max_agreement_subgraph {
my $net1 = shift;
my $net2 = shift;
my $graph1 = $net1->graph;
my $graph2 = $net2->graph;
my $common = Graph->new;
my $size = 0;
for my $node1 ($graph1->interior_vertices) {
my $sub1 = subgraph($graph1,$node1);
for my $node2 ($graph2->interior_vertices) {
my $sub2 = subgraph($graph2,$node2);
if (graph_isomorphic($sub1,$sub2)) {
my $leaves1 = $sub1->successorless_vertices;
if ($leaves1 > $size) {
$common = $sub1;
$size = $leaves1;
}
}
}
}
return $common;
}
|
max.agreement.subgraph <- function (net1,net2) {
max <- graph.empty()
max.size <- 0
for (i in V(net1)) {
g1 <- bottom.up.subgraph(net1,i)
for (j in V(net2)) {
g2 <- bottom.up.subgraph(net2,j)
if (graph.isomorphic(g1,g2)) {
L <- V(g1)[which(degree(g1,mode="out")==0)-1]
if (length(L)>max.size) {
max <- g1
max.size <- length(L)
}
}
}
}
max
}
|
sub triplets_distance {
my $net1 = shift;
my $net2 = shift;
my $comp = Array::Compare->new;
my @L = $net1->leaves;
my $dist = 0;
for (my $i = 0; $i < @L; $i++) {
for (my $j = $i+1; $j < @L; $j++) {
for (my $k = $j+1; $k < @L; $k++) {
my $t1 = triplet($net1,$i,$j,$k);
my $t2 = triplet($net2,$i,$j,$k);
$dist += 2 unless ($t1 eq $t2);
}
}
}
return $dist;
}
|
triplets.distance <- function (net1,net2) {
L <- sort(V(net1)[which(degree(net1,mode="out")==0)-1]$name)
d <- 0
for (i in L[1:(length(L)-2)]) {
i1 <- V(net1)[V(net1)$name == i]
i2 <- V(net2)[V(net2)$name == i]
for (j in L[(match(i,L)+1):(length(L)-1)]) {
j1 <- V(net1)[V(net1)$name == j]
j2 <- V(net2)[V(net2)$name == j]
for (k in L[(match(j,L)+1):length(L)]) {
k1 <- V(net1)[V(net1)$name == k]
k2 <- V(net2)[V(net2)$name == k]
t1 <- triplet(net1,i1,j1,k1)
t2 <- triplet(net2,i2,j2,k2)
if (!isTRUE(all.equal(t1,t2))) { d <- d + 2 }
}
}
}
d
}
|
sub rna_to_protein {
my $rna = shift;
my $protein;
my $i = 0;
while ($i < length($rna) - 2 &&
substr($rna,$i,3) ne "AUG") { # start codon
$i++;
}
$i += 3; # skip the start codon
while ($i < length($rna) - 2 &&
substr($rna,$i,3) ne "UAA" &&
substr($rna,$i,3) ne "UAG" &&
substr($rna,$i,3) ne "UGA") {
my $codon = substr($rna,$i,3);
$protein .= codon_to_amino_acid($codon);
$i += 3;
}
return $protein;
}
|
|
sub codon_to_amino_acid {
my $codon = shift;
if ($codon eq "UUU") { return "F" } # Phe
elsif ($codon eq "UUC") { return "F" } # Phe
elsif ($codon eq "UUA") { return "L" } # Leu
elsif ($codon eq "UUG") { return "L" } # Leu
elsif ($codon eq "UCU") { return "S" } # Ser
elsif ($codon eq "UCC") { return "S" } # Ser
elsif ($codon eq "UCA") { return "S" } # Ser
elsif ($codon eq "UCG") { return "S" } # Ser
elsif ($codon eq "UAU") { return "Y" } # Tyr
elsif ($codon eq "UAC") { return "Y" } # Tyr
elsif ($codon eq "UAA") { return "-" } # stop
elsif ($codon eq "UAG") { return "-" } # stop
elsif ($codon eq "UGU") { return "C" } # Cys
elsif ($codon eq "UGC") { return "C" } # Cys
elsif ($codon eq "UGA") { return "-" } # stop
elsif ($codon eq "UGG") { return "W" } # Trp
elsif ($codon eq "CUU") { return "L" } # Leu
elsif ($codon eq "CUC") { return "L" } # Leu
elsif ($codon eq "CUA") { return "L" } # Leu
elsif ($codon eq "CUG") { return "L" } # Leu
elsif ($codon eq "CCU") { return "P" } # Pro
elsif ($codon eq "CCC") { return "P" } # Pro
elsif ($codon eq "CCA") { return "P" } # Pro
elsif ($codon eq "CCG") { return "P" } # Pro
elsif ($codon eq "CAU") { return "H" } # His
elsif ($codon eq "CAC") { return "H" } # His
elsif ($codon eq "CAA") { return "Q" } # Gln
elsif ($codon eq "CAG") { return "Q" } # Gln
elsif ($codon eq "CGU") { return "R" } # Arg
elsif ($codon eq "CGC") { return "R" } # Arg
elsif ($codon eq "CGA") { return "R" } # Arg
elsif ($codon eq "CGG") { return "R" } # Arg
elsif ($codon eq "AUU") { return "I" } # Ile
elsif ($codon eq "AUC") { return "I" } # Ile
elsif ($codon eq "AUA") { return "I" } # Ile
elsif ($codon eq "AUG") { return "M" } # Met
elsif ($codon eq "ACU") { return "T" } # Thr
elsif ($codon eq "ACC") { return "T" } # Thr
elsif ($codon eq "ACA") { return "T" } # Thr
elsif ($codon eq "ACG") { return "T" } # Thr
elsif ($codon eq "AAU") { return "N" } # Asn
elsif ($codon eq "AAC") { return "N" } # Asn
elsif ($codon eq "AAA") { return "K" } # Lys
elsif ($codon eq "AAG") { return "K" } # Lys
elsif ($codon eq "AGU") { return "S" } # Ser
elsif ($codon eq "AGC") { return "S" } # Ser
elsif ($codon eq "AGA") { return "R" } # Arg
elsif ($codon eq "AGG") { return "R" } # Arg
elsif ($codon eq "GUU") { return "V" } # Val
elsif ($codon eq "GUC") { return "V" } # Val
elsif ($codon eq "GUA") { return "V" } # Val
elsif ($codon eq "GUG") { return "V" } # Val
elsif ($codon eq "GCU") { return "A" } # Ala
elsif ($codon eq "GCC") { return "A" } # Ala
elsif ($codon eq "GCA") { return "A" } # Ala
elsif ($codon eq "GCG") { return "A" } # Ala
elsif ($codon eq "GAU") { return "D" } # Asp
elsif ($codon eq "GAC") { return "D" } # Asp
elsif ($codon eq "GAA") { return "E" } # Glu
elsif ($codon eq "GAG") { return "E" } # Glu
elsif ($codon eq "GGU") { return "G" } # Gly
elsif ($codon eq "GGC") { return "G" } # Gly
elsif ($codon eq "GGA") { return "G" } # Gly
elsif ($codon eq "GGG") { return "G" } # Gly
else { return "*" }
}
|
|
my %codon2aa = ( "UUU" => "F", # Phe "UUC" => "F", # Phe "UUA" => "L", # Leu "UUG" => "L", # Leu "UCU" => "S", # Ser "UCC" => "S", # Ser "UCA" => "S", # Ser "UCG" => "S", # Ser "UAU" => "Y", # Tyr "UAC" => "Y", # Tyr "UAA" => "-", # stop "UAG" => "-", # stop "UGU" => "C", # Cys "UGC" => "C", # Cys "UGA" => "-", # stop "UGG" => "W", # Trp "CUU" => "L", # Leu "CUC" => "L", # Leu "CUA" => "L", # Leu "CUG" => "L", # Leu "CCU" => "P", # Pro "CCC" => "P", # Pro "CCA" => "P", # Pro "CCG" => "P", # Pro "CAU" => "H", # His "CAC" => "H", # His "CAA" => "Q", # Gln "CAG" => "Q", # Gln "CGU" => "R", # Arg "CGC" => "R", # Arg "CGA" => "R", # Arg "CGG" => "R", # Arg "AUU" => "I", # Ile "AUC" => "I", # Ile "AUA" => "I", # Ile "AUG" => "M", # Met "ACU" => "T", # Thr "ACC" => "T", # Thr "ACA" => "T", # Thr "ACG" => "T", # Thr "AAU" => "N", # Asn "AAC" => "N", # Asn "AAA" => "K", # Lys "AAG" => "K", # Lys "AGU" => "S", # Ser "AGC" => "S", # Ser "AGA" => "R", # Arg "AGG" => "R", # Arg "GUU" => "V", # Val "GUC" => "V", # Val "GUA" => "V", # Val "GUG" => "V", # Val "GCU" => "A", # Ala "GCC" => "A", # Ala "GCA" => "A", # Ala "GCG" => "A", # Ala "GAU" => "D", # Asp "GAC" => "D", # Asp "GAA" => "E", # Glu "GAG" => "E", # Glu "GGU" => "G", # Gly "GGC" => "G", # Gly "GGA" => "G", # Gly "GGG" => "G" # Gly ); |
|
sub rna_to_protein {
my $rna = shift;
my $protein;
my $aa;
my $i = 0;
while ($i < length($rna) - 2 &&
substr($rna,$i,3) ne "AUG") { # start codon
$i++;
}
$i += 3; # skip the start codon
while ($i < length($rna) - 2 &&
substr($rna,$i,3) ne "UAA" &&
substr($rna,$i,3) ne "UAG" &&
substr($rna,$i,3) ne "UGA") {
my $codon = substr($rna,$i,3);
if (defined $codon2aa{$codon}) {
$aa = $codon2aa{$codon};
} else {
$aa = "*";
}
$protein .= $aa;
$i += 3;
}
return $protein;
}
|
|
package DNA;
sub reverse_complement {
my $seq = shift;
$seq = reverse $seq;
$seq =~ tr/ACGT/TGCA/;
return $seq;
}
package RNA;
sub reverse_complement {
my $seq = shift;
$seq = reverse $seq;
$seq =~ tr/ACGU/UGCA/;
return $seq;
}
1;
|
|
|
|
rna.to.protein <- function (rna) {
protein <- ""
i <- 1
while (i < nchar(rna)-1 &&
substr(rna,i,i+2) != "AUG") { # start codon
i <- i+1
}
i <- i+3 # skip the start codon
while (i < nchar(rna)-1 &&
substr(rna,i,i+2) != "UAA" &&
substr(rna,i,i+2) != "UAG" &&
substr(rna,i,i+2) != "UGA") {
codon <- substr(rna,i,i+2)
protein <- paste(protein,
codon.to.amino.acid(codon),sep="")
i <- i+3
}
return(protein)
}
|
|
|
codon.to.amino.acid <- function (codon) {
if (codon == "UUU") return("F") # Phe
else if (codon == "UUC") return("F") # Phe
else if (codon == "UUA") return("L") # Leu
else if (codon == "UUG") return("L") # Leu
else if (codon == "UCU") return("S") # Ser
else if (codon == "UCC") return("S") # Ser
else if (codon == "UCA") return("S") # Ser
else if (codon == "UCG") return("S") # Ser
else if (codon == "UAU") return("Y") # Tyr
else if (codon == "UAC") return("Y") # Tyr
else if (codon == "UAA") return("-") # stop
else if (codon == "UAG") return("-") # stop
else if (codon == "UGU") return("C") # Cys
else if (codon == "UGC") return("C") # Cys
else if (codon == "UGA") return("-") # stop
else if (codon == "UGG") return("W") # Trp
else if (codon == "CUU") return("L") # Leu
else if (codon == "CUC") return("L") # Leu
else if (codon == "CUA") return("L") # Leu
else if (codon == "CUG") return("L") # Leu
else if (codon == "CCU") return("P") # Pro
else if (codon == "CCC") return("P") # Pro
else if (codon == "CCA") return("P") # Pro
else if (codon == "CCG") return("P") # Pro
else if (codon == "CAU") return("H") # His
else if (codon == "CAC") return("H") # His
else if (codon == "CAA") return("Q") # Gln
else if (codon == "CAG") return("Q") # Gln
else if (codon == "CGU") return("R") # Arg
else if (codon == "CGC") return("R") # Arg
else if (codon == "CGA") return("R") # Arg
else if (codon == "CGG") return("R") # Arg
else if (codon == "AUU") return("I") # Ile
else if (codon == "AUC") return("I") # Ile
else if (codon == "AUA") return("I") # Ile
else if (codon == "AUG") return("M") # Met
else if (codon == "ACU") return("T") # Thr
else if (codon == "ACC") return("T") # Thr
else if (codon == "ACA") return("T") # Thr
else if (codon == "ACG") return("T") # Thr
else if (codon == "AAU") return("N") # Asn
else if (codon == "AAC") return("N") # Asn
else if (codon == "AAA") return("K") # Lys
else if (codon == "AAG") return("K") # Lys
else if (codon == "AGU") return("S") # Ser
else if (codon == "AGC") return("S") # Ser
else if (codon == "AGA") return("R") # Arg
else if (codon == "AGG") return("R") # Arg
else if (codon == "GUU") return("V") # Val
else if (codon == "GUC") return("V") # Val
else if (codon == "GUA") return("V") # Val
else if (codon == "GUG") return("V") # Val
else if (codon == "GCU") return("A") # Ala
else if (codon == "GCC") return("A") # Ala
else if (codon == "GCA") return("A") # Ala
else if (codon == "GCG") return("A") # Ala
else if (codon == "GAU") return("D") # Asp
else if (codon == "GAC") return("D") # Asp
else if (codon == "GAA") return("E") # Glu
else if (codon == "GAG") return("E") # Glu
else if (codon == "GGU") return("G") # Gly
else if (codon == "GGC") return("G") # Gly
else if (codon == "GGA") return("G") # Gly
else if (codon == "GGG") return("G") # Gly
else return("*")
}
|
|
|
codon2aa <- c( "UUU" = "F", # Phe "UUC" = "F", # Phe "UUA" = "L", # Leu "UUG" = "L", # Leu "UCU" = "S", # Ser "UCC" = "S", # Ser "UCA" = "S", # Ser "UCG" = "S", # Ser "UAU" = "Y", # Tyr "UAC" = "Y", # Tyr "UAA" = "-", # stop "UAG" = "-", # stop "UGU" = "C", # Cys "UGC" = "C", # Cys "UGA" = "-", # stop "UGG" = "W", # Trp "CUU" = "L", # Leu "CUC" = "L", # Leu "CUA" = "L", # Leu "CUG" = "L", # Leu "CCU" = "P", # Pro "CCC" = "P", # Pro "CCA" = "P", # Pro "CCG" = "P", # Pro "CAU" = "H", # His "CAC" = "H", # His "CAA" = "Q", # Gln "CAG" = "Q", # Gln "CGU" = "R", # Arg "CGC" = "R", # Arg "CGA" = "R", # Arg "CGG" = "R", # Arg "AUU" = "I", # Ile "AUC" = "I", # Ile "AUA" = "I", # Ile "AUG" = "M", # Met "ACU" = "T", # Thr "ACC" = "T", # Thr "ACA" = "T", # Thr "ACG" = "T", # Thr "AAU" = "N", # Asn "AAC" = "N", # Asn "AAA" = "K", # Lys "AAG" = "K", # Lys "AGU" = "S", # Ser "AGC" = "S", # Ser "AGA" = "R", # Arg "AGG" = "R", # Arg "GUU" = "V", # Val "GUC" = "V", # Val "GUA" = "V", # Val "GUG" = "V", # Val "GCU" = "A", # Ala "GCC" = "A", # Ala "GCA" = "A", # Ala "GCG" = "A", # Ala "GAU" = "D", # Asp "GAC" = "D", # Asp "GAA" = "E", # Glu "GAG" = "E", # Glu "GGU" = "G", # Gly "GGC" = "G", # Gly "GGA" = "G", # Gly "GGG" = "G" # Gly ) |
|
|
rna.to.protein <- function (rna) {
protein <- ""
i <- 1
while (i < nchar(rna)-1 &&
substr(rna,i,i+2) != "AUG") { # start codon
i <- i+1
}
i <- i+3 # skip the start codon
while (i < nchar(rna)-1 &&
substr(rna,i,i+2) != "UAA" &&
substr(rna,i,i+2) != "UAG" &&
substr(rna,i,i+2) != "UGA") {
codon <- substr(rna,i,i+2)
if (is.na(codon2aa[codon])) {
aa <- "*"
} else {
aa <- codon2aa[codon]
}
protein <- paste(protein,aa,sep="")
i <- i+3
}
return(protein)
}
|
|
|
reverse.complement <- function (seq)
UseMethod("reverse.complement")
reverse.complement.dna <- function (seq) {
rev <- paste(rev(unlist(strsplit(seq,split=""))),sep="",collapse="")
chartr("ACGT","TGCA",rev)
}
reverse.complement.rna <- function (seq) {
rev <- paste(rev(unlist(strsplit(seq,split=""))),sep="",collapse="")
chartr("ACGU","UGCA",rev)
}
|