sub extract_open_reading_frames { my $seq = shift; for my $r (0,1,2) { for (my $i = $r; $i <= length($seq)-3; $i += 3) { if (substr($seq,$i,3) eq "ATG") { my $j = $i+3; while ($j <= length($seq)-3 && substr($seq,$j,3) ne "TAA" && substr($seq,$j,3) ne "TAG" && substr($seq,$j,3) ne "TGA") { $j += 3; } if ($j <= length($seq)-3) { my $len = $j+2-$i+1; if ($len >= 30) { print substr($seq,$i,$j+2-$i+1),"\n"; } } } } } } |
extract.open.reading.frames <- function (seq) { for (i in 1:3) { while (i+2 <= nchar(seq)) { if (substr(seq,i,i+2) == "ATG") { j <- i + 3 while (j+2 <= nchar(seq) && substr(seq,j,j+2) != "TAA" && substr(seq,j,j+2) != "TAG" && substr(seq,j,j+2) != "TGA") { j <- j + 3 } if (j+2 <= nchar(seq)) { if (j+2-i+1 >= 30) { print(c(i,j+2,substr(seq,i,j+2))) } } } i <- i + 3 } } } |
sub longest_open_reading_frame { my $seq = shift; my ($ii,$jj) = (0,0); for my $r (0,1,2) { for (my $i = $r; $i <= length($seq)-3; $i += 3) { if (substr($seq,$i,3) eq "ATG") { my $j = $i+3; while ($j <= length($seq)-3 && substr($seq,$j,3) ne "TAA" && substr($seq,$j,3) ne "TAG" && substr($seq,$j,3) ne "TGA") { $j += 3; } if ($j <= length($seq)-3) { my $len = $j+2-$i+1; if ($j+2-$i+1 > $jj+2-$ii+1) { $ii = $i; $jj = $j; } } } } } return [$ii,$jj+2]; } |
longest.open.reading.frame <- function (seq) { ii <- jj <- 0 for (i in 1:3) { while (i+2 <= nchar(seq)) { if (substr(seq,i,i+2) == "ATG") { j <- i + 3 while (j+2 <= nchar(seq) && substr(seq,j,j+2) != "TAA" && substr(seq,j,j+2) != "TAG" && substr(seq,j,j+2) != "TGA") { j <- j + 3 } if (j+2 <= nchar(seq)) { if (j+2-i+1 > jj+2-ii+1) { ii <- i jj <- j } } } i <- i + 3 } } c(ii,jj+2) } |
sub seq_to_labeled_seq { my $seq = shift; my %seq; for (my $i = 0; $i < length($seq); $i++) { my $elem = substr($seq,$i,1); $seq{$elem}++; } return \%seq; } |
seq.to.labeled.seq <- function (seq) { alphabet <- sort(unique(strsplit(seq,"")[[1]])) lab <- matrix(0,nrow=length(alphabet),ncol=1) dimnames(lab) <- list(alphabet,"count") for (i in 1:nchar(seq)) lab[substr(seq,i,i),] <- lab[substr(seq,i,i),] + 1 lab } |
sub symmetric_difference { my $seq1 = shift; my $seq2 = shift; my %seq1 = %{ seq_to_labeled_seq($seq1) }; my %seq2 = %{ seq_to_labeled_seq($seq2) }; for my $elem (keys %seq2) { $seq1{$elem} = abs($seq1{$elem}-$seq2{$elem}); } return \%seq1; } |
symmetric.difference <- function (seq1,seq2) { lab1 <- seq.to.labeled.seq(seq1) lab2 <- seq.to.labeled.seq(seq2) for (elem in row.names(lab2)) lab1[elem,] <- abs(lab1[elem,] - lab2[elem,]) lab1 } |
sub reverse_complement { my $seq = shift; $seq = reverse $seq; $seq =~ tr/ACGT/TGCA/; return $seq; } |
reverse.complement <- function (seq) { rev <- paste(rev(unlist(strsplit(seq,split=""))),sep="",collapse="") chartr("ACGT","TGCA",rev) } |
sub count { my $n = shift; my @R; $R[0] = $R[1] = $R[2] = 1; for (my $i = 2; $i < $n; $i++) { $R[$i+1] = $R[$i]; for (my $j = 1; $j < $i; $j++) { $R[$i+1] += $R[$j-1] * $R[$i-$j] } } return $R[$n]; } |
count <- function (n) { R <- rep(0,n) R[1:3] <- 1 if (n > 2) { for (i in 2:(n-1)) { R[i+2] <- R[i+1] for (j in 1:(i-1)) { R[i+2] <- R[i+2] + R[j] * R[i-j+1] } } } R[n+1] } |
sub words { my $n = shift; my $alphabet = shift; my @n = split "", $alphabet; if ($n == 1) { return \@n; } else { my $short = words($n - 1, $alphabet); my @long; for my $seq (@$short) { for my $n (@n) { push @long, $seq . $n; } } return \@long; } } |
library(seqinr) words(length=3,alphabet=s2c("ACGT")) |
sub word_composition { my $seq = shift; my $k = shift; my $alphabet = shift; my $words = words($k,$alphabet); my %freq; for my $word (@$words) { $freq{$word} = 0; } for my $i (0 .. length($seq)-$k) { my $word = substr($seq,$i,$k); $freq{$word}++; } return \%freq; } |
library(seqinr) count(s2c(seq),word=3,alphabet=s2c("ACGT")) |
|
library(seqinr) alignment.free.distance <- function (seq1,seq2,k,sigma) { freq1 <- count(seq1,word=k,alphabet=s2c(sigma)) freq2 <- count(seq2,word=k,alphabet=s2c(sigma)) cor(freq1,freq2) } |
sub mean { my $a = shift; my $res; foreach (@$a) { $res += $_ } $res /= @$a; } |
|
sub sd { my $a = shift; my $mean = mean($a); return sqrt( mean( [map $_ ** 2, @$a] ) - ($mean ** 2) ); } |
|
sub cov { my $a1 = shift; my $a2 = shift; my $res; foreach (0 .. @$a1 - 1) { $res += $a1->[$_] * $a2->[$_]; } $res /= @$a1; $res -= mean($a1) * mean($a2); } |
|
sub cor { my $a1 = shift; my $a2 = shift; return cov($a1,$a2) / (sd($a1) * sd($a2)); } |
|
sub occurrences { my $p = shift; my $t = shift; my $m = length $p; my $n = length $t; my @L; for (my $i = 0; $i < $n-$m+1; $i++) { if ($p eq substr($t,$i,$m)) { push @L, $i; } } return \@L; } |
occurrences <- function (p,t) { L <- c() for (i in 1:nchar(t)-nchar(p)+1) if (p == substr(t,i,i+nchar(p)-1)) L <- c(L,i) L } |
sub suffix_array { my $s = shift; my @sa = (0 .. length($s)-1); @sa = sort {substr($s,$a) cmp substr($s,$b)} @sa; return \@sa; } |
suffix.array <- function (seq) { sa <- apply(matrix(1:nchar(seq)),1,function(i)substr(seq,i,nchar(seq))) order(sa) } |
sub occurrences { my $p = shift; my $t = shift; my $sa = shift; my ($l, $r) = (0, $#sa); while ($l <= $r) { my $i = int(($l + $r)/2); my $c = $p cmp substr($t, $sa[$i], length $p); if ($c < 0) { $r = $i - 1; } elsif ($c > 0) { $l = $i + 1; } else { my $j = $i; while ($i > 0 && $p eq substr($t, $sa[$i-1], length $p)) { $i--; } while ($j < $#sa-1 && $p eq substr($t, $sa[$j+1], length $p)) { $j++; } return ($i, $j); } } return (-1, -1); } |
occurrences <- function (p,t,sa) { ell <- 1 r <- nchar(t) while (ell <= r) { i <- as.integer((ell + r) / 2) s <- substr(t,sa[i],sa[i]+nchar(p)-1) if (p < s) { r <- i - 1 } else if (p > s) { ell <- i + 1 } else { j <- i while (i > 1 && p==substr(t,sa[i-1],sa[i-1]+nchar(p)-1)) { i <- i - 1 } while (j < nchar(t) && p==substr(t,sa[j+1],sa[j+1]+nchar(p)-1)) { j <- j + 1 } return(c(i,j)) } } return(c(-1,-1)) } |
sub lcp { my ($p, $i, $t, $j) = @_; my $c = 0; while ($i+$c < length $p && $j+$c < length $t && substr($p,$i+$c,1) eq substr($t,$j+$c,1)) { $c++; } return $c; } |
lcp <- function (seq1,pos1,seq2,pos2) { n1 <- nchar(seq1) n2 <- nchar(seq2) c <- 0 while (pos1+c <= n1 && pos2+c <= n2 && substr(seq1,pos1+c,pos1+c) == substr(seq2,pos2+c,pos2+c)) { c <- c + 1 } c } |
sub occurrences { my $p = shift; my $t = shift; my $sa = shift; my ($l, $r) = (0, $#sa); my ($llcp, $rlcp) = (0, 0); while ($l <= $r) { my $i = int(($l + $r)/2); my $h = $llcp; $h = $rlcp if $rlcp < $h; my $c = lcp($p,$h,$t,$sa[$i]+$h); if ($h+$c == length $p) { my $j = $i; while ($i > 0 && $p eq substr($t, $sa[$i-1], length $p)) { $i--; } while ($j < $#sa-1 && $p eq substr($t, $sa[$j+1], length $p)) { $j++; } return ($i, $j); } elsif (substr($p,$h+$c,1) lt substr($t,$sa[$i]+$h+$c,1)) { $r = $i - 1; $rlcp = $h + $c; } else { $l = $i + 1; $llcp = $h + $c; } } return (-1, -1); } |
occurrences <- function (p,t,sa) { ell <- 1 r <- nchar(t) llcp <- 0 rlcp <- 0 while (ell <= r) { i <- as.integer((ell + r) / 2) h <- min(llcp,rlcp) c <- lcp(p,h+1,t,sa[i]+h) if (h+c == nchar(p)) { j <- i while (i > 1 && p==substr(t,sa[i-1],sa[i-1]+nchar(p)-1)) { i <- i - 1 } while (j < nchar(t) && p==substr(t,sa[j+1],sa[j+1]+nchar(p)-1)) { j <- j + 1 } return(c(i,j)) } else if (substr(p,h+c+1,h+c+1) < substr(t,sa[i]+h+c,sa[i]+h+c)) { r <- i - 1 rlcp <- h + c } else { ell <- i + 1 llcp <- h + c } } return(c(-1,-1)) } |
sub common_occurrences { my $p = shift; my $t1 = shift; my $t2 = shift; my $m = length $p; my $n1 = length $t1; my $n2 = length $t2; my @L; for (my $i = 0; $i < $n1-$m+1; $i++) { if ($p eq substr($t1,$i,$m)) { for (my $j = 0; $j < $n2-$m+1; $j++) { if ($p eq substr($t2,$j,$m)) { push @L, $i, $j; } } } } return \@L; } |
common.occurrences <- function (p,t1,t2) { L <- c() for (i in 1:nchar(t1)-nchar(p)+1) if (p == substr(t1,i,i+nchar(p)-1)) for (j in 1:nchar(t2)-nchar(p)+1) if (p == substr(t2,j,j+nchar(p)-1)) L <- c(L,i,j) L } |
sub longest_common_subsequences { my $s1 = shift; my $s2 = shift; my $n1 = length($s1); my $n2 = length($s2); my @L; my $l = 0; for (my $i = 0; $i < $n1; $i++) { for (my $j = $i; $j < $n1; $j++) { my $seq = substr($s1,$i,$j-$i+1); for (my $k = 0; $k < $n2-$j+$i; $k++) { if ($seq eq substr($s2,$k,$j-$i+1)) { if (length($seq) == $l) { push @L, $seq; } elsif (length($seq) > $l) { @L = $seq; $l = length($seq); } } } } } my %count; for my $seq (@L) { $count{$seq}++; } @L = sort keys %count; return \@L; } |
longest.common.subsequences <- function (s1,s2) { L <- c() ell <- 0 for (i in 1:nchar(s1)) { for (j in i:nchar(s1)) { seq <- substr(s1,i,j) for (k in 1:(nchar(s2)-j+i)) { if (seq == substr(s2,k,k+j-i)) { if (nchar(seq) == ell) { L <- c(L,seq) } else if (nchar(seq) > ell) { L <- c(seq) ell <- nchar(seq) } } } } } unique(L) } |
sub longest_common_subsequences { my $s1 = shift; my $s2 = shift; my @L; my $l = 0; my @LCS; for (my $i = 0; $i < length $s1; $i++ ) { for (my $j = 0; $j < length $s2; $j++ ) { $LCS[$i][$j] = 0; if (substr($s1,$i,1) eq substr($s2,$j,1)) { if ($i == 0 || $j == 0) { $LCS[$i][$j] = 1; } else { $LCS[$i][$j] = $LCS[$i-1][$j-1] + 1; } if ($LCS[$i][$j] > $l) { $l = $LCS[$i][$j]; @L = (); } if ($LCS[$i][$j] == $l) { push @L, substr($s1,$i-$l+1,$l); } } } } return @L; } |
longest.common.subsequences <- function (s1,s2) { L <- c() ell <- 0 LCS <- matrix(0,nrow=nchar(s1),ncol=nchar(s2)) for (i in 1:nchar(s1)) { for (j in 1:nchar(s2)) { if (substr(s1,i,i) == substr(s2,j,j)) { if (i == 1 || j == 1) { LCS[i,j] <- 1 } else { LCS[i,j] <- LCS[i-1,j-1] + 1 } if (LCS[i,j] > ell) { ell <- LCS[i,j] L <- c() } if (LCS[i,j] == ell) { L <- c(L,substr(s1,i-ell+1,i)) } } } } L } |
sub generalized_suffix_array { my $seq = shift; my %seq = %{$seq}; my @gsa; for my $id (keys %seq) { for my $i (0 .. length($seq{$id})-1) { push @gsa, [$i, $id]; } } @gsa = sort {substr($seq{@{$a}[1]}, @{$a}[0]) cmp substr($seq{@{$b}[1]}, @{$b}[0])} @gsa; return \@gsa; } |
generalized.suffix.array <- function (s1,s2) { sa1 <- apply(matrix(1:nchar(s1)),1, function (i) substr(s1,i,nchar(s1))) sa2 <- apply(matrix(1:nchar(s2)),1, function (i) substr(s2,i,nchar(s2))) arr <- order(c(sa1,sa2)) tag <- sapply(arr,function (i) if (i <= nchar(s1)) { 1 } else { 2 } ) arr <- sapply(arr,function (i) if (i <= nchar(s1)) { i } else { i-nchar(s1) } ) cbind(arr,tag) } |
sub common_occurrences { my $p = shift; my $seq = shift; my %seq = %{$seq}; my $gsa = shift; my @gsa = @{$gsa}; my ($l, $r) = (0, $#gsa); while ($l <= $r) { my $i = int(($l + $r)/2); my $c = $p cmp substr($seq{$gsa[$i][1]}, $gsa[$i][0], length $p); if ($c < 0) { $r = $i - 1; } elsif ($c > 0) { $l = $i + 1; } else { my $j = $i; while ($i > 0 && $p eq substr($seq{$gsa[$i-1][1]}, $gsa[$i-1][0], length $p)) { $i--; } while ($j < $#gsa && $p eq substr($seq{$gsa[$j+1][1]}, $gsa[$j+1][0], length $p)) { $j++; } return ($i, $j); } } return (-1, -1); } |
common.occurrences <- function (p,seq,gsa) { ell <- 1 r <- n <- nchar(seq[1]) + nchar(seq[2]) while (ell <= r) { i <- as.integer((ell + r) / 2) s <- substr(seq[gsa[i,2]], gsa[i,1],gsa[i,1]+nchar(p)-1) if (p < s) { r <- i - 1 } else if (p > s) { ell <- i + 1 } else { j <- i while (i > 1 && p == s) { s <- substr(seq[gsa[i-1,2]], gsa[i-1,1],gsa[i-1,1]+nchar(p)-1) if (p == s) i <- i - 1 } s <- p while (j < n && p == s) { s <- substr(seq[gsa[j+1,2]], gsa[j+1,1],gsa[j+1,1]+nchar(p)-1) if (p == s) j <- j + 1 } return(c(i,j)) } } return(c(-1,-1)) } |
sub common_occurrences { my $p = shift; my $seq = shift; my %seq = %{$seq}; my $gsa = shift; my @gsa = @{$gsa}; my ($l, $r) = (0, $#gsa); my ($llcp, $rlcp) = (0, 0); while ($l <= $r) { my $i = int(($l + $r)/2); my $h = $llcp; $h = $rlcp if $rlcp < $h; my $c = lcp($p,$h,$seq{$gsa[$i][1]},$gsa[$i][0]+$h); if ($h+$c == length $p) { my $j = $i; while ($i > 0 && $p eq substr($seq{$gsa[$i-1][1]}, $gsa[$i-1][0], length $p)) { $i--; } while ($j < $#gsa-1 && $p eq substr($seq{$gsa[$j+1][1]}, $gsa[$j+1][0], length $p)) { $j++; } return ($i, $j); } elsif (substr($p,$h+$c,1) lt substr($seq{$gsa[$i][1]},$gsa[$i][0]+$h+$c,1)) { $r = $i - 1; $rlcp = $h + $c; } else { $l = $i + 1; $llcp = $h + $c; } } return (-1, -1); } |
common.occurrences <- function (p,seq,gsa) { ell <- 1 r <- n <- nchar(seq[1]) + nchar(seq[2]) llcp <- 0 rlcp <- 0 while (ell <= r) { i <- as.integer((ell + r) / 2) s <- substr(seq[gsa[i,2]],gsa[i,1], gsa[i,1]+nchar(p)-1) h <- min(llcp,rlcp) c <- lcp(p,h+1,s,1+h) if (h+c == nchar(p)) { j <- i while (i > 1 && p == s) { s <- substr(seq[gsa[i-1,2]],gsa[i-1,1], gsa[i-1,1]+nchar(p)-1) if (p == s) i <- i - 1 } s <- p while (j < n && p == s) { s <- substr(seq[gsa[j+1,2]],gsa[j+1,1], gsa[j+1,1]+nchar(p)-1) if (p == s) j <- j + 1 } return(c(i,j)) } else { s <- substr(seq[gsa[i,2]],gsa[i,1]+h+c, gsa[i,1]+h+c) if (substr(p,h+c+1,h+c+1) < s) { r <- i - 1 rlcp <- h + c } else { ell <- i + 1 llcp <- h + c } } } return(c(-1,-1)) } |
sub longest_common_subsequences { my $seq = shift; my %seq = %{$seq}; my $gsa = shift; my @gsa = @{$gsa}; my @L; my $l = 0; my $f2 = substr($seq{$gsa[0][1]}, $gsa[0][0], length($seq{$gsa[0][1]})); for (my $i = 1; $i < length($seq{1})+length($seq{2}); $i++) { my $f1 = $f2; $f2 = substr($seq{$gsa[$i][1]}, $gsa[$i][0], length($seq{$gsa[$i][1]})); if ($gsa[$i][1] != $gsa[$i-1][1]) { my $p = lcp($f1,0,$f2,0); if ($p > $l) { $l = $p; @L = (); } if ($p == $l) { push @L, substr($seq{$gsa[$i][1]},$gsa[$i][0],$p); } } } return \@L; } |
longest.common.subsequences <- function (seq,gsa) { L <- c() ell <- 0 suf2 <- substr(seq[gsa[1,2]],gsa[1,1], nchar(seq[gsa[1,2]])) for (i in 2:(nrow(gsa))) { suf1 <- suf2 suf2 <- substr(seq[gsa[i,2]],gsa[i,1], nchar(seq[gsa[i,2]])) if (gsa[i-1,2] != gsa[i,2]) { pref <- substr(suf1,1,lcp(suf1,1,suf2,1)) if (nchar(pref) > ell) { ell <- nchar(pref) L <- c() } if (nchar(pref) == ell) { L <- c(L,pref) } } } L } |
sub hamming_distance { my $s1 = shift; my $s2 = shift; my $d = -1; if (length $s1 == length $s2) { $d = 0; for (my $i = 0; $i < length $s1; $i++) { $d++ if substr($s1,$i,1) ne substr($s2,$i,1); } } return $d; } |
hamming.distance <- function (s1,s2) { if (nchar(s1) != nchar(s2)) -1 else sum(strsplit(s1,"")[[1]] != strsplit(s2,"")[[1]]) } |
sub levenshtein_distance { my $s1 = shift; my $s2 = shift; my ($n1, $n2) = (length $s1, length $s2); my @d; for ( my $i = 0; $i <= $n1; $i++ ) { $d[$i][0] = $i; } for ( my $j = 0; $j <= $n2; $j++ ) { $d[0][$j] = $j; } my @t1 = split //, $s1; my @t2 = split //, $s2; for ( my $i = 1; $i <= $n1; $i++ ) { for ( my $j = 1; $j <= $n2; $j++ ) { if ($t1[$i-1] eq $t2[$j-1]) { $d[$i][$j] = $d[$i-1][$j-1]; } else { $d[$i][$j] = $d[$i-1][$j] + 1; if ($d[$i][$j-1] + 1 < $d[$i][$j]) { $d[$i][$j] = $d[$i][$j-1] + 1; } } } } return \@d; } |
levenshtein.distance <- function(s1,s2) { t1 <- strsplit(s1,split="")[[1]] t2 <- strsplit(s2,split="")[[1]] n1 <- length(t1) n2 <- length(t2) d <- array(0,dim=c(n1+1,n2+1)) d[,1] <- 0:n1 d[1,] <- 0:n2 for (i in 2:(n1+1)) for (j in 2:(n2+1)) if (t1[i-1] == t2[j-1]) d[i,j] <- d[i-1,j-1] else d[i,j] <- min(d[i-1,j]+1, d[i,j-1]+1) d } |
sub edit_distance { my $s1 = shift; my $s2 = shift; my ($n1, $n2) = (length $s1, length $s2); my @d; for ( my $i = 0; $i <= $n1; $i++ ) { $d[$i][0] = $i; } for ( my $j = 0; $j <= $n2; $j++ ) { $d[0][$j] = $j; } my @t1 = split //, $s1; my @t2 = split //, $s2; for ( my $i = 1; $i <= $n1; $i++ ) { for ( my $j = 1; $j <= $n2; $j++ ) { $d[$i][$j] = $d[$i-1][$j] + 1; if ($d[$i][$j-1] + 1 < $d[$i][$j]) { $d[$i][$j] = $d[$i][$j-1] + 1; } if ($t1[$i-1] eq $t2[$j-1]) { if ($d[$i-1][$j-1] < $d[$i][$j]) { $d[$i][$j] = $d[$i-1][$j-1]; } } else { if ($d[$i-1][$j-1] + 1 < $d[$i][$j]) { $d[$i][$j] = $d[$i-1][$j-1] + 1; } } } } return \@d; } |
edit.distance <- function (s1,s2) { t1 <- strsplit(s1,split="")[[1]] t2 <- strsplit(s2,split="")[[1]] n1 <- length(t1) n2 <- length(t2) d <- array(0,dim=c(n1+1,n2+1)) d[,1] <- 0:n1 d[1,] <- 0:n2 for (i in 2:(n1+1)) { for (j in 2:(n2+1)) { d[i,j] <- min(d[i-1,j]+1, d[i,j-1]+1) if (t1[i-1] == t2[j-1]) d[i,j] <- min(d[i,j], d[i-1,j-1]) else d[i,j] <- min(d[i,j], d[i-1,j-1]+1) } } d } |
sub alignment { my $s1 = shift; my $s2 = shift; my $d = shift; my @d = @{$d}; my ($t1, $t2) = ("", ""); my @t1 = split //, $s1; my @t2 = split //, $s2; my ($n1, $n2) = (length $s1, length $s2); my ($i, $j) = ($n1, $n2); while ($i > 0 && $j > 0) { if ($d[$i][$j] eq $d[$i][$j-1]+1) { $t1 = "-" . $t1; $t2 = $t2[$j-1] . $t2; $j--; } elsif ($d[$i][$j] eq $d[$i-1][$j]+1) { $t1 = $t1[$i-1] . $t1; $t2 = "-" . $t2; $i--; } else { $t1 = $t1[$i-1] . $t1; $t2 = $t2[$j-1] . $t2; $i--; $j--; } } while ($j > 0) { $t1 = "-" . $t1; $t2 = $t2[$j-1] . $t2; $j--; } while ($i > 0) { $t1 = $t1[$i-1] . $t1; $t2 = "-" . $t2; $i--; } print "$t1\n$t2\n"; } |
alignment <- function (s1,s2,d) { t1 <- strsplit(s1,split="")[[1]] t2 <- strsplit(s2,split="")[[1]] a1 <- a2 <- c() i <- length(t1)+1 j <- length(t2)+1 while (i > 1 && j > 1) { if (d[i,j] == d[i,j-1]+1) { a1 <- c("-",a1) a2 <- c(t2[j-1],a2) j <- j-1 } else if (d[i,j] == d[i-1,j]+1) { a1 <- c(t1[i-1],a1) a2 <- c("-",a2) i <- i-1 } else { a1 <- c(t1[i-1],a1) a2 <- c(t2[j-1],a2) i <- i-1 j <- j-1 } } while (j > 1) { a1 <- c("-",a1) a2 <- c(t2[j-1],a2) j <- j-1 } while (i > 1) { a1 <- c(t1[i-1],a1) a2 <- c("-",a2) i <- i-1 } print(paste(a1,collapse="")) print(paste(a2,collapse="")) } |
sub local_alignment { my ($s1, $s2, $match, $mismatch, $gap) = @_; my ($n1, $n2) = ( length $s1, length $s2); my @s; for (my $i = 0; $i <= $n1; $i++) { $s[$i][0] = 0; } for (my $j = 0; $j <= $n2; $j++) { $s[0][$j] = 0; } my @t1 = split //, $s1; my @t2 = split //, $s2; for ( my $i = 1; $i <= $n1; $i++ ) { for ( my $j = 1; $j <= $n2; $j++ ) { if ($t1[$i-1] eq $t2[$j-1]) { $s[$i][$j] = $s[$i-1][$j-1] + $match; } else { $s[$i][$j] = $s[$i-1][$j-1] + $mismatch; } $s[$i][$j] = $s[$i-1][$j] + $gap if $s[$i-1][$j] + $gap > $s[$i][$j]; $s[$i][$j] = $s[$i][$j-1] + $gap if $s[$i][$j-1] + $gap > $s[$i][$j]; $s[$i][$j] = 0 if 0 > $s[$i][$j]; } } my $max = 0; my @opt; for ( my $i = 1; $i <= $n1; $i++ ) { for ( my $j = 1; $j <= $n2; $j++ ) { if ($s[$i][$j] > $max) { $max = $s[$i][$j]; @opt = (); } if ($s[$i][$j] == $max) { $max = $s[$i][$j]; push @opt, $i, $j; } } } while (@opt) { my $i = shift @opt; my $j = shift @opt; my ($t1, $t2) = ("", ""); while ($s[$i][$j] != 0) { if (($t1[$i-1] eq $t2[$j-1] && $s[$i][$j] eq $s[$i-1][$j-1] + $match) || ($t1[$i-1] ne $t2[$j-1] && $s[$i][$j] eq $s[$i-1][$j-1] + $mismatch)) { $t1 = $t1[$i-1] . $t1; $t2 = $t2[$j-1] . $t2; $i--; $j--; } elsif ($s[$i][$j] eq $s[$i-1][$j] + $gap) { $t1 = $t1[$i-1] . $t1; $t2 = "-" . $t2; $i--; } elsif ($s[$i][$j] eq $s[$i][$j-1] + $gap) { $t1 = "-" . $t1; $t2 = $t2[$j-1] . $t2; $j--; } } print "$t1\n$t2\n"; } } |
local.alignment <- function (s1,s2,match,mismatch,gap) { t1 <- strsplit(s1,split="")[[1]] t2 <- strsplit(s2,split="")[[1]] n1 <- length(t1) n2 <- length(t2) s <- array(0,dim=c(n1+1,n2+1)) s[,1] <- 0 s[1,] <- 0 for (i in 2:(n1+1)) { for (j in 2:(n2+1)) { if (t1[i-1] == t2[j-1]) s[i,j] <- s[i-1,j-1] + match else s[i,j] <- s[i-1,j-1] + mismatch s[i,j] <- max( s[i,j], s[i-1,j] + gap, s[i,j-1] + gap, 0) } } opt <- which(s==max(s),arr.ind=TRUE) for (sol in 1:nrow(opt)) { i <- opt[sol,1] j <- opt[sol,2] m1 <- c() m2 <- c() while (s[i,j] != 0) { if ((t1[i-1] == t2[j-1] && s[i,j] == s[i-1,j-1] + match) || (t1[i-1] != t2[j-1] && s[i,j] == s[i-1,j-1] + mismatch)) { m1 <- c(t1[i-1],m1) m2 <- c(t2[j-1],m2) i <- i - 1 j <- j - 1 } else if (s[i,j] == s[i-1,j] + gap) { m1 <- c(t1[i-1],m1) m2 <- c("-",m2) i <- i - 1 } else if (s[i,j] == s[i,j-1] + gap) { m1 <- c("-",m1) m2 <- c(t2[j-1],m2) j <- j - 1 } } print(paste(m1,collapse="")) print(paste(m2,collapse="")) } } |
sub partition { my $tree = shift; my %partition = (); for my $node ($tree->get_nodes) { next if $node->is_Leaf; # discard terminal nodes next unless $node->ancestor; # discard root my @a = sort { $a cmp $b } map { $_->id } grep {$_->is_Leaf } $node->get_all_Descendents; my $aa = join ",", @a; my @b = (); my %count = (); foreach my $label (@a, map { $_->id } $tree->get_leaf_nodes) { $count{$label}++ } foreach my $label (keys %count) { push @b, $label if $count{$label} == 1; } my $bb = join ",", sort { $a cmp $b } @b; my $p = $aa le $bb ? "$aa:$bb" : "$bb:$aa"; $partition{$p} = 1; } my @partition = sort keys %partition; return \@partition; } |
|
sub partition_distance { my $tree1 = shift; my $tree2 = shift; my @partition1 = @{ partition($tree1) }; my @partition2 = @{ partition($tree2) }; my $dist = 0; my %count = (); foreach my $p (@partition1, @partition2) { $count{$p}++ } foreach my $p (keys %count) { $dist++ if $count{$p} == 1; } return $dist; } |
library(ape) dist.topo(t1,t2) |
sub distances { my $tree = shift; my @leaves = sort {$a->id cmp $b->id} $tree->get_leaf_nodes; my @labels = map { $_->id } @leaves; my $n = scalar @labels; my @dist; for my $i (1..$n-1) { my @nodes = $tree->find_node(-id => $labels[$i-1]); for my $j ($i+1..$n) { push @nodes, $tree->find_node(-id => $labels[$j-1]); push @dist, $tree->distance(-nodes => \@nodes); pop @nodes; } } return \@dist; } |
|
sub nodal_distance { my $tree1 = shift; my $tree2 = shift; my @dist1 = @{ distances($tree1) }; my @dist2 = @{ distances($tree2) }; my ($dist, $d1, $d2); while (@dist1 and @dist2) { $d1 = pop @dist1; $d2 = pop @dist2; $dist += abs($d1-$d2); } return $dist; } |
library(ape) nodal.distance <- function (t1,t2) { m1<-cophenetic.phylo(t1) m1<-m1[order(rownames(m1)),order(colnames(m1))] n1<-m1[upper.tri(m1)] m2<-cophenetic.phylo(t2) m2<-m2[order(rownames(m2)),order(colnames(m2))] n2<-m2[upper.tri(m2)] sum(abs(n1-n2)) } |
sub triplet { my $tree = shift; my $i = shift; my $j = shift; my $k = shift; my @ij = grep { $_->id =~ /$i|$j/ } $tree->get_leaf_nodes; my @ik = grep { $_->id =~ /$i|$k/ } $tree->get_leaf_nodes; my @jk = grep { $_->id =~ /$j|$k/ } $tree->get_leaf_nodes; my $ij = $tree->get_lca(-nodes => \@ij); my $ik = $tree->get_lca(-nodes => \@ik); my $jk = $tree->get_lca(-nodes => \@jk); my $str; if ($ik == $jk) { $str = "(($i,$j),$k);"; } else { if ($ij == $jk) { $str = "(($i,$k),$j);"; } else { $str = "(($j,$k),$i);"; } } return $str; } |
library(ape) triplet <- function (t,i,j,k) { ij <- mrca(t)[i,j] ik <- mrca(t)[i,k] jk <- mrca(t)[j,k] if (ik == jk) paste("((",i,",",j,"),",k,");",sep="") else if (ij == jk) paste("((",i,",",k,"),",j,");",sep="") else paste("((",j,",",k,"),",i,");",sep="") } |
use Clone qw(clone); sub quartet { my ($tree,$i,$j,$k,$l) = @_; my $copy = clone $tree; map { $copy->remove_Node($_) } grep { !($_->id =~ /$i|$j|$k|$l/) } $copy->get_leaf_nodes; return $copy; } |
library(ape) quartet <- function (t,i,j,k,l) { unroot(drop.tip(t,setdiff(t$tip.label,c(i,j,k,l)))) } |
|
parent <- function (tree,node) tree$edge[which(tree$edge[,2]==node),1] |
|
children <- function (tree,node) tree$edge[which(tree$edge[,1]==node),2] |
|
is.root <- function (tree,node) length(tree$tip.label)+1==node |
|
is.ancestor <- function(tree,anc,des) { if (anc==des) return(TRUE) if (is.root(tree,des)) return(FALSE) else return(is.ancestor(tree,anc,parent(tree,des))) } |
|
postorder <- function (tree) { r <- length(tree$tip.label)+1 postorder.traversal(tree,r,c()) } |
|
postorder.traversal <- function (tree,v,res) { for (w in t1$edge[which(tree$edge[,1]==v),2]) res <- postorder.traversal(tree,w,res) c(res,v) } |
sub mast_size { my $tree1 = shift; my $tree2 = shift; my (@m, $m); for my $node1 (@{ $tree1->Bio::Tree::Compatible::postorder_traversal }) { my $n1 = $node1->internal_id; for my $node2 (@{ $tree2->Bio::Tree::Compatible::postorder_traversal }) { my $n2 = $node2->internal_id; if ($node1->is_Leaf) { if ($node2->is_Leaf) { $m = ($node1->id eq $node2->id ? 1 : 0); } else { my $label1 = $node1->id; $m = (grep { /$label1/ } map { $_->id } grep { $_->is_Leaf } $node2->get_all_Descendents) ? 1 : 0; } } else { if ($node2->is_Leaf) { my $label2 = $node2->id; $m = (grep { /$label2/ } map { $_->id } grep { $_->is_Leaf } $node1->get_all_Descendents) ? 1 : 0; } else { my ($l1,$r1) = map { $_->internal_id } $node1->each_Descendent; my ($l2,$r2) = map { $_->internal_id } $node2->each_Descendent; $m = $m[$l1][$l2] + $m[$r1][$r2]; if ($m[$l1][$r2] + $m[$r1][$l2] > $m) { $m = $m[$l1][$r2] + $m[$r1][$l2]; } if ($m[$n1][$l2] > $m) { $m = $m[$n1][$l2]; } if ($m[$n1][$r2] > $m) { $m = $m[$n1][$r2]; } if ($m[$l1][$n2] > $m) { $m = $m[$l1][$n2]; } if ($m[$r1][$n2] > $m) { $m = $m[$r1][$n2]; } } } $m[$n1][$n2] = $m; } } return $m; } |
mast.size <- function (t1,t2) { po1 <- postorder(t1) po2 <- postorder(t2) m <- matrix(0,nrow=length(po1),ncol=length(po2),dimnames=list(po1,po2)) for (i in po1) { for (j in po2) { if (length(t1$edge[which(t1$edge[,1]==i),1])==0) { if (length(t2$edge[which(t2$edge[,1]==j),1])==0) { if (t1$tip.label[i]==t2$tip.label[j]) m[i,j] <- 1 } else { jj <- which(t2$tip.label==t1$tip.label[i]) if (is.ancestor(t2,j,jj)) m[i,j] <- 1 } } else { if (length(t2$edge[which(t2$edge[,1]==j),1])==0) { ii <- which(t1$tip.label==t2$tip.label[j]) if (is.ancestor(t1,i,ii)) m[i,j] <- 1 } else { l1 <- children(t1,i)[1] r1 <- children(t1,i)[2] l2 <- children(t2,j)[1] r2 <- children(t2,j)[2] m[i,j] <- max(m[l1,l2]+m[r1,r2],m[l1,r2]+m[r1,l2], m[i,l2],m[i,r2],m[l1,j],m[r1,j]) } } } } m <- m[po1,po2] dimnames(m) <- list(po1,po2) m } |
sub mast { my $tree1 = shift; my $tree2 = shift; my @m; for my $node1 (@{ $tree1->Bio::Tree::Compatible::postorder_traversal }) { my $n1 = $node1->internal_id; for my $node2 (@{ $tree2->Bio::Tree::Compatible::postorder_traversal }) { my $n2 = $node2->internal_id; my @mm = (); if ($node1->is_Leaf) { if ($node2->is_Leaf) { if ($node1->id eq $node2->id) { @mm = ($node1->id); } } else { my $label1 = $node1->id; if (grep { /$label1/ } map { $_->id } grep { $_->is_Leaf } $node2->get_all_Descendents) { @mm = ($label1); } } } else { if ($node2->is_Leaf) { my $label2 = $node2->id; if (grep { /$label2/ } map { $_->id } grep { $_->is_Leaf } $node1->get_all_Descendents) { @mm = ($label2); } } else { my ($l1,$r1) = map { $_->internal_id } $node1->each_Descendent; my ($l2,$r2) = map { $_->internal_id } $node2->each_Descendent; @mm = @{$m[$l1][$l2]}; push @mm, @{$m[$r1][$r2]}; if (scalar(@{$m[$l1][$r2]})+scalar(@{$m[$r1][$l2]})>scalar(@mm)) { @mm = @{$m[$l1][$r2]}; push @mm, @{$m[$r1][$l2]}; } if (scalar(@{$m[$n1][$l2]})>scalar(@mm)) { @mm = @{$m[$n1][$l2]}; } if (scalar(@{$m[$n1][$r2]})>scalar(@mm)) { @mm = @{$m[$n1][$r2]}; } if (scalar(@{$m[$l1][$n2]})>scalar(@mm)) { @mm = @{$m[$l1][$n2]}; } if (scalar(@{$m[$r1][$n2]})>scalar(@mm)) { @mm = @{$m[$r1][$n2]}; } } } $m[$n1][$n2] = \@mm; } } return \@m; } |
mast <- function (t1,t2) { po1 <- postorder(t1) po2 <- postorder(t2) m <- matrix(rep(list(),length(po1)),nrow=length(po1),ncol=length(po2), dimnames=list(po1,po2)) for (i in po1) { for (j in po2) { mm <- list() if (length(t1$edge[which(t1$edge[,1]==i),1])==0) { if (length(t2$edge[which(t2$edge[,1]==j),1])==0) { if (t1$tip.label[i]==t2$tip.label[j]) mm <- c(t1$tip.label[i]) } else { jj <- which(t2$tip.label==t1$tip.label[i]) if (is.ancestor(t2,j,jj)) mm <- c(t1$tip.label[i]) } } else { if (length(t2$edge[which(t2$edge[,1]==j),1])==0) { ii <- which(t1$tip.label==t2$tip.label[j]) if (is.ancestor(t1,i,ii)) mm <- c(t2$tip.label[j]) } else { l1 <- children(t1,i)[1] r1 <- children(t1,i)[2] l2 <- children(t2,j)[1] r2 <- children(t2,j)[2] mm <- c(m[[l1,l2]],m[[r1,r2]]) if ( length(m[[l1,r2]]) + length(m[[r1,l2]]) > length(mm) ) mm <- c(m[[l1,r2]],m[[r1,l2]]) if ( length(m[[i,l2]]) > length(mm) ) mm <- m[[i,l2]] if ( length(m[[i,r2]]) > length(mm) ) mm <- m[[i,r2]] if ( length(m[[l1,j]]) > length(mm) ) mm <- m[[l1,j]] if ( length(m[[r1,j]]) > length(mm) ) mm <- m[[r1,j]] } } m[[i,j]] <- mm } } unlist(m[[length(t1$tip.label)+1,length(t2$tip.label)+1]]) } |
sub triplets_distance { my $tree1 = shift; my $tree2 = shift; my @leaves = sort {$a->id cmp $b->id} $tree1->get_leaf_nodes; my @labels = map { $_->id } @leaves; my $dist = 0; for (my $i = 0; $i < @leaves; $i++) { for (my $j = $i+1; $j < @leaves; $j++) { for (my $k = $j+1; $k < @leaves; $k++) { my $t1 = triplet($tree1,$labels[$i],$labels[$j],$labels[$k]); my $t2 = triplet($tree2,$labels[$i],$labels[$j],$labels[$k]); $dist += 2 unless ($t1 eq $t2); } } } return $dist; } |
triplets.distance <- function (t1,t2) { L <- sort(t1$tip.label) d <- 0 for (i in L[1:(length(L)-2)]) { for (j in L[(match(i,L)+1):(length(L)-1)]) { for (k in L[(match(j,L)+1):length(L)]) { str1 <- triplet(t1,i,j,k) str2 <- triplet(t2,i,j,k) if (str1 != str2) { d <- d + 2 } } } } d } |
sub quartets_distance { my $tree1 = shift; my $tree2 = shift; my @leaves = sort {$a->id cmp $b->id} $tree1->get_leaf_nodes; my @labels = map { $_->id } @leaves; my $dist = 0; for (my $i = 0; $i < @leaves; $i++) { for (my $j = $i+1; $j < @leaves; $j++) { for (my $k = $j+1; $k < @leaves; $k++) { for (my $l = $k+1; $l < @leaves; $l++) { my $q1 = quartet($tree1, $labels[$i],$labels[$j],$labels[$k],$labels[$l]); my $q2 = quartet($tree2, $labels[$i],$labels[$j],$labels[$k],$labels[$l]); if (partition_distance($q1,$q2) != 0) { $dist += 2; } } } } } return $dist; } |
quartets.distance <- function (t1,t2) { L <- sort(t1$tip.label) d <- 0 for (i in L[1:(length(L)-3)]) { for (j in L[(match(i,L)+1):(length(L)-2)]) { for (k in L[(match(j,L)+1):(length(L)-1)]) { for (l in L[(match(k,L)+1):length(L)]) { q1 <- quartet(t1,i,j,k,l) q2 <- quartet(t2,i,j,k,l) if (dist.topo(q1,q2) != 0) { d <- d + 2 } } } } } d } |
|
network.height <- function (net) { V(net)$height <- 0 V(net)$visited <- FALSE Q <- as.vector(V(net)[which(degree(net,mode="out")==0)-1]) while (length(Q)>0) { j <- Q[1] Q <- Q[-1] V(net)[j]$visited <- TRUE I <- as.vector(V(net)[nei(j,mode="in")]) for (i in I) { V(net)[i]$height <- max(V(net)[i]$height,V(net)[j]$height+1) if (all(V(net)[nei(i,mode="out")]$visited)) Q <- c(Q,i) } } V(net)$height } |
use Bio::PhyloNetwork; sub is_strict_ancestor { my $net = shift; my $u = shift; my $v = shift; my $dag = $net->graph->copy_graph; my @roots = $dag->source_vertices; my $root = shift @roots; $dag->delete_vertex($u); return !$dag->is_reachable($root,$v); } |
library(igraph) is.strict.ancestor <- function (net,i,j) { if (length(get.all.shortest.paths(net, V(net)[i],V(net)[j],mode="out")) == 0) return(FALSE) r <- V(net)[which(degree(net,mode="in")==0)-1] if (i == r || i == j) return(TRUE) net <- delete.vertices(net,i) if (i < j) j <- j - 1 # account for deleted node length(get.all.shortest.paths(net,r,V(net)[j],mode="out")) == 0 } |
|
is.ancestor <- function (net,i,j) { length(get.all.shortest.paths(net,V(net)[i],V(net)[j],mode="out")) != 0 } |
sub CSA { my $net = shift; my $v = shift; my $w = shift; my $dag = $net->graph; my @csa = (); foreach my $u ( $net->nodes ) { if ( $dag->is_reachable($u,$v) and $dag->is_reachable($u,$w) ) { if (is_strict_ancestor($net,$u,$v) or is_strict_ancestor($net,$u,$w)) { push @csa, $u; } } } return \@csa; } |
CSA <- function (net,i,j) { strict <- lapply(V(net),function (u) is.ancestor(net,u,i) && is.ancestor(net,u,j) && (is.strict.ancestor(net,u,i) || is.strict.ancestor(net,u,j))) V(net)[unlist(strict)] } |
sub LCSA { my $net = shift; my $v = shift; my $w = shift; my %height = $net->heights; my @CSA = @{ CSA($net,$v,$w) }; my $lcsa = shift @CSA; for my $node ( @CSA ) { if ( $height{$node} < $height{$lcsa} ) { $lcsa = $node; } } return $lcsa; } |
LCSA <- function (net,i,j) { csa <- match(CSA(net,i,j),V(net)) V(net)[csa[which.min(network.height(net)[csa])]-1] } |
|
path.multiplicity <- function (net) { leaves <- V(net)[which(degree(net,mode="out")==0)-1] mu <- matrix(0,nrow=vcount(net),ncol=length(leaves), dimnames=list(sort(V(net)$name),sort(leaves$name))) for (v in sort(V(net)[leaves]$name)) mu[v,v] <- 1 V(net)$visited <- FALSE Q <- as.vector(V(net)[which(degree(net,mode="out")==0)-1]) while (length(Q)>0) { j <- Q[1] Q <- Q[-1] V(net)[j]$visited <- TRUE I <- as.vector(V(net)[nei(j,mode="in")]) for (i in I) { mu[V(net)[i]$name,] <- mu[V(net)[i]$name,] + mu[V(net)[j]$name,] if (all(V(net)[nei(i,mode="out")]$visited)) Q <- c(Q,i) } } mu } |
|
lex.cmp <- function(vec1,vec2) { index <- which.min(vec1 == vec2) as.numeric(sign(vec1[index] - vec2[index])) } path.multiplicity.distance <- function (net1,net2) { mu1 <- path.multiplicity(net1) mu1 <- mu1[do.call("order",lapply(1:ncol(mu1),function (j) mu1[,j])),] mu2 <- path.multiplicity(net2) mu2 <- mu2[do.call("order",lapply(1:ncol(mu2),function (j) mu2[,j])),] i1 <- 1 i2 <- 1 c <- 0 while (i1 <= nrow(mu1) && i2 <= nrow(mu2)) { lex <- lex.cmp(mu1[i1,],mu2[i2,]) if ( lex == -1 ) { i1 <- i1 + 1 } else if ( lex == 1 ) { i2 <- i2 + 1 } else { i1 <- i1 + 1 i2 <- i2 + 1 c <- c + 1 } } nrow(mu1)+nrow(mu2)-2*c } |
|
tripartitions <- function (net) { leaves <- V(net)[which(degree(net,mode="out")==0)-1] theta <- matrix("",nrow=vcount(net),ncol=length(leaves), dimnames=list(sort(V(net)$name),sort(leaves$name))) for (i in V(net)) { for (j in leaves) { if ( is.ancestor(net,i,j) ) { if ( is.strict.ancestor(net,i,j) ) { theta[V(net)[i]$name,V(net)[j]$name] <- "A" } else { theta[V(net)[i]$name,V(net)[j]$name] <- "B" } } else { theta[V(net)[i]$name,V(net)[j]$name] <- "C" } } } theta } |
use Bio::PhyloNetwork; sub tripartition_distance { my $net1 = shift; my $net2 = shift; $net1->compute_tripartitions() unless defined $net1->{tripartitions}; $net2->compute_tripartitions() unless defined $net2->{tripartitions}; my @tri1 = sort map {$net1->{tripartitions}->{$_}} $net1->nodes; my @tri2 = sort map {$net2->{tripartitions}->{$_}} $net2->nodes; my $i1 = 0; my $i2 = 0; my $c = 0; while ($i1 < scalar @tri1 && $i2 < scalar @tri2) { if ($tri1[$i1] lt $tri2[$i2]) { $i1++; } elsif ($tri1[$i1] gt $tri2[$i2]) { $i2++; } else { $i1++; $i2++; $c++; } } return scalar @tri1+scalar @tri2-2*$c; } |
library(igraph) lex.cmp.char <- function (vec1,vec2) { for (j in 1:length(vec1)) { if (vec1[j] < vec2[j]) { return(-1) } if (vec1[j] > vec2[j]) { return(1) } } return(0) } tripartition.distance <- function (net1,net2) { theta1 <- tripartitions(net1) theta1 <- theta1[do.call("order",lapply(1:ncol(theta1), function (j) theta1[,j])),] theta2 <- tripartitions(net2) theta2 <- theta2[do.call("order",lapply(1:ncol(theta2), function (j) theta2[,j])),] i1 <- 1 i2 <- 1 c <- 0 while (i1 <= nrow(theta1) && i2 <= nrow(theta2)) { lex <- lex.cmp.char(theta1[i1,],theta2[i2,]) if ( lex == -1 ) { i1 <- i1 + 1 } else if ( lex == 1 ) { i2 <- i2 + 1 } else { i1 <- i1 + 1 i2 <- i2 + 1 c <- c + 1 } } nrow(theta1)+nrow(theta2)-2*c } |
sub nodal_distance { my $net1 = shift; my $net2 = shift; my @L = $net1->leaves; my $dist = 0; for (my $i = 0; $i < @L; $i++) { for (my $j = $i+1; $j < @L; $j++) { my $lcsa1 = LCSA($net1,$L[$i],$L[$j]); my $lcsa2 = LCSA($net2,$L[$i],$L[$j]); my @p1 = $net1->graph->SP_Dijkstra($lcsa1,$L[$i]); my @p2 = $net2->graph->SP_Dijkstra($lcsa2,$L[$i]); my $d1 = @p1 - 1; my $d2 = @p2 - 1; $dist += abs($d1 - $d2); @p1 = $net1->graph->SP_Dijkstra($lcsa1,$L[$j]); @p2 = $net2->graph->SP_Dijkstra($lcsa2,$L[$j]); $d1 = @p1 - 1; $d2 = @p2 - 1; $dist += abs($d1 - $d2); } } return $dist; } |
library(igraph) nodal.distance <- function (net1,net2) { l1 <- V(net1)[which(degree(net1,mode="out")==0)-1] n1 <- matrix(0,nrow=length(l1),ncol=length(l1), dimnames=list(sort(l1$name),sort(l1$name))) l2 <- V(net2)[which(degree(net2,mode="out")==0)-1] n2 <- matrix(0,nrow=length(l2),ncol=length(l2), dimnames=list(sort(l2$name),sort(l2$name))) for (i in l1) { for (j in c(l1)[match(i,l1):length(l1)]) { lcsa <- LCSA(net1,i,j) paths <- get.shortest.paths(net1,lcsa,i,mode="out") n1[V(net1)[i]$name,V(net1)[j]$name] <- length(paths[[1]])-1 paths <- get.shortest.paths(net1,lcsa,j,mode="out") n1[V(net1)[j]$name,V(net1)[i]$name] <- length(paths[[1]])-1 } } for (i in l2) { for (j in c(l2)[match(i,l2):length(l2)]) { lcsa <- LCSA(net2,i,j) paths <- get.shortest.paths(net2,lcsa,i,mode="out") n2[V(net2)[i]$name,V(net2)[j]$name] <- length(paths[[1]])-1 paths <- get.shortest.paths(net2,lcsa,j,mode="out") n2[V(net2)[j]$name,V(net2)[i]$name] <- length(paths[[1]])-1 } } sum(abs(n1-n2)) } |
sub explode_rec { my ($self,$trees) = @_; my @h = $self->hybrid_nodes; if (scalar @h) { my $v = shift @h; for my $u ($self->{graph}->predecessors($v)) { $self->{graph}->delete_edge($u,$v); $self->explode_rec($trees); $self->{graph}->add_edge($u,$v); } } else { my $io = IO::String->new($self->eNewick); my $treeio = Bio::TreeIO->new(-format => 'newick', -fh => $io); my $tree = $treeio->next_tree; $tree->contract_linear_paths; push @{$trees}, $tree; } } sub explode { my ($self) = @_; my @trees; $self->explode_rec(\@trees); return @trees; } |
network.to.newick <- function (net) { r <- V(net)[which(degree(net,mode="in")==0)-1] newick <- obtain.newick(net,r,c()) paste(c(newick,";"),collapse="") } obtain.newick <- function (net,v,newick) { if (length(V(net)[nei(v,"out")]) != 0) { if ( length(V(net)[nei(v,"in")]) != 0 && match(v,V(net)[nei(V(net)[nei(v,"in")],"out")]) != 1 ) newick <- c(newick,",") newick <- c(newick,"(") } for (w in V(net)[nei(v,"out")]) { newick <- obtain.newick(net,w,newick) } if (length(V(net)[nei(v,"out")]) != 0) newick <- c(newick,")") if ( length(V(net)[nei(v,"in")]) != 0 && length(V(net)[nei(v,"out")]) == 0 && match(v,V(net)[nei(V(net)[nei(v,"in")],"out")]) != 1 ) newick <- c(newick,",") c(newick,V(net)[v]$name) } network.to.tree <- function (net) { V(net)$del <- FALSE E(net)$del <- FALSE r <- V(net)[which(degree(net,mode="in")==0)-1] net <- postorder.traversal(net,r) net <- delete.vertices(net,V(net)[del]) net <- delete.edges(net,E(net)[del]) network.to.newick(net) } postorder.traversal <- function (net,v) { for (w in V(net)[nei(v,"out")]) { net <- postorder.traversal(net,w) } VW <- E(net)[from == v & del==FALSE] if (length(c(VW)) == 1) { u <- V(net)[nei(v,"in")] w <- get.edge(net,VW)[2] E(net,path=c(V(net)[u],V(net)[v]))$del <- TRUE E(net,path=c(V(net)[v],V(net)[w]))$del <- TRUE V(net)[v]$del <- TRUE net <- add.edges(net,c(V(net)[u],V(net)[w])) E(net,path=c(V(net)[u],V(net)[w]))$del <- FALSE } net } explode <- function (net) { trees <- c() H <- V(net)[which(degree(net,mode="in")>1)-1] if (length(H) == 0) { trees <- c(trees,network.to.tree(net)) } else { v <- c(H)[1] for (u in V(net)[nei(v,"in")]) { new <- delete.edges(net,E(net,P=c(u,v))) trees <- c(trees,explode(new)) } } trees } |
sub error_ratio { my $net1 = shift; my $net2 = shift; my @trees1 = $net1->explode; my @trees2 = $net2->explode; my $FN = 0; my $FP = 0; for my $tree1 (@trees1) { my $found = 0; # false for my $tree2 (@trees2) { if (partition_distance($tree1,$tree2) == 0) { $found = 1; # true } } if (!$found) { $FN++; } } for my $tree2 (@trees2) { my $found = 0; # false for my $tree1 (@trees1) { if (partition_distance($tree1,$tree2) == 0) { $found = 1; # true } } if (!$found) { $FP++; } } return (($FN/@trees1)+($FP/@trees2))/2; } |
error.rate <- function (net1,net2) { trees1 <- explode(net1) trees2 <- explode(net2) FN <- 0 FP <- 0 for (t1 in trees1) { tree1 <- read.tree(text=t1) found <- FALSE for (t2 in trees2) { tree2 <- read.tree(text=t2) if (dist.topo(tree1,tree2) == 0) { found <- TRUE break } } if (!found) FN <- FN+1 } for (t2 in trees2) { tree2 <- read.tree(text=t2) found <- FALSE for (t1 in trees1) { tree1 <- read.tree(text=t1) if (dist.topo(tree1,tree2) == 0) { found <- TRUE break } } if (!found) FP <- FP+1 } (FN/length(trees1)+FP/length(trees2))/2 } |
sub triplet { my ($net,@triple) = @_; my ($i,$j,$k) = @triple; my @L = $net->leaves; my $ij = LCSA($net,$L[$i],$L[$j]); my $ik = LCSA($net,$L[$i],$L[$k]); my $jk = LCSA($net,$L[$j],$L[$k]); my $t; if ($net->graph->SP_Dijkstra($ij,$ik)) { if ($net->graph->SP_Dijkstra($ik,$ij)) { $t = "(($L[$j],$L[$k]),$L[$i]);"; } else { if ($net->graph->SP_Dijkstra($jk,$ij)) { $t = "(($L[$i],$L[$k]),$L[$j]);"; } else { $t = "(($L[$i],($L[$k])#H1),(#H1,$L[$j]));"; } } } else { if ($net->graph->SP_Dijkstra($ik,$ij)) { if ($net->graph->SP_Dijkstra($jk,$ij)) { $t = "(($L[$i],$L[$j]),$L[$k]);"; } else { $t = "(($L[$i],($L[$j])#H1),(#H1,$L[$k]));"; } } else { $t = "(($L[$j],($L[$i])#H1),(#H1,$L[$k]));"; } } return $t; } |
triplet <- function (net,i,j,k) { ij <- LCSA(net,i,j) ik <- LCSA(net,i,k) jk <- LCSA(net,j,k) ij.ancestor.of.ik <- length(get.all.shortest.paths(net, ij,ik,mode="out")) != 0 ik.ancestor.of.ij <- length(get.all.shortest.paths(net, ik,ij,mode="out")) != 0 ij.ancestor.of.jk <- length(get.all.shortest.paths(net, ij,jk,mode="out")) != 0 jk.ancestor.of.ij <- length(get.all.shortest.paths(net, jk,ij,mode="out")) != 0 if (ij.ancestor.of.ik) if (ik.ancestor.of.ij) paste("((",V(net)[j]$name,",",V(net)[k]$name,"),", V(net)[i]$name,");",sep="") else if (jk.ancestor.of.ij) paste("((",V(net)[i]$name,",",V(net)[k]$name,"),", V(net)[j]$name,");",sep="") else paste("((",V(net)[i]$name,",(",V(net)[k]$name,"))#H,(#H,", V(net)[j]$name,"));",sep="") else if (ik.ancestor.of.ij) if (jk.ancestor.of.ij) paste("((",V(net)[i]$name,",",V(net)[j]$name,"),", V(net)[k]$name,");",sep="") else paste("((",V(net)[i]$name,",(",V(net)[j]$name,"))#H,(#H,", V(net)[k]$name,"));",sep="") else paste("((",V(net)[j]$name,",(",V(net)[i]$name,"))#H,(#H,", V(net)[k]$name,"));",sep="") } |
sub triplet { my ($net,@triple) = @_; my ($i,$j,$k) = @triple; my @L = $net->leaves; my $ij = LCSA($net,$L[$i],$L[$j]); my $ik = LCSA($net,$L[$i],$L[$k]); my $jk = LCSA($net,$L[$j],$L[$k]); my @R; $R[0] = 1; $R[1] = $net->graph->SP_Dijkstra($ij,$ik) ? 1 : 0; $R[2] = $net->graph->SP_Dijkstra($ij,$jk) ? 1 : 0; $R[3] = $net->graph->SP_Dijkstra($ik,$ij) ? 1 : 0; $R[4] = 1; $R[5] = $net->graph->SP_Dijkstra($ik,$jk) ? 1 : 0; $R[6] = $net->graph->SP_Dijkstra($jk,$ij) ? 1 : 0; $R[7] = $net->graph->SP_Dijkstra($jk,$ik) ? 1 : 0; $R[8] = 1; return join "", @R; } |
triplet <- function (net,i,j,k) { ij <- LCSA(net,i,j) ik <- LCSA(net,i,k) jk <- LCSA(net,j,k) L <- c(V(net)[i]$name,V(net)[j]$name,V(net)[k]$name) R <- matrix(rep(NA,9),nrow=3,dimnames=list(L,L)) R[1,1] <- TRUE R[1,2] <- length(get.all.shortest.paths(net,ij,ik,mode="out")) != 0 R[1,3] <- length(get.all.shortest.paths(net,ij,jk,mode="out")) != 0 R[2,1] <- length(get.all.shortest.paths(net,ik,ij,mode="out")) != 0 R[2,2] <- TRUE R[2,3] <- length(get.all.shortest.paths(net,ik,jk,mode="out")) != 0 R[3,1] <- length(get.all.shortest.paths(net,jk,ij,mode="out")) != 0 R[3,2] <- length(get.all.shortest.paths(net,jk,ik,mode="out")) != 0 R[3,3] <- TRUE R } |
sub bottom_up_subgraph { my $graph = shift; my $node = shift; my $sub = Graph->new; my @Q = ($node); while (@Q) { $node = shift @Q; $sub->add_vertex($node); push @Q, $graph->successors($node); } for my $edge ($graph->edges) { if ($sub->has_vertex(@$edge[0]) && $sub->has_vertex(@$edge[1])) { $sub->add_edge(@$edge[0],@$edge[1]); } } while (my @V = grep { $sub->in_degree($_)==1 && $sub->out_degree($_)==1 } $sub->interior_vertices) { my $v = shift @V; my @U = $sub->predecessors($v); my $u = shift @U; my @W = $sub->successors($v); my $w = shift @W; $sub->add_edge($u,$w); $sub->delete_vertex($v); } return $sub; } |
bottom.up.subgraph <- function (net,i) { Q <- c(i) S <- c() while (length(Q)>0) { i <- Q[1] Q <- Q[-1] S <- c(S,i) Q <- c(Q,as.vector(V(net)[nei(i,mode="out")])) } g <- subgraph(net,unique(S)) while (length(V(g)[which(degree(g,mode="in")==1 & degree(g,mode="out")==1)-1])>0) { D <- V(g)[which(degree(g,mode="in")==1 & degree(g,mode="out")==1)-1] v <- head(c(D),n=1) # elementary node u <- V(g)[nei(v,"in")] w <- V(g)[nei(v,"out")] g <- add.edges(g,c(V(g)[u],V(g)[w])) g <- delete.vertices(g,V(g)[v]) } g } |
sub graph_isomorphic { my $g1 = shift; my $g2 = shift; my $found = 0; if ($g1->could_be_isomorphic($g2)) { my $C1 = cluster_map($g1); my $C2 = cluster_map($g2); my %X; for my $node1 ($g1->vertices) { for my $node2 ($g2->vertices) { my $lc = List::Compare->new(@{$C1}{$node1}, @{$C2}{$node2}); if ($lc->is_LequivalentR) { push @{$X{$node1}}, $node2; } } } my $node1 = undef; my %M; my @V1 = sort $g1->vertices; backtrack($g1,$g2,\%X,$node1,\%M,\@V1,\$found); } return $found; } |
|
sub backtrack { my ($g1,$g2,$X,$node1,$M,$V1,$found) = @_; my %X = %{$X}; my %M = %{$M}; my @V1 = @{$V1}; unless ($$found) { for my $prev1 (keys %M) { if ($g1->has_edge($prev1,$node1) && !$g2->has_edge($M{$prev1},$M{$node1}) || $g1->has_edge($node1,$prev1) && !$g2->has_edge($M{$node1},$M{$prev1}) || !$g1->has_edge($prev1,$node1) && $g2->has_edge($M{$prev1},$M{$node1}) || !$g1->has_edge($node1,$prev1) && $g2->has_edge($M{$node1},$M{$prev1})) { return; } } if (scalar @V1) { $node1 = shift @V1; for my $node2 (@{$X{$node1}}) { $M{$node1} = $node2; backtrack($g1,$g2,$X,$node1,\%M,\@V1,$found); $M{$node1} = undef; } } else { $$found = 1; } } } |
|
sub cluster_map { my $graph = shift; my %C; for my $node ($graph->vertices) { for my $leaf ($graph->successorless_vertices) { if ($graph->is_reachable($node,$leaf)) { push @{$C{$node}}, $leaf; } } } return \%C; } |
|
sub max_agreement_subgraph { my $net1 = shift; my $net2 = shift; my $graph1 = $net1->graph; my $graph2 = $net2->graph; my $common = Graph->new; my $size = 0; for my $node1 ($graph1->interior_vertices) { my $sub1 = subgraph($graph1,$node1); for my $node2 ($graph2->interior_vertices) { my $sub2 = subgraph($graph2,$node2); if (graph_isomorphic($sub1,$sub2)) { my $leaves1 = $sub1->successorless_vertices; if ($leaves1 > $size) { $common = $sub1; $size = $leaves1; } } } } return $common; } |
max.agreement.subgraph <- function (net1,net2) { max <- graph.empty() max.size <- 0 for (i in V(net1)) { g1 <- bottom.up.subgraph(net1,i) for (j in V(net2)) { g2 <- bottom.up.subgraph(net2,j) if (graph.isomorphic(g1,g2)) { L <- V(g1)[which(degree(g1,mode="out")==0)-1] if (length(L)>max.size) { max <- g1 max.size <- length(L) } } } } max } |
sub triplets_distance { my $net1 = shift; my $net2 = shift; my $comp = Array::Compare->new; my @L = $net1->leaves; my $dist = 0; for (my $i = 0; $i < @L; $i++) { for (my $j = $i+1; $j < @L; $j++) { for (my $k = $j+1; $k < @L; $k++) { my $t1 = triplet($net1,$i,$j,$k); my $t2 = triplet($net2,$i,$j,$k); $dist += 2 unless ($t1 eq $t2); } } } return $dist; } |
triplets.distance <- function (net1,net2) { L <- sort(V(net1)[which(degree(net1,mode="out")==0)-1]$name) d <- 0 for (i in L[1:(length(L)-2)]) { i1 <- V(net1)[V(net1)$name == i] i2 <- V(net2)[V(net2)$name == i] for (j in L[(match(i,L)+1):(length(L)-1)]) { j1 <- V(net1)[V(net1)$name == j] j2 <- V(net2)[V(net2)$name == j] for (k in L[(match(j,L)+1):length(L)]) { k1 <- V(net1)[V(net1)$name == k] k2 <- V(net2)[V(net2)$name == k] t1 <- triplet(net1,i1,j1,k1) t2 <- triplet(net2,i2,j2,k2) if (!isTRUE(all.equal(t1,t2))) { d <- d + 2 } } } } d } |
sub rna_to_protein { my $rna = shift; my $protein; my $i = 0; while ($i < length($rna) - 2 && substr($rna,$i,3) ne "AUG") { # start codon $i++; } $i += 3; # skip the start codon while ($i < length($rna) - 2 && substr($rna,$i,3) ne "UAA" && substr($rna,$i,3) ne "UAG" && substr($rna,$i,3) ne "UGA") { my $codon = substr($rna,$i,3); $protein .= codon_to_amino_acid($codon); $i += 3; } return $protein; } |
|
sub codon_to_amino_acid { my $codon = shift; if ($codon eq "UUU") { return "F" } # Phe elsif ($codon eq "UUC") { return "F" } # Phe elsif ($codon eq "UUA") { return "L" } # Leu elsif ($codon eq "UUG") { return "L" } # Leu elsif ($codon eq "UCU") { return "S" } # Ser elsif ($codon eq "UCC") { return "S" } # Ser elsif ($codon eq "UCA") { return "S" } # Ser elsif ($codon eq "UCG") { return "S" } # Ser elsif ($codon eq "UAU") { return "Y" } # Tyr elsif ($codon eq "UAC") { return "Y" } # Tyr elsif ($codon eq "UAA") { return "-" } # stop elsif ($codon eq "UAG") { return "-" } # stop elsif ($codon eq "UGU") { return "C" } # Cys elsif ($codon eq "UGC") { return "C" } # Cys elsif ($codon eq "UGA") { return "-" } # stop elsif ($codon eq "UGG") { return "W" } # Trp elsif ($codon eq "CUU") { return "L" } # Leu elsif ($codon eq "CUC") { return "L" } # Leu elsif ($codon eq "CUA") { return "L" } # Leu elsif ($codon eq "CUG") { return "L" } # Leu elsif ($codon eq "CCU") { return "P" } # Pro elsif ($codon eq "CCC") { return "P" } # Pro elsif ($codon eq "CCA") { return "P" } # Pro elsif ($codon eq "CCG") { return "P" } # Pro elsif ($codon eq "CAU") { return "H" } # His elsif ($codon eq "CAC") { return "H" } # His elsif ($codon eq "CAA") { return "Q" } # Gln elsif ($codon eq "CAG") { return "Q" } # Gln elsif ($codon eq "CGU") { return "R" } # Arg elsif ($codon eq "CGC") { return "R" } # Arg elsif ($codon eq "CGA") { return "R" } # Arg elsif ($codon eq "CGG") { return "R" } # Arg elsif ($codon eq "AUU") { return "I" } # Ile elsif ($codon eq "AUC") { return "I" } # Ile elsif ($codon eq "AUA") { return "I" } # Ile elsif ($codon eq "AUG") { return "M" } # Met elsif ($codon eq "ACU") { return "T" } # Thr elsif ($codon eq "ACC") { return "T" } # Thr elsif ($codon eq "ACA") { return "T" } # Thr elsif ($codon eq "ACG") { return "T" } # Thr elsif ($codon eq "AAU") { return "N" } # Asn elsif ($codon eq "AAC") { return "N" } # Asn elsif ($codon eq "AAA") { return "K" } # Lys elsif ($codon eq "AAG") { return "K" } # Lys elsif ($codon eq "AGU") { return "S" } # Ser elsif ($codon eq "AGC") { return "S" } # Ser elsif ($codon eq "AGA") { return "R" } # Arg elsif ($codon eq "AGG") { return "R" } # Arg elsif ($codon eq "GUU") { return "V" } # Val elsif ($codon eq "GUC") { return "V" } # Val elsif ($codon eq "GUA") { return "V" } # Val elsif ($codon eq "GUG") { return "V" } # Val elsif ($codon eq "GCU") { return "A" } # Ala elsif ($codon eq "GCC") { return "A" } # Ala elsif ($codon eq "GCA") { return "A" } # Ala elsif ($codon eq "GCG") { return "A" } # Ala elsif ($codon eq "GAU") { return "D" } # Asp elsif ($codon eq "GAC") { return "D" } # Asp elsif ($codon eq "GAA") { return "E" } # Glu elsif ($codon eq "GAG") { return "E" } # Glu elsif ($codon eq "GGU") { return "G" } # Gly elsif ($codon eq "GGC") { return "G" } # Gly elsif ($codon eq "GGA") { return "G" } # Gly elsif ($codon eq "GGG") { return "G" } # Gly else { return "*" } } |
|
my %codon2aa = ( "UUU" => "F", # Phe "UUC" => "F", # Phe "UUA" => "L", # Leu "UUG" => "L", # Leu "UCU" => "S", # Ser "UCC" => "S", # Ser "UCA" => "S", # Ser "UCG" => "S", # Ser "UAU" => "Y", # Tyr "UAC" => "Y", # Tyr "UAA" => "-", # stop "UAG" => "-", # stop "UGU" => "C", # Cys "UGC" => "C", # Cys "UGA" => "-", # stop "UGG" => "W", # Trp "CUU" => "L", # Leu "CUC" => "L", # Leu "CUA" => "L", # Leu "CUG" => "L", # Leu "CCU" => "P", # Pro "CCC" => "P", # Pro "CCA" => "P", # Pro "CCG" => "P", # Pro "CAU" => "H", # His "CAC" => "H", # His "CAA" => "Q", # Gln "CAG" => "Q", # Gln "CGU" => "R", # Arg "CGC" => "R", # Arg "CGA" => "R", # Arg "CGG" => "R", # Arg "AUU" => "I", # Ile "AUC" => "I", # Ile "AUA" => "I", # Ile "AUG" => "M", # Met "ACU" => "T", # Thr "ACC" => "T", # Thr "ACA" => "T", # Thr "ACG" => "T", # Thr "AAU" => "N", # Asn "AAC" => "N", # Asn "AAA" => "K", # Lys "AAG" => "K", # Lys "AGU" => "S", # Ser "AGC" => "S", # Ser "AGA" => "R", # Arg "AGG" => "R", # Arg "GUU" => "V", # Val "GUC" => "V", # Val "GUA" => "V", # Val "GUG" => "V", # Val "GCU" => "A", # Ala "GCC" => "A", # Ala "GCA" => "A", # Ala "GCG" => "A", # Ala "GAU" => "D", # Asp "GAC" => "D", # Asp "GAA" => "E", # Glu "GAG" => "E", # Glu "GGU" => "G", # Gly "GGC" => "G", # Gly "GGA" => "G", # Gly "GGG" => "G" # Gly ); |
|
sub rna_to_protein { my $rna = shift; my $protein; my $aa; my $i = 0; while ($i < length($rna) - 2 && substr($rna,$i,3) ne "AUG") { # start codon $i++; } $i += 3; # skip the start codon while ($i < length($rna) - 2 && substr($rna,$i,3) ne "UAA" && substr($rna,$i,3) ne "UAG" && substr($rna,$i,3) ne "UGA") { my $codon = substr($rna,$i,3); if (defined $codon2aa{$codon}) { $aa = $codon2aa{$codon}; } else { $aa = "*"; } $protein .= $aa; $i += 3; } return $protein; } |
|
package DNA; sub reverse_complement { my $seq = shift; $seq = reverse $seq; $seq =~ tr/ACGT/TGCA/; return $seq; } package RNA; sub reverse_complement { my $seq = shift; $seq = reverse $seq; $seq =~ tr/ACGU/UGCA/; return $seq; } 1; |
|
|
rna.to.protein <- function (rna) { protein <- "" i <- 1 while (i < nchar(rna)-1 && substr(rna,i,i+2) != "AUG") { # start codon i <- i+1 } i <- i+3 # skip the start codon while (i < nchar(rna)-1 && substr(rna,i,i+2) != "UAA" && substr(rna,i,i+2) != "UAG" && substr(rna,i,i+2) != "UGA") { codon <- substr(rna,i,i+2) protein <- paste(protein, codon.to.amino.acid(codon),sep="") i <- i+3 } return(protein) } |
|
codon.to.amino.acid <- function (codon) { if (codon == "UUU") return("F") # Phe else if (codon == "UUC") return("F") # Phe else if (codon == "UUA") return("L") # Leu else if (codon == "UUG") return("L") # Leu else if (codon == "UCU") return("S") # Ser else if (codon == "UCC") return("S") # Ser else if (codon == "UCA") return("S") # Ser else if (codon == "UCG") return("S") # Ser else if (codon == "UAU") return("Y") # Tyr else if (codon == "UAC") return("Y") # Tyr else if (codon == "UAA") return("-") # stop else if (codon == "UAG") return("-") # stop else if (codon == "UGU") return("C") # Cys else if (codon == "UGC") return("C") # Cys else if (codon == "UGA") return("-") # stop else if (codon == "UGG") return("W") # Trp else if (codon == "CUU") return("L") # Leu else if (codon == "CUC") return("L") # Leu else if (codon == "CUA") return("L") # Leu else if (codon == "CUG") return("L") # Leu else if (codon == "CCU") return("P") # Pro else if (codon == "CCC") return("P") # Pro else if (codon == "CCA") return("P") # Pro else if (codon == "CCG") return("P") # Pro else if (codon == "CAU") return("H") # His else if (codon == "CAC") return("H") # His else if (codon == "CAA") return("Q") # Gln else if (codon == "CAG") return("Q") # Gln else if (codon == "CGU") return("R") # Arg else if (codon == "CGC") return("R") # Arg else if (codon == "CGA") return("R") # Arg else if (codon == "CGG") return("R") # Arg else if (codon == "AUU") return("I") # Ile else if (codon == "AUC") return("I") # Ile else if (codon == "AUA") return("I") # Ile else if (codon == "AUG") return("M") # Met else if (codon == "ACU") return("T") # Thr else if (codon == "ACC") return("T") # Thr else if (codon == "ACA") return("T") # Thr else if (codon == "ACG") return("T") # Thr else if (codon == "AAU") return("N") # Asn else if (codon == "AAC") return("N") # Asn else if (codon == "AAA") return("K") # Lys else if (codon == "AAG") return("K") # Lys else if (codon == "AGU") return("S") # Ser else if (codon == "AGC") return("S") # Ser else if (codon == "AGA") return("R") # Arg else if (codon == "AGG") return("R") # Arg else if (codon == "GUU") return("V") # Val else if (codon == "GUC") return("V") # Val else if (codon == "GUA") return("V") # Val else if (codon == "GUG") return("V") # Val else if (codon == "GCU") return("A") # Ala else if (codon == "GCC") return("A") # Ala else if (codon == "GCA") return("A") # Ala else if (codon == "GCG") return("A") # Ala else if (codon == "GAU") return("D") # Asp else if (codon == "GAC") return("D") # Asp else if (codon == "GAA") return("E") # Glu else if (codon == "GAG") return("E") # Glu else if (codon == "GGU") return("G") # Gly else if (codon == "GGC") return("G") # Gly else if (codon == "GGA") return("G") # Gly else if (codon == "GGG") return("G") # Gly else return("*") } |
|
codon2aa <- c( "UUU" = "F", # Phe "UUC" = "F", # Phe "UUA" = "L", # Leu "UUG" = "L", # Leu "UCU" = "S", # Ser "UCC" = "S", # Ser "UCA" = "S", # Ser "UCG" = "S", # Ser "UAU" = "Y", # Tyr "UAC" = "Y", # Tyr "UAA" = "-", # stop "UAG" = "-", # stop "UGU" = "C", # Cys "UGC" = "C", # Cys "UGA" = "-", # stop "UGG" = "W", # Trp "CUU" = "L", # Leu "CUC" = "L", # Leu "CUA" = "L", # Leu "CUG" = "L", # Leu "CCU" = "P", # Pro "CCC" = "P", # Pro "CCA" = "P", # Pro "CCG" = "P", # Pro "CAU" = "H", # His "CAC" = "H", # His "CAA" = "Q", # Gln "CAG" = "Q", # Gln "CGU" = "R", # Arg "CGC" = "R", # Arg "CGA" = "R", # Arg "CGG" = "R", # Arg "AUU" = "I", # Ile "AUC" = "I", # Ile "AUA" = "I", # Ile "AUG" = "M", # Met "ACU" = "T", # Thr "ACC" = "T", # Thr "ACA" = "T", # Thr "ACG" = "T", # Thr "AAU" = "N", # Asn "AAC" = "N", # Asn "AAA" = "K", # Lys "AAG" = "K", # Lys "AGU" = "S", # Ser "AGC" = "S", # Ser "AGA" = "R", # Arg "AGG" = "R", # Arg "GUU" = "V", # Val "GUC" = "V", # Val "GUA" = "V", # Val "GUG" = "V", # Val "GCU" = "A", # Ala "GCC" = "A", # Ala "GCA" = "A", # Ala "GCG" = "A", # Ala "GAU" = "D", # Asp "GAC" = "D", # Asp "GAA" = "E", # Glu "GAG" = "E", # Glu "GGU" = "G", # Gly "GGC" = "G", # Gly "GGA" = "G", # Gly "GGG" = "G" # Gly ) |
|
rna.to.protein <- function (rna) { protein <- "" i <- 1 while (i < nchar(rna)-1 && substr(rna,i,i+2) != "AUG") { # start codon i <- i+1 } i <- i+3 # skip the start codon while (i < nchar(rna)-1 && substr(rna,i,i+2) != "UAA" && substr(rna,i,i+2) != "UAG" && substr(rna,i,i+2) != "UGA") { codon <- substr(rna,i,i+2) if (is.na(codon2aa[codon])) { aa <- "*" } else { aa <- codon2aa[codon] } protein <- paste(protein,aa,sep="") i <- i+3 } return(protein) } |
|
reverse.complement <- function (seq) UseMethod("reverse.complement") reverse.complement.dna <- function (seq) { rev <- paste(rev(unlist(strsplit(seq,split=""))),sep="",collapse="") chartr("ACGT","TGCA",rev) } reverse.complement.rna <- function (seq) { rev <- paste(rev(unlist(strsplit(seq,split=""))),sep="",collapse="") chartr("ACGU","UGCA",rev) } |