The short version
Save file as "entropy2"
#!/bin/bashL=`head -1 $2 | wc -c | awk '{print $1}' | sed 's/.*/&-1/' | bc`x=`expr $L - $1 + 1`n=`cat $2 |wc -l `w=$1for i in `seq $x`
doj=`expr $i + $w - 1`
cat $2 | cut -c$i-$j| sort | uniq -cecho -n "Position=$i "cat $2 | cut -c$i-$j| sort | uniq -c | awk -v n="$n" '{ h=( -1 / n * log(2)) * ( log($1) - log(n) ); H=h+H } END {print "Entropy=" H }'echo -------done
Example input file: input.seq
ASIFKHANT
BSIFKHANT
CTIFKHANT
DSIFKHANT
BSIFKHANT
DSAFKHANT
$ chmod +x entropy2
$ entropy2 2 input.seq
1 AS
2 BS
1 CT
2 DS
Position=1 Entropy=0.667818
-------
1 SA
4 SI
1 TI
Position=2 Entropy=0.460826
-------
1 AF
5 IF
Position=3 Entropy=0.228055
The long version
#!/bin/bash# Usage: prog # this program computes the Entropy of window size w for a# multiple sequence alignment with gaps marked with "-" only.# Date: 22 April 2009# ToDo:# does not trap missing $1# does not handle blank or missing data or missing line# works for windows = 1 onwards. w cannot be = or less than 0.# need to check the equation for any errors# assumes that the data input is well formated according to msa output# does not trap data input errors and may still generate errorneous output# does not cater for description tag of the clustal MSA output# only handles one catenated sequence per line
input=$2# Assumes the first line is the longest# L = number of column positions to be computedL=`head -1 $2 | wc -c | awk '{print $1}' | sed 's/.*/&-1/' | bc`
# x - number of windowsx=`expr $L - $1 + 1`echo Window Size: $1 sizeecho No of Windows: $x windows# number of sequences (=rows, since only one cateneted sequence per line allowed)n=`cat $input |wc -l `echo No of Lines: $n linesw=$1
# i = current residue position beginfor i in `seq $x`do
# i = current residue position endj=`expr $i + $w - 1`
# print aligned sequences within window and count themcat $input | cut -c$i-$j| sort | uniq -c
# Report position numberecho -n "Position=$i "# Compute Entropy in bits of these sequencescat $input | cut -c$i-$j| sort | uniq -c | awk -v n="$n" '{ h=( -1 / n * log(2)) * ( log($1) - log(n) ); H=h+H } END {print "Entropy=" H }'echo -------done
No comments:
Post a Comment