1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
| #!/usr/bin/perl
# Parse alignments from tBLASTn output file
use strict;
use warnings;
print "query\tquery_length\thit\tscore\texpect\tidentity\tquery_strand\tsubject_strand\tcoverage_query\tcoverage_subject\tquery_start\tquery_end\tsubject_start\tsubject_end\n";
my $input=$ARGV[0];
my ($eachline,$query_name,$query_length,$hit_name,$hsp_num);
my ($query_start_temp,$query_end_temp,$subject_start_temp,$subject_end_temp);
my ($query_start,$query_end,$subject_start,$subject_end,$query_seq,$subject_seq,$query_seq_temp,$subject_seq_temp);
my ($score,$expect,$coverage_query,$coverage_subject,$identity_query,$identity_subject,$identity,$query_strand,$subject_strand);
$hsp_num=0;
open(IN,"<$input") or die ("no file $input !");
while (defined($eachline=<IN>)){
$eachline=~s/[\r\n]//g;
if($eachline=~/^Query= /){
($query_name)=($eachline=~/^Query= (\S+)/);
$eachline=<IN>;
($query_length)=($eachline=~/\((\S+) letters\)/);
$query_length=~s/,//g;
}
if($eachline=~/^>/){
($hit_name)=($eachline=~/^>(\S+)/);
}
if($eachline=~/^ Score/){
($score,$expect)=($eachline =~ /Score = (.*) bits.*Expect.* = (\S+),/);
$hsp_num++;
if($hsp_num>1){
$coverage_query=$query_end-$query_start+1;
$coverage_subject=$subject_end-$subject_start+1;
print "$coverage_query\t$coverage_subject\t$query_start\t$query_end\t$subject_start\t$subject_end\t$query_seq\t$subject_seq\n";
}
$query_start=1000000000;$query_end=0;
$subject_start=1000000000;$subject_end=0;
$query_seq='';$subject_seq='';
print "$query_name\t$query_length\t$hit_name\t";
print "$score\t$expect\t";
}
if($eachline=~/^ Identities/){
($identity_query,$identity_subject,$identity)=($eachline =~ /Identities = (\d+)\/(\d+) \((\d+)%\)/);
print "$identity\t";
}
if($eachline=~/^ Frame = /){
if($eachline =~ / Frame = -/){$subject_strand='Minus';}elsif($eachline =~ / Frame = +/){$subject_strand='Plus';}
$query_strand='Plus';
print "$query_strand\t$subject_strand\t";
}
if($eachline=~/^Query: (\d+)/){
if($query_strand eq 'Plus'){($query_start_temp,$query_seq_temp,$query_end_temp)=($eachline =~ /(\d+)(\D+)(\d+)/);}
if($query_strand eq 'Minus'){($query_end_temp,$query_seq_temp,$query_start_temp)=($eachline =~ /(\d+)(\D+)(\d+)/);}
$query_seq_temp=~s/[ -]//g;$query_seq.=$query_seq_temp;
if($query_start>$query_start_temp){$query_start=$query_start_temp;}
if($query_end<$query_end_temp){$query_end=$query_end_temp;}
}
if($eachline=~/^Sbjct: (\d+)/){
if($subject_strand eq 'Plus'){($subject_start_temp,$subject_seq_temp,$subject_end_temp)=($eachline =~ /(\d+)(\D+)(\d+)/);}
if($subject_strand eq 'Minus'){($subject_end_temp,$subject_seq_temp,$subject_start_temp)=($eachline =~ /(\d+)(\D+)(\d+)/);}
$subject_seq_temp=~s/[ -]//g;$subject_seq.=$subject_seq_temp;
if($subject_start>$subject_start_temp){$subject_start=$subject_start_temp;}
if($subject_end<$subject_end_temp){$subject_end=$subject_end_temp;}
}
if($eachline=~/^Matrix: /){
$coverage_query=$query_end-$query_start+1;
$coverage_subject=$subject_end-$subject_start+1;
print "$coverage_query\t$coverage_subject\t$query_start\t$query_end\t$subject_start\t$subject_end\t$query_seq\t$subject_seq\n";
}
}
close IN;
|