#!/bin/perl
# Minimal SeqIO example: open the sequence file named on the command line
# and fetch the first sequence object from it.
use strict;
use Bio::SeqIO;

# The file name is taken from the first command-line argument.
my $file = shift @ARGV;

# Only -file is passed to the constructor here; no -format is specified.
my $seqio_object = Bio::SeqIO->new( -file => $file );

# Ask the stream for its first sequence.
my $seq_object = $seqio_object->next_seq;
# getaccs.pl - print the accession number of every sequence in a file.
#
# Usage: getaccs.pl file format

# Bio::SeqIO is the only module loaded explicitly by this script.
use Bio::SeqIO;

my $usage = "getaccs.pl file format\n";

# Both arguments are required: stop with the usage line as soon as one of
# them is missing (or false).
my $file = shift @ARGV;
die $usage unless $file;
my $format = shift @ARGV;
die $usage unless $format;

# Build the input stream. The constructor takes key => value pairs; the
# two used here are '-file' (how to reach the data) and '-format' (the
# format label). The leading '<' on the file name marks the handle as
# read-only, which makes the intent obvious to anyone reading the code.
my $inseq = Bio::SeqIO->new(
    -format => $format,
    -file   => "<$file",
);

# Pull sequences off the stream one at a time with next_seq; the loop
# ends as soon as next_seq returns a false value.
while ( my $record = $inseq->next_seq ) {
    print $record->accession_number, "\n";
}
# Read every sequence from an EMBL file and report the mean and median
# sequence length.
#
# Usage: <script> input_file
use strict;
use warnings;
use Bio::SeqIO;

my $input_file = shift;

my $seq_in = Bio::SeqIO->new(
    -format => 'embl',
    -file   => $input_file,
);

# Loads the whole file into memory - be careful: if this is a big file,
# this script will use a lot of memory.
my @seq_array;
while ( my $seq = $seq_in->next_seq() ) {
    push @seq_array, $seq;
}

# With no sequences the mean below would be a division by zero, so stop
# here with a readable message instead.
die "No sequences were read; cannot compute mean and median length\n"
    unless @seq_array;

# Sort by length so that the median can be read off by position.
@seq_array = sort { $a->length <=> $b->length } @seq_array;

my $count = scalar @seq_array;
my $total = 0;
$total += $_->length for @seq_array;

# int() makes the index truncation explicit: for an even number of
# sequences this selects the upper of the two middle elements.
my $median = $seq_array[ int( $count / 2 ) ]->length;

print "Mean length ", $total / $count, " Median ", $median, "\n";
# x2y.pl - copy every sequence from one file to another, converting
# between sequence formats on the way.
#
# Usage: x2y.pl infile infileformat outfile outfileformat
use Bio::SeqIO;

my $usage = "x2y.pl infile infileformat outfile outfileformat\n";

# Take the four positional arguments off @ARGV in one go; any that is
# missing (or false) aborts the script with the usage line.
my ( $infile, $infileformat, $outfile, $outfileformat ) = splice @ARGV, 0, 4;
die $usage
    unless $infile && $infileformat && $outfile && $outfileformat;

# One SeqIO stream for reading ('<') ...
my $seq_in = Bio::SeqIO->new(
    -format => $infileformat,
    -file   => "<$infile",
);

# ... and a second one for writing ('>').
my $seq_out = Bio::SeqIO->new(
    -format => $outfileformat,
    -file   => ">$outfile",
);

# Each sequence read from the input is written straight to the output.
while ( my $record = $seq_in->next_seq ) {
    $seq_out->write_seq($record);
}
可以将$seq_in和$seq_out想象成两个特殊的文件句柄,这两个文件句柄“知道”序列及其格式。使用普通文件句柄时一般用类似<F>的操作符,而$seq_in和$seq_out则分别使用next_seq()方法读取序列对象、使用write_seq()方法输出序列对象,例如“$seqio->write_seq($seq_object)”相当于“print F $line”。
>cat myseqs.fa | all2y.pl fasta newseqs.gb genbank
其代码如下:
12345678910111213141516171819202122
# all2y.pl - read sequences from standard input and write them to a file
# in another format.
#
# Usage: all2y.pl informat outfile outfileformat
use Bio::SeqIO;

my $usage = "all2y.pl informat outfile outfileformat\n";

# All three arguments are mandatory; a missing (or false) one aborts the
# script with the usage line.
my ( $informat, $outfile, $outformat ) = splice @ARGV, 0, 3;
die $usage unless $informat && $outfile && $outformat;

# The reader is attached to standard input through -fh; \*STDIN is a
# reference to the STDIN glob.
my $seqin = Bio::SeqIO->new(
    -format => $informat,
    -fh     => \*STDIN,
);

# The writer creates the named output file ('>').
my $seqout = Bio::SeqIO->new(
    -format => $outformat,
    -file   => ">$outfile",
);

# Copy every sequence from the input stream to the output file.
while ( my $record = $seqin->next_seq ) {
    $seqout->write_seq($record);
}
cat *.seq | in2out.pl EMBL Genbank | someother program
代码如下:
1234567891011121314151617181920
# in2out.pl - filter that reads sequences on standard input in one format
# and writes them to standard output in another.
#
# Usage: in2out.pl informat outformat
use Bio::SeqIO;

my $usage = "in2out.pl informat outformat\n";

# Both format names are required; stop with the usage line otherwise.
my $informat = shift @ARGV;
die $usage unless $informat;
my $outformat = shift @ARGV;
die $usage unless $outformat;

# Reader on STDIN and writer on STDOUT, both passed as glob references
# through -fh.
my $seqin = Bio::SeqIO->new(
    -format => $informat,
    -fh     => \*STDIN,
);
my $outseq = Bio::SeqIO->new(
    -format => $outformat,
    -fh     => \*STDOUT,
);

# Pass every sequence from the input through to the output.
while ( my $record = $seqin->next_seq ) {
    $outseq->write_seq($record);
}
# Parse sequences held in a Perl string (rather than in a file) by giving
# Bio::SeqIO a filehandle that reads from the string.
use Bio::SeqIO;

# Get a string into $string somehow, with its format in $format, say from
# a web form.
my $string = ">SEQ1\nacgt\n>revseq1\ntgca\n";
my $format = "fasta";

# Perl 5.8.0 and later: open a read handle directly on the scalar.
# Exactly one of the two alternatives must be used; running both (as the
# snippet previously did) creates the handle twice.
open( my $stringfh, "<", \$string )
    or die "Could not open string for reading: $!";

# Alternative for Perl versions BEFORE 5.8.0 - use INSTEAD of the open()
# call above:
#   use IO::String;
#   my $stringfh = IO::String->new($string);

my $seqio = Bio::SeqIO->new(
    -fh     => $stringfh,
    -format => $format,
);

# Process each sequence: print "<id> = <sequence>".
while ( my $seq = $seqio->next_seq ) {
    print $seq->id . ' = ' . $seq->seq() . "\n";
}
# Write a sequence object into a Perl string (rather than a file) by
# giving Bio::SeqIO a filehandle that appends to the string.
#
# NOTE: $seq_obj is not created in this snippet; it has to hold the
# sequence object to be written before this code runs.
use Bio::SeqIO;

my $string;

# Perl 5.8.0 and later: open a write handle directly on the scalar.
# Exactly one of the two alternatives must be used, not both.
open( my $stringfh, ">", \$string )
    or die "Could not open string for writing: $!";

# Alternative for Perl versions BEFORE 5.8.0 - use INSTEAD of the open()
# call above:
#   use IO::String;
#   my $stringfh = IO::String->new(\$string);

# The writer must be given the handle opened above; the snippet used to
# pass an undefined variable ($io) here.
my $seqOut = Bio::SeqIO->new(
    -format => 'swiss',
    -fh     => $stringfh,
);

$seqOut->write_seq($seq_obj);
print $string;
# gzip2fasta.pl - read sequences from a gzip-compressed file and write
# them out as Fasta.
#
# Usage: gzip2fasta.pl infile informat outfile
use Bio::SeqIO;

my $usage = "gzip2fasta.pl infile informat outfile\n";

my $infile   = shift or die $usage;
my $informat = shift or die $usage;
my $outfile  = shift or die $usage;

# Decompress through a pipe from gunzip. The list form of open runs the
# program directly, without a shell, so spaces or shell metacharacters in
# $infile cannot alter the command (the earlier version interpolated the
# file name into a shell command string).
open( my $gunzip_fh, '-|', '/usr/local/bin/gunzip', '-c', $infile )
    or die "Could not run gunzip on $infile: $!\n";

# The reader consumes gunzip's output through -fh.
my $seqin = Bio::SeqIO->new(
    -fh     => $gunzip_fh,
    -format => $informat,
);

# The writer creates the Fasta output file.
my $seqout = Bio::SeqIO->new(
    -file   => ">$outfile",
    -format => 'Fasta',
);

# Write each entry in the input to the output file.
while ( my $inseq = $seqin->next_seq ) {
    $seqout->write_seq($inseq);
}
any2wublastable.pl myfile.gb Genbank mywublastable p
any2wublastable.pl的代码:
1234567891011121314151617181920212223
# any2wublastable.pl - convert a sequence file to Fasta and pipe it into
# xdformat to build a database.
#
# Usage: any2wublastable.pl infile informat outdbname outdbtype
use Bio::SeqIO;

my $usage = "any2wublastable.pl infile informat outdbname outdbtype\n";

my $infile    = shift or die $usage;
my $informat  = shift or die $usage;
my $outdbname = shift or die $usage;
my $outdbtype = shift or die $usage;

# Reader for the input file.
my $seqin = Bio::SeqIO->new(
    -file   => "<$infile",
    -format => $informat,
);

# Pipe into xdformat. The list form of open runs the program directly,
# without a shell, so $outdbname and $outdbtype are each passed as exactly
# one argument and cannot inject extra shell syntax (the earlier version
# interpolated both into a shell command string). The trailing '--' '-'
# arguments are passed exactly as before.
open(
    my $xdformat_fh, '|-',
    '/usr/local/bin/xdformat',
    '-o', $outdbname, "-$outdbtype", '--', '-'
) or die "Could not start xdformat: $!\n";

# The writer sends Fasta records down the pipe through -fh.
my $seqout = Bio::SeqIO->new(
    -fh     => $xdformat_fh,
    -format => 'Fasta',
);

# Write each entry in the input to the output.
while ( my $inseq = $seqin->next_seq ) {
    $seqout->write_seq($inseq);
}
# splitgb.pl - split a GenBank file into human.gb (Homo sapiens records)
# and other.gb (everything else).
#
# Usage: splitgb.pl infile
use Bio::SeqIO;

my $usage  = "splitgb.pl infile\n";
my $infile = shift or die $usage;

my $inseq = Bio::SeqIO->new(
    -file   => "<$infile",
    -format => 'Genbank',
);

# One output stream per category.
my %outfiles = (
    'human' => Bio::SeqIO->new(
        -file   => '>human.gb',
        -format => 'Genbank',
    ),
    'other' => Bio::SeqIO->new(
        -file   => '>other.gb',
        -format => 'Genbank',
    ),
);

while ( my $seqin = $inseq->next_seq ) {

    # Here we make use of the species attribute, which returns a species
    # object, which has a binomial attribute that holds the binomial
    # species name of the source of the sequence.
    my $species = $seqin->species;

    # Guard against a record with no species object attached: calling
    # ->binomial on an undefined value would abort the whole run. Such
    # records are written to 'other'.
    my $category =
        ( $species && $species->binomial =~ m/Homo sapiens/ )
        ? 'human'
        : 'other';

    $outfiles{$category}->write_seq($seqin);
}
# splitgb.pl - split a GenBank file into human and non-human records,
# writing each group in both GenBank and Fasta format
# (human.gb / human.fa / other.gb / other.fa).
#
# Usage: splitgb.pl infile
use Bio::SeqIO;

my $usage  = "splitgb.pl infile\n";
my $infile = shift or die $usage;

my $inseq = Bio::SeqIO->new(
    -file   => "<$infile",
    -format => 'Genbank',
);

# Output streams, keyed first by category and then by format.
my %outfiles = (
    human => {
        Genbank => Bio::SeqIO->new(
            -file   => '>human.gb',
            -format => 'Genbank',
        ),
        Fasta => Bio::SeqIO->new(
            -file   => '>human.fa',
            -format => 'Fasta',
        ),
    },
    other => {
        Genbank => Bio::SeqIO->new(
            -file   => '>other.gb',
            -format => 'Genbank',
        ),
        Fasta => Bio::SeqIO->new(
            -file   => '>other.fa',
            -format => 'Fasta',
        ),
    },
);

while ( my $seqin = $inseq->next_seq ) {
    my $species = $seqin->species;

    # Guard against a record with no species object attached: calling
    # ->binomial on an undefined value would abort the whole run. Such
    # records are written to 'other'.
    my $category =
        ( $species && $species->binomial =~ m/Homo sapiens/ )
        ? 'human'
        : 'other';

    # Write the record once per output format, GenBank first.
    for my $format (qw(Genbank Fasta)) {
        $outfiles{$category}->{$format}->write_seq($seqin);
    }
}
# Convert a GenBank file to Fasta, trapping any exception raised while
# the input and output streams are being created.
#
# Usage: <script> input_file output_file
use strict;
use Bio::SeqIO;

my ( $input_file, $output_file ) = splice @ARGV, 0, 2;

# Both stream variables are declared outside the eval block because they
# are still needed once the block has finished.
my ( $seq_in, $seq_out );

eval {
    $seq_in = Bio::SeqIO->new(
        -file   => $input_file,
        -format => 'genbank',
    );
    $seq_out = Bio::SeqIO->new(
        -file   => ">$output_file",
        -format => 'fasta',
    );
};

# A non-empty $@ means something inside the eval block died: report it
# and give up.
if ($@) {
    print "Was not able to open files, sorry!\n",
        "Full error is\n\n$@\n";
    exit(-1);
}

# Both streams are open: copy every sequence across.
while ( my $record = $seq_in->next_seq() ) {
    $seq_out->write_seq($record);
}