一篇研究由National Cancer Institute (NCI)的Stephen Chanock领导完成。该研究将31,717个癌症患者和26,136个健康人的血液或口腔拭子的SNP芯片数据进行了分析比较。健康人中染色体复制异常的比率随着年龄而增长,50岁以下为0.23%,到了70多岁就飙升到1.91%。平均下来,癌症患者中染色体复制异常的比率比健康者高一些(0.97% vs 0.74%)。有些白血病患者在被诊断出来前一年的血液样本中,染色体复制异常也比健康人更普遍。
#!/bin/perl
# Minimal SeqIO example: open the sequence file named on the command line
# and fetch its first record. No -format is passed to Bio::SeqIO->new here.

use strict;
use warnings;
use Bio::SeqIO;

my $file = shift;    # get the file name, somehow

# Create the sequence stream for that file.
my $seqio_object = Bio::SeqIO->new(-file => $file);

# Pull the first sequence object off the stream.
my $seq_object = $seqio_object->next_seq;
# getaccs.pl - print the accession number of every sequence in a file.
#
# Usage: getaccs.pl file format

use strict;
use warnings;

# first, bring in the SeqIO module
use Bio::SeqIO;

# Notice that you do not have to use any Bio::SeqI
# objects, because SeqIO does this for you. In fact, it
# even knows which SeqI object to use for the provided
# format.

# Bring in the file and format, or die with a nice
# usage statement if one or both arguments are missing.
my $usage  = "getaccs.pl file format\n";
my $file   = shift or die $usage;
my $format = shift or die $usage;

# Now create a new SeqIO object to bring in the input
# file. The new method takes arguments in the format
# key => value, key => value. The basic keys that it
# can accept values for are '-file' which expects some
# information on how to access your data, and '-format'
# which expects one of the Bioperl-format-labels mentioned
# above. Although it is optional, it is good
# programming practice to provide > and < in front of any
# filenames provided in the -file parameter. This makes the
# resulting filehandle created by SeqIO explicitly read (<)
# or write(>). It will definitely help others reading your
# code understand the function of the SeqIO object.
my $inseq = Bio::SeqIO->new(
    -file   => "<$file",
    -format => $format,
);

# Now that we have a seq stream,
# we need to tell it to give us a $seq.
# We do this using the 'next_seq' method of SeqIO.
while (my $seq = $inseq->next_seq) {
    print $seq->accession_number, "\n";
}
# Read every sequence from an EMBL file and print the mean and median
# sequence length.

use strict;
use warnings;
use Bio::SeqIO;

my $input_file = shift;

my $seq_in = Bio::SeqIO->new(
    -format => 'embl',
    -file   => $input_file,
);

# loads the whole file into memory - be careful
# if this is a big file, then this script will
# use a lot of memory
my @seq_array;
while (my $seq = $seq_in->next_seq()) {
    push @seq_array, $seq;
}

# Without this guard an empty input would end in a division by zero and a
# method call on an undefined array element.
die "No sequences found in input\n" unless @seq_array;

# now do something with these. First sort by length,
# find the average and median lengths and print them out
@seq_array = sort { $a->length <=> $b->length } @seq_array;

my @lengths = map { $_->length } @seq_array;
my $count   = scalar @lengths;
my $total   = 0;
$total += $_ for @lengths;

# Median: the middle element for an odd count, the mean of the two middle
# elements for an even count. int() makes the index truncation explicit.
my $middle = int($count / 2);
my $median = $count % 2
    ? $lengths[$middle]
    : ($lengths[$middle - 1] + $lengths[$middle]) / 2;

print "Mean length ", $total / $count, " Median ", $median, "\n";
# x2y.pl - convert a sequence file from one format to another.
#
# Usage: x2y.pl infile infileformat outfile outfileformat

use strict;
use warnings;
use Bio::SeqIO;

# get command-line arguments, or die with a usage statement
my $usage         = "x2y.pl infile infileformat outfile outfileformat\n";
my $infile        = shift or die $usage;
my $infileformat  = shift or die $usage;
my $outfile       = shift or die $usage;
my $outfileformat = shift or die $usage;

# create one SeqIO object to read in, and another to write out
my $seq_in = Bio::SeqIO->new(
    -file   => "<$infile",
    -format => $infileformat,
);
my $seq_out = Bio::SeqIO->new(
    -file   => ">$outfile",
    -format => $outfileformat,
);

# write each entry in the input file to the output file
while (my $inseq = $seq_in->next_seq) {
    $seq_out->write_seq($inseq);
}
可以将$seq_in和$seq_out想象成两个特殊的文件句柄,并且这种文件句柄“知道”序列及其格式。使用普通文件句柄时一般用类似<F>的操作符,而$seq_in和$seq_out则分别使用next_seq()方法读取、write_seq()方法输出序列对象,例如“$seqio->write_seq($seq_object)”就相当于“print F $line”。
>cat myseqs.fa | all2y.pl fasta newseqs.gb genbank
其代码如下:
12345678910111213141516171819202122
# all2y.pl - read sequences from standard input and write them to a file
# in another format.
#
# Usage: all2y.pl informat outfile outfileformat

use strict;
use warnings;
use Bio::SeqIO;

# get command-line arguments, or die with a usage statement
my $usage     = "all2y.pl informat outfile outfileformat\n";
my $informat  = shift or die $usage;
my $outfile   = shift or die $usage;
my $outformat = shift or die $usage;

# create one SeqIO object to read in, and another to write out
# *STDIN is a 'globbed' filehandle with the contents of Standard In
my $seqin = Bio::SeqIO->new(
    -fh     => \*STDIN,
    -format => $informat,
);
my $seqout = Bio::SeqIO->new(
    -file   => ">$outfile",
    -format => $outformat,
);

# write each entry in the input file to the output file
while (my $inseq = $seqin->next_seq) {
    $seqout->write_seq($inseq);
}
cat *.seq | in2out.pl EMBL Genbank | someother program
代码如下:
1234567891011121314151617181920
# in2out.pl - filter that reads sequences on standard input in one format
# and writes them to standard output in another.
#
# Usage: in2out.pl informat outformat

use strict;
use warnings;
use Bio::SeqIO;

# get command-line arguments, or die with a usage statement
my $usage     = "in2out.pl informat outformat\n";
my $informat  = shift or die $usage;
my $outformat = shift or die $usage;

# create one SeqIO object to read in, and another to write out
my $seqin = Bio::SeqIO->new(
    -fh     => \*STDIN,
    -format => $informat,
);
my $outseq = Bio::SeqIO->new(
    -fh     => \*STDOUT,
    -format => $outformat,
);

# write each entry in the input to the output
while (my $inseq = $seqin->next_seq) {
    $outseq->write_seq($inseq);
}
# Parse sequences held in a Perl string (rather than a file) by handing
# SeqIO a filehandle opened on that string.

use strict;
use warnings;
# use IO::String;    # only needed for Perl versions previous to 5.8.0
use Bio::SeqIO;

## get a string into $string somehow, with its format in $format, say from a web form.
my $string = ">SEQ1\nacgt\n>revseq1\ntgca\n";
my $format = "fasta";

# The two ways of obtaining $stringfh are alternatives; exactly one of them
# should be active. The in-memory open below is the one for Perl 5.8.0 and
# later.
open(my $stringfh, "<", \$string)
    or die "Could not open string for reading: $!";

# Use this instead (together with 'use IO::String' above) for Perl BEFORE 5.8.0:
# my $stringfh = IO::String->new($string);

my $seqio = Bio::SeqIO->new(
    -fh     => $stringfh,
    -format => $format,
);

while (my $seq = $seqio->next_seq) {
    # process each seq
    print $seq->id . ' = ' . $seq->seq() . "\n";
}
# Write a sequence object into a Perl string (rather than a file) by handing
# SeqIO a filehandle opened on that string.
#
# NOTE: this is a fragment. $seq_obj is not defined here; it must be a
# sequence object supplied by the surrounding code, which is also why
# 'use strict' is not enabled in this snippet.

# use IO::String;    # only needed for Perl versions BEFORE 5.8.0
use Bio::SeqIO;

# Start from an empty string so that printing it is well defined even if
# nothing gets written.
my $string = '';

# The two ways of obtaining $stringfh are alternatives; exactly one of them
# should be active. The in-memory open below is the one for Perl 5.8.0 and
# later.
open(my $stringfh, ">", \$string)
    or die "Could not open string for writing: $!";

# Use this instead (together with 'use IO::String' above) for Perl BEFORE 5.8.0:
# my $stringfh = IO::String->new(\$string);

# The handle passed to SeqIO must be the one opened above ($stringfh); the
# original snippet passed an unrelated, never-assigned variable.
my $seqOut = Bio::SeqIO->new(
    -format => 'swiss',
    -fh     => $stringfh,
);

$seqOut->write_seq($seq_obj);
print $string;
# gzip2fasta.pl - read a gzip-compressed sequence file through gunzip and
# write its entries out as Fasta.
#
# Usage: gzip2fasta.pl infile informat outfile

use strict;
use warnings;
use Bio::SeqIO;

# get command-line arguments, or die with a usage statement
my $usage    = "gzip2fasta.pl infile informat outfile\n";
my $infile   = shift or die $usage;
my $informat = shift or die $usage;
my $outfile  = shift or die $usage;

# Run gunzip with list-form open so the file name is passed as a single
# argument and never interpreted by a shell (spaces or metacharacters in
# $infile would otherwise break or alter the command).
open(my $gunzip_fh, '-|', '/usr/local/bin/gunzip', '-c', $infile)
    or die "Could not run gunzip on $infile: $!";

# create one SeqIO object to read in, and another to write out
my $seqin = Bio::SeqIO->new(
    -fh     => $gunzip_fh,
    -format => $informat,
);
my $seqout = Bio::SeqIO->new(
    -file   => ">$outfile",
    -format => 'Fasta',
);

# write each entry in the input to the output file
while (my $inseq = $seqin->next_seq) {
    $seqout->write_seq($inseq);
}
any2wublastable.pl myfile.gb Genbank mywublastable p
any2wublastable.pl的代码:
1234567891011121314151617181920212223
# any2wublastable.pl - convert a sequence file to Fasta and pipe it straight
# into xdformat to build a database.
#
# Usage: any2wublastable.pl infile informat outdbname outdbtype

use strict;
use warnings;
use Bio::SeqIO;

# get command-line arguments, or die with a usage statement
my $usage     = "any2wublastable.pl infile informat outdbname outdbtype\n";
my $infile    = shift or die $usage;
my $informat  = shift or die $usage;
my $outdbname = shift or die $usage;
my $outdbtype = shift or die $usage;

# Start xdformat with list-form open so that $outdbname and $outdbtype are
# passed as discrete arguments and never interpreted by a shell. The
# arguments are the same ones the original pipe string spelled out:
#   xdformat -o <outdbname> -<outdbtype> -- -
open(my $xdformat_fh, '|-',
    '/usr/local/bin/xdformat', '-o', $outdbname, "-$outdbtype", '--', '-')
    or die "Could not run xdformat: $!";

# create one SeqIO object to read in, and another to write out
my $seqin = Bio::SeqIO->new(
    -file   => "<$infile",
    -format => $informat,
);
my $seqout = Bio::SeqIO->new(
    -fh     => $xdformat_fh,
    -format => 'Fasta',
);

# write each entry in the input to the output
while (my $inseq = $seqin->next_seq) {
    $seqout->write_seq($inseq);
}
# splitgb.pl - split a Genbank file into human.gb (Homo sapiens entries)
# and other.gb (everything else).
#
# Usage: splitgb.pl infile

use strict;
use warnings;
use Bio::SeqIO;

# get command-line argument, or die with a usage statement
my $usage  = "splitgb.pl infile\n";
my $infile = shift or die $usage;

my $inseq = Bio::SeqIO->new(
    -file   => "<$infile",
    -format => 'Genbank',
);

# One output stream per destination, keyed by category.
my %outfiles = (
    'human' => Bio::SeqIO->new(
        -file   => '>human.gb',
        -format => 'Genbank',
    ),
    'other' => Bio::SeqIO->new(
        -file   => '>other.gb',
        -format => 'Genbank',
    ),
);

while (my $seqin = $inseq->next_seq) {
    # here we make use of the species attribute, which returns a
    # species object, which has a binomial attribute that
    # holds the binomial species name of the source of the sequence
    #
    # Guard against entries that carry no species object (or no binomial):
    # calling ->binomial on undef would abort the whole run, so such
    # entries are routed to 'other' instead.
    my $species  = $seqin->species;
    my $binomial = defined $species ? $species->binomial : undef;

    if (defined $binomial && $binomial =~ m/Homo sapiens/) {
        $outfiles{'human'}->write_seq($seqin);
    }
    else {
        $outfiles{'other'}->write_seq($seqin);
    }
}
# splitgb.pl - split a Genbank file into human and non-human entries,
# writing each group in both Genbank and Fasta format
# (human.gb / human.fa / other.gb / other.fa).
#
# Usage: splitgb.pl infile

use strict;
use warnings;
use Bio::SeqIO;

# get command-line argument, or die with a usage statement
my $usage  = "splitgb.pl infile\n";
my $infile = shift or die $usage;

my $inseq = Bio::SeqIO->new(
    -file   => "<$infile",
    -format => 'Genbank',
);

# Output streams, keyed first by category and then by format.
my %outfiles = (
    human => {
        Genbank => Bio::SeqIO->new(
            -file   => '>human.gb',
            -format => 'Genbank',
        ),
        Fasta => Bio::SeqIO->new(
            -file   => '>human.fa',
            -format => 'Fasta',
        ),
    },
    other => {
        Genbank => Bio::SeqIO->new(
            -file   => '>other.gb',
            -format => 'Genbank',
        ),
        Fasta => Bio::SeqIO->new(
            -file   => '>other.fa',
            -format => 'Fasta',
        ),
    },
);

while (my $seqin = $inseq->next_seq) {
    # Guard against entries that carry no species object (or no binomial):
    # calling ->binomial on undef would abort the whole run, so such
    # entries are routed to 'other' instead.
    my $species  = $seqin->species;
    my $binomial = defined $species ? $species->binomial : undef;

    my $category
        = (defined $binomial && $binomial =~ m/Homo sapiens/)
        ? 'human'
        : 'other';

    # Write Genbank first, then Fasta, for the chosen category.
    $outfiles{$category}->{'Genbank'}->write_seq($seqin);
    $outfiles{$category}->{'Fasta'}->write_seq($seqin);
}
# Convert a Genbank file to Fasta, trapping any exception raised while the
# input and output streams are being opened.

use strict;
use warnings;
use Bio::SeqIO;

my $input_file  = shift;
my $output_file = shift;

# we have to declare $seq_in and $seq_out before
# the eval block as we want to use them afterwards
my $seq_in;
my $seq_out;

# The block ends in a true value, so success is judged by eval's return
# value rather than by $@ alone (which other code can clobber or leave
# empty).
my $opened = eval {
    $seq_in = Bio::SeqIO->new(
        -format => 'genbank',
        -file   => $input_file,
    );
    $seq_out = Bio::SeqIO->new(
        -format => 'fasta',
        -file   => ">$output_file",
    );
    1;
};

if (!$opened) {
    # an error occurred; capture $@ at once, before anything can reset it
    my $error = $@ || 'unknown error';

    # Diagnostics belong on STDERR so they never mix with regular output.
    print STDERR "Was not able to open files, sorry!\n";
    print STDERR "Full error is\n\n$error\n";
    exit(-1);
}

while (my $seq = $seq_in->next_seq()) {
    $seq_out->write_seq($seq);
}