[ensembl-dev] downloading exons

Gary Duncan gryduncan at gmail.com
Tue Dec 13 17:00:00 GMT 2011


I have the script below, but it isn't quite working. What did I do wrong?
Also, is it possible to get the entire length of the gene?


use strict;
use warnings;
use Bio::EnsEMBL::Registry;
use Bio::EnsEMBL::DBSQL::DBConnection;
use Bio::EnsEMBL::Utils::SqlHelper;

# Load all DBs from the Ensembl Genomes MySQL server into the registry so
# that species adaptors can be looked up by name further down.
Bio::EnsEMBL::Registry->load_registry_from_db(
  -HOST => 'mysql.ebi.ac.uk',
  -PORT => 4157,
  -USER => 'anonymous'
);

# Connection to the local database that receives the exon table.
my $dbc = Bio::EnsEMBL::DBSQL::DBConnection->new(
  -HOST => 'localhost',
  -PORT => 3306,
  -USER => 'xxxxxx',
  -PASS => 'xxxxxx',
  -DBNAME => 'xxxxxx'
);

# The helper holds a soft reference to the DBConnection, so the two new()
# calls cannot be combined into one expression.
my $h = Bio::EnsEMBL::Utils::SqlHelper->new(-DB_CONNECTION => $dbc);

# Rebuild the exon table from scratch on every run using do().
$dbc->do('DROP TABLE IF EXISTS exon');

# MySQL treats '#' as a comment running to the end of the line, so the
# closing ')' of the column list must sit on a line of its own with no '#'
# in front of it; otherwise the statement is left unterminated.
$dbc->do(<<'SQL');
CREATE TABLE exon (
  idgld int(10) NOT NULL auto_increment,
  stbl_id varchar(12),
  seq_start varchar(12),
  seq_stop varchar(12),
  sequence text,
  PRIMARY KEY(idgld)
)
SQL

# File management: the text report is recreated on every run. Any existing
# regular file at that path is removed first, then opened for writing.
my $file = '/home/glduncan/data/intron.txt';
if (-f $file) {
  unlink $file or die "Cannot remove file '${file}': $!";
}
open my $out, '>', $file or die "Cannot open ${file} for writing: $!";

# Input list that is read line by line to obtain the stable IDs to look up.
my $ids_file = '/home/glduncan/data/three4.txt';
open my $fh, '<', $ids_file
  or die "Failed. Cannot open '${ids_file}' for reading: $!";

# Get the adaptors once, outside the loop.
my $species = 'Dictyostelium discoideum';
my $dba = Bio::EnsEMBL::Registry->get_DBAdaptor($species, 'core')
  or die "Cannot find a core DBAdaptor for '${species}' in the registry";
my $gene_adaptor = $dba->get_GeneAdaptor();

# One row per exon: [transcript stable ID, start, end, sequence string].
my @data;

while (my $stable_id = <$fh>) {
    chomp $stable_id;

    # Tolerate Windows line endings, stray spaces and empty lines in the
    # ID list instead of passing them on to the database lookup.
    $stable_id =~ s/^\s+//;
    $stable_id =~ s/\s+$//;
    next if $stable_id eq '';

    # An ID that is not in the database yields undef; report it and carry
    # on rather than dying on the method call below.
    my $gene = $gene_adaptor->fetch_by_stable_id($stable_id);
    if (!defined $gene) {
        warn "No gene found for stable ID '${stable_id}', skipping\n";
        next;
    }

    foreach my $transcript (@{ $gene->get_all_Transcripts() }) {
        my $transcript_id = $transcript->stable_id();

        foreach my $exon (@{ $transcript->get_all_Exons() }) {
            my $start = $exon->seq_region_start();
            my $end   = $exon->seq_region_end();

            # Exon::seq() returns a Bio::Seq object (or undef), not a string;
            # the nucleotide string itself comes from calling seq() on that.
            my $seq_obj  = $exon->seq();
            my $sequence = defined $seq_obj ? $seq_obj->seq() : undef;

            # One exon per line in the text report.
            print {$out} $start, ' - ', $end, ' - ',
              (defined $sequence ? $sequence : ''), "\n";

            # Store into a temporary array for the batch insert.
            push @data, [ $transcript_id, $start, $end, $sequence ];
        }
    }
}
close($fh) or die "Cannot close input file handle: $!";

# Buffered write errors only surface at close, so check it.
close($out) or die "Cannot close output file handle: $!";

# Use the batch command from the helper to insert the data using only one
# prepared statement.
my $dml = <<'SQL';
INSERT INTO exon (stbl_id, seq_start, seq_stop, sequence)
VALUES (?,?,?,?)
SQL
my $affected_rows = $h->batch(-DATA => \@data, -SQL => $dml);

# Echo every collected value on its own line. The newline must be in double
# quotes: '\n' in single quotes prints a literal backslash followed by 'n'.
foreach my $row (@data) {
    foreach my $value (@$row) {
        print defined $value ? $value : '', "\n";
    }
}
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.ensembl.org/pipermail/dev_ensembl.org/attachments/20111213/00501bcc/attachment.html>


More information about the Dev mailing list