[ensembl-dev] downloading exons
Gary Duncan
gryduncan at gmail.com
Tue Dec 13 17:00:00 GMT 2011
I have the script below, but it isn't quite working. What did I do wrong?
Also, is it possible to get the entire length of the gene?
use strict;
use warnings;
use Bio::EnsEMBL::Registry;
use Bio::EnsEMBL::DBSQL::DBConnection;
use Bio::EnsEMBL::Utils::SqlHelper;
# Load all DBs from Ensembl Genomes into the Registry so that adaptors can be
# looked up by species name later in the script.
Bio::EnsEMBL::Registry->load_registry_from_db(
    -HOST => 'mysql.ebi.ac.uk',
    -PORT => 4157,
    -USER => 'anonymous',
);

# Connection to the local MySQL database that will receive the exon table.
my $dbc = Bio::EnsEMBL::DBSQL::DBConnection->new(
    -HOST   => 'localhost',
    -PORT   => 3306,
    -USER   => 'xxxxxx',
    -PASS   => 'xxxxxx',
    -DBNAME => 'xxxxxx',
);

# The helper holds only a soft reference to the DBConnection, so $dbc must be
# kept in its own variable: the two new() calls cannot be combined.
# (This comment was previously wrapped so that its last word sat on a line of
# its own as a bare word, which stopped the script from compiling.)
my $h = Bio::EnsEMBL::Utils::SqlHelper->new(-DB_CONNECTION => $dbc);
# Cleanup schema using do(): drop any exon table left over from an earlier run
# and recreate it empty.
$dbc->do('DROP TABLE IF EXISTS exon');

# The closing parenthesis of CREATE TABLE must be live SQL. It used to follow
# a '#', which MySQL treats as a comment-to-end-of-line marker, so the
# statement was sent without its terminating ')' and was rejected.
$dbc->do(<<'SQL');
CREATE TABLE exon (
idgld int(10) NOT NULL auto_increment,
stbl_id varchar(12),
seq_start varchar(12),
seq_stop varchar(12),
sequence text,
PRIMARY KEY(idgld)
)
SQL
# File management: start from a fresh output file, then open the list of gene
# stable IDs (read line by line further down).
my $file = '/home/glduncan/data/intron.txt';
if (-f $file) {
    unlink $file or die "Cannot remove file '${file}': $!";
}
open my $out, '>', $file or die "Cannot open ${file} for writing: $!";

my $ids_file = '/home/glduncan/data/three4.txt';

# The message is kept on one line: the previous wrapped string literal put a
# line break in the middle of the error text.
open my $fh, '<', $ids_file
    or die "Failed. Cannot open '${ids_file}' for reading: $!";
# Get the adaptors once, outside the loop.
my $dba = Bio::EnsEMBL::Registry->get_DBAdaptor('Dictyostelium discoideum', 'core');
defined $dba
    or die "No core database adaptor found for 'Dictyostelium discoideum'";
my $gene_adaptor = $dba->get_GeneAdaptor();

# Rows collected for the later bulk insert; each row is
# [ transcript stable ID, exon start, exon end, exon sequence ].
my @data;

GENE:
while (my $stable_id = <$fh>) {
    chomp $stable_id;

    # Tolerate stray whitespace / CR characters and blank lines in the ID file.
    $stable_id =~ s/\A\s+|\s+\z//g;
    next GENE if $stable_id eq '';

    # An unknown ID yields no gene object; skip it rather than dying on a
    # method call against undef.
    my $gene = $gene_adaptor->fetch_by_stable_id($stable_id);
    if (!defined $gene) {
        warn "No gene found for stable ID '${stable_id}'; skipping\n";
        next GENE;
    }

    # NOTE(review): the whole-gene length asked about in the post should be
    # available as $gene->length() (inherited from Bio::EnsEMBL::Feature) --
    # confirm against the installed API version before relying on it.

    foreach my $transcript (@{ $gene->get_all_Transcripts() }) {
        my $transcript_id = $transcript->stable_id();

        foreach my $exon (@{ $transcript->get_all_Exons() }) {
            my $start = $exon->seq_region_start();
            my $end   = $exon->seq_region_end();

            # NOTE(review): Exon::seq() is expected to return a Bio::Seq
            # object, so the nucleotide string is taken with a second seq()
            # call -- confirm against the Ensembl API docs. Printing or storing
            # the object itself would give "Bio::Seq=HASH(...)".
            my $sequence = $exon->seq()->seq();

            # One record per line; the earlier wrapped string literal put a
            # line break in the middle of every record.
            print {$out} $start . ' - ' . $end . ' - ' . $sequence . "\n";

            # Store into a temporary array
            push @data, [ $transcript_id, $start, $end, $sequence ];
        }
    }
}
close($fh) or die "Cannot close input file handle: $!";

# Buffered write errors only surface at close, so check the output handle too.
close($out) or die "Cannot close output file handle: $!";
# Use the batch command from the helper to insert the data using only one
# prepared statement. (The tail of this comment was previously wrapped onto a
# line of its own as bare code, which stopped the script from compiling.)
my $dml = <<'SQL';
INSERT INTO exon (stbl_id, seq_start, seq_stop, sequence)
VALUES (?,?,?,?)
SQL
my $affected_rows = $h->batch(-DATA => \@data, -SQL => $dml);

# Echo every stored value, one per line. The newline must be double-quoted:
# '\n' in single quotes prints a literal backslash followed by 'n'.
foreach my $row (@data) {
    foreach my $value (@{$row}) {
        print $value, "\n";
    }
}
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.ensembl.org/pipermail/dev_ensembl.org/attachments/20111213/00501bcc/attachment.html>
More information about the Dev
mailing list