[ensembl-dev] ensembl_id in DBEntry
Nicole Washington
nlwashington at lbl.gov
Fri Mar 1 21:34:03 GMT 2013
Hi,
I want to fetch all DBEntries for a given source, say EntrezGene, and then locally build a gene-based hash of the entries. My reason is that repeatedly calling fetch_all_by_Gene is very expensive time-wise, particularly when fetching for all genes in a genome.
In order to do this, I need the Ensembl IDs linked to each DBEntry. However, the objects I get from fetch_all_by_source don't seem to deliver this info.
I'm using r70 of the ensembl API.
Here's a bit of my code:
# Build two lookups from the xrefs fetched for each requested source:
#   $dbentries_by_source     - source name => array ref returned by
#                              fetch_all_by_source() for that source
#   %dbentries_by_ensembl_id - value of $dbe->ensembl_id() => list of the
#                              DBEntry objects that reported that id
my $dbentries_by_source = {};
# A hash is initialised from a list, not from a hash reference: "= {}" would
# store a single stringified-reference key with an undef value and emit a
# "Reference found where even-sized list expected" warning.
my %dbentries_by_ensembl_id = ();
print STDOUT "Fetching xrefs...\n";
my $xref_count=0;
foreach my $xref (@xrefs_to_fetch) {
    my $dbes = $dbentry_adaptor->fetch_all_by_source($xref);
    # Explicit scalar context: print the number of entries, not the entries.
    print STDOUT scalar(@$dbes) . " found. Sorting...";
    $dbentries_by_source->{$xref} = $dbes;
    foreach my $dbe (@$dbes) {
        # Look the id up once and reuse it for every access below.
        # NOTE(review): assumes ensembl_id() returns a defined value for
        # every entry - confirm against the API version in use.
        my $ensembl_id = $dbe->ensembl_id();
        print STDOUT $ensembl_id . ", ";
        if (!exists $dbentries_by_ensembl_id{$ensembl_id}) {
            print "c"; #feedback for creating a new list for this id
        }
        # push autovivifies the array on first use, so no explicit
        # initialisation of the slot is needed.
        push @{ $dbentries_by_ensembl_id{$ensembl_id} }, $dbe;
        print "r"; #feedback for adding a reference element
    }
# (the excerpt ends here, still inside the outer foreach loop)
Below you'll find the Dumper output for the first object returned. You'll notice that there's no "ensembl_id" key-value pair in the DBEntry object.
Am I going about this the wrong way? Any hints? Thanks in advance...
Nicole
$VAR1 = bless( {
'priority' => '250',
'adaptor' => bless( {
'_is_multispecies' => '',
'db' => bless( {
'seq_region_cache' => bless( {
'id_cache' => {
'27527' => [
'27527',
'5',
'2',
'180915260'
],
'27526' => [
'27526',
'19',
'2',
'59128983'
],
'27525' => [
'27525',
'10',
'2',
'135534747'
],
'27524' => [
'27524',
'4',
'2',
'191154276'
],
'27523' => [
'27523',
'8',
'2',
'146364022'
],
'27522' => [
'27522',
'20',
'2',
'63025520'
],
'27521' => [
'27521',
'15',
'2',
'102531392'
],
'27520' => [
'27520',
'14',
'2',
'107349540'
],
'27519' => [
'27519',
'12',
'2',
'133851895'
],
'27518' => [
'27518',
'9',
'2',
'141213431'
],
'27517' => [
'27517',
'3',
'2',
'198022430'
],
'27515' => [
'27515',
'6',
'2',
'171115067'
],
'27514' => [
'27514',
'16',
'2',
'90354753'
],
'27513' => [
'27513',
'13',
'2',
'115169878'
],
'27512' => [
'27512',
'18',
'2',
'78077248'
],
'27511' => [
'27511',
'1',
'2',
'249250621'
],
'27510' => [
'27510',
'22',
'2',
'51304566'
],
'27509' => [
'27509',
'17',
'2',
'81195210'
],
'27508' => [
'27508',
'2',
'2',
'243199373'
],
'27507' => [
'27507',
'Y',
'2',
'59373566'
],
'27516' => [
'27516',
'X',
'2',
'155270560'
],
'27506' => [
'27506',
'7',
'2',
'159138663'
],
'27505' => [
'27505',
'21',
'2',
'48129895'
],
'27504' => [
'27504',
'11',
'2',
'135006516'
],
'100965601' => [
'100965601',
'MT',
'2',
'16569'
]
},
'name_cache' => {
'5:2' => $VAR1->{'adaptor'}{'db'}{'seq_region_cache'}{'id_cache'}{'27527'},
'19:2' => $VAR1->{'adaptor'}{'db'}{'seq_region_cache'}{'id_cache'}{'27526'},
'10:2' => $VAR1->{'adaptor'}{'db'}{'seq_region_cache'}{'id_cache'}{'27525'},
'4:2' => $VAR1->{'adaptor'}{'db'}{'seq_region_cache'}{'id_cache'}{'27524'},
'8:2' => $VAR1->{'adaptor'}{'db'}{'seq_region_cache'}{'id_cache'}{'27523'},
'20:2' => $VAR1->{'adaptor'}{'db'}{'seq_region_cache'}{'id_cache'}{'27522'},
'15:2' => $VAR1->{'adaptor'}{'db'}{'seq_region_cache'}{'id_cache'}{'27521'},
'14:2' => $VAR1->{'adaptor'}{'db'}{'seq_region_cache'}{'id_cache'}{'27520'},
'12:2' => $VAR1->{'adaptor'}{'db'}{'seq_region_cache'}{'id_cache'}{'27519'},
'9:2' => $VAR1->{'adaptor'}{'db'}{'seq_region_cache'}{'id_cache'}{'27518'},
'3:2' => $VAR1->{'adaptor'}{'db'}{'seq_region_cache'}{'id_cache'}{'27517'},
'X:2' => $VAR1->{'adaptor'}{'db'}{'seq_region_cache'}{'id_cache'}{'27516'},
'6:2' => $VAR1->{'adaptor'}{'db'}{'seq_region_cache'}{'id_cache'}{'27515'},
'16:2' => $VAR1->{'adaptor'}{'db'}{'seq_region_cache'}{'id_cache'}{'27514'},
'13:2' => $VAR1->{'adaptor'}{'db'}{'seq_region_cache'}{'id_cache'}{'27513'},
'18:2' => $VAR1->{'adaptor'}{'db'}{'seq_region_cache'}{'id_cache'}{'27512'},
'1:2' => $VAR1->{'adaptor'}{'db'}{'seq_region_cache'}{'id_cache'}{'27511'},
'22:2' => $VAR1->{'adaptor'}{'db'}{'seq_region_cache'}{'id_cache'}{'27510'},
'17:2' => $VAR1->{'adaptor'}{'db'}{'seq_region_cache'}{'id_cache'}{'27509'},
'2:2' => $VAR1->{'adaptor'}{'db'}{'seq_region_cache'}{'id_cache'}{'27508'},
'7:2' => $VAR1->{'adaptor'}{'db'}{'seq_region_cache'}{'id_cache'}{'27506'},
'21:2' => $VAR1->{'adaptor'}{'db'}{'seq_region_cache'}{'id_cache'}{'27505'},
'11:2' => $VAR1->{'adaptor'}{'db'}{'seq_region_cache'}{'id_cache'}{'27504'},
'MT:2' => $VAR1->{'adaptor'}{'db'}{'seq_region_cache'}{'id_cache'}{'100965601'},
'Y:2' => $VAR1->{'adaptor'}{'db'}{'seq_region_cache'}{'id_cache'}{'27507'}
}
}, 'Bio::EnsEMBL::Utils::SeqRegionCache' ),
'_is_multispecies' => '',
'_dbc' => bless( {
'_username' => 'anonymous',
'connected86253' => 1,
'_timeout' => 0,
'_host' => 'ensembldb.ensembl.org',
'_port' => '5306',
'_query_count' => 10,
'_driver' => 'mysql',
'_dbname' => 'homo_sapiens_core_70_37',
'db_handle86253' => bless( {}, 'DBI::db' )
}, 'Bio::EnsEMBL::DBSQL::DBConnection' ),
'_species' => 'homo_sapiens',
'_group' => 'core',
'_species_id' => 1
}, 'Bio::EnsEMBL::DBSQL::DBAdaptor' ),
'dbc' => $VAR1->{'adaptor'}{'db'}{'_dbc'},
'species_id' => 1
}, 'Bio::EnsEMBL::DBSQL::DBEntryAdaptor' ),
'display_id' => 'A1BG',
'primary_id' => '1',
'version' => '0',
'description' => 'alpha-1-B glycoprotein',
'dbname' => 'EntrezGene',
'dbID' => '936659',
'synonyms' => [
'A1B'
],
'info_text' => '',
'info_type' => 'DEPENDENT',
'type' => 'MISC',
'db_display_name' => 'EntrezGene'
}, 'Bio::EnsEMBL::DBEntry' );
More information about the Dev
mailing list