[ensembl-dev] retrieve conservation score of all human exons
Julien Roux
julien.roux at unil.ch
Thu Jan 16 08:03:00 GMT 2014
Dear list,
I have been trying to extract the mean GERP conservation score for all
human exons using the Ensembl API.
My script is working properly, however it is quite slow (only ~1000
exons per hour) and I am wondering if there would be a way to speed it up.
I have tried to set "display_size" to 1, but t doesn't seem to give
yield the exact mean score (because it is an average of precalculated
averages I guess).
I am putting the script below.
Thanks for your help
Julien
#!/usr/bin/env perl
use strict;
use warnings;
use Bio::EnsEMBL::Registry;
use Bio::EnsEMBL::Utils::Exception qw(throw);
# This script is inspired by the example script at
http://cvs.sanger.ac.uk/cgi-bin/viewvc.cgi/ensembl-compara/scripts/examples/dna_getConservationScores.pl?root=ensembl&view=markup
my $reg = "Bio::EnsEMBL::Registry";
my $species = "Homo sapiens";
$reg->load_registry_from_db(
-host => "ensembldb.ensembl.org",
-user => "anonymous",
);
#get slice adaptor for $species
my $slice_adaptor = $reg->get_adaptor($species, 'core', 'Slice');
#get exon adaptor for $species
my $exon_adaptor = $reg->get_adaptor($species, 'Core', 'Exon');
#get method_link_species_set adaptor
my $mlss_adaptor = $reg->get_adaptor("Multi", "compara",
"MethodLinkSpeciesSet");
#get method_link_species_set object for gerp conservation scores for mammals
my $mlss =
$mlss_adaptor->fetch_by_method_link_type_species_set_name("GERP_CONSERVATION_SCORE",
"mammals");
#get conservation score adaptor
my $cs_adaptor = $reg->get_adaptor("Multi", 'compara', 'ConservationScore');
# fetch all exon IDs
my @exon_ids = @{$exon_adaptor->list_stable_ids()};
foreach my $exon (@exon_ids){
print $exon, "\n";
my $slice = $slice_adaptor->fetch_by_exon_stable_id($exon);
#get one score per base in the slice
my $display_size = $slice->end - $slice->start + 1;
my $scores =
$cs_adaptor->fetch_all_by_MethodLinkSpeciesSet_Slice($mlss, $slice,
$display_size);
# average the scores in the slice
my $sum = 0;
my $count = 0;
my $diff_score;
foreach my $score (@$scores) {
if (defined $score->diff_score) {
$sum += $score->diff_score;
$count++;
}
}
if ($count > 0){
$diff_score = $sum / $count;
}
if (defined $diff_score){
print "$exon\t$diff_score\n";
}
else {
print "$exon\tNA\n";
}
}
close OUT;
exit;
--
Julien Roux, PhD
Gilad lab, Department of Human Genetics, University of Chicago
http://giladlab.uchicago.edu/
920 East 58th Street, CLSC 317, Chicago, IL 60637, USA
tel: +1-773-834-1984 fax: +1-773-834-8470
--
Julien Roux, PhD
Gilad lab, Department of Human Genetics, University of Chicago
http://giladlab.uchicago.edu/
920 East 58th Street, CLSC 317, Chicago, IL 60637, USA
tel: +1-773-834-1984 fax: +1-773-834-8470
More information about the Dev
mailing list