00001
00002 =head1 LICENSE
00003
00004 Copyright [2009-2016] EMBL-European Bioinformatics Institute
00005
00006 Licensed under the Apache License, Version 2.0 (the "License");
00007 you may not use this file except in compliance with the License.
00008 You may obtain a copy of the License at
00009
00010 http:
00011
00012 Unless required by applicable law or agreed to in writing, software
00013 distributed under the License is distributed on an "AS IS" BASIS,
00014 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015 See the License for the specific language governing permissions and
00016 limitations under the License.
00017
00018 =head1 CONTACT
00019
00020 Please email comments or questions to the public Ensembl
00021 developers list at <dev@ensembl.org>.
00022
00023 Questions may also be sent to the Ensembl help desk at
00024 <helpdesk@ensembl.org>.
00025
00026 =head1 NAME
00027
00028 Bio::EnsEMBL::LookUp::LocalLookUp
00029
00030 =head1 SYNOPSIS
00031
00032 # creation from a database server
00033 Bio::EnsEMBL::LookUp::LocalLookUp->register_all_dbs( $conf->{host},
00034 $conf->{port}, $conf->{user}, $conf->{pass}, $conf->{db});
00035 my $lookup = Bio::EnsEMBL::LookUp::LocalLookUp->new();
00036 my $dbas = $lookup->registry()->get_all();
00037 $dbas = $lookup->get_all_by_taxon_id(388919);
00038 $dbas = $lookup->get_by_name_pattern("Escherichia.*");
00039
00040 # creation from a URL
00041 my $lookup = Bio::EnsEMBL::LookUp::::LocalLookUp->new(-URL=>'http://bacteria.ensembl.org/registry.json');
00042
00043 =head1 DESCRIPTION
00044
00045 This module is an implementation of Bio::EnsEMBL::LookUp that uses a local hash to store information about available Ensembl Genomes data.
00046 It can be constructed in a number of different ways:
00047
00048 A remote cache file containing a JSON representation of the hash can be supplied:
00049
00050 my $lookup = Bio::EnsEMBL::LookUp::LocalLookUp->new(-URL=>'http://bacteria.ensembl.org/registry.json');
00051
00052 Alternatively, a local file containing the required JSON can be specified instead:
00053
00054 my $lookup = Bio::EnsEMBL::LookUp::LocalLookUp->new(-FILE=>"/path/to/reg.json");
00055
00056 Finally, a Registry already loaded with core databases can be supplied:
00057
00058 my $lookup = Bio::EnsEMBL::LookUp::LocalLookUp->new(-REGISTRY=>'Bio::EnsEMBL::Registry');
00059
00060 If the standard Registry is used, the Arg can be omitted completely:
00061
00062 my $lookup = Bio::EnsEMBL::LookUp::LocalLookUp->new();
00063
00064 To populate the registry with just the Ensembl Genomes databases for the current software
00065 release on a specified server, the following method can be used:
00066
00067 Bio::EnsEMBL::LookUp::LocalLookUp->register_all_dbs( $host,
00068 $port, $user, $pass);
00069
00070 Once a lookup has been created, it can be used as specified in Bio::EnsEMBL::LookUp
00071
00072 =head1 SEE ALSO
00073
00074 Bio::EnsEMBL::LookUp
00075
00076 =head1 AUTHOR
00077
00078 Dan Staines
00079
00080 =cut
00081
00082 package Bio::EnsEMBL::LookUp::LocalLookUp;
00083
00084 use warnings;
00085 use strict;
00086 use Bio::EnsEMBL::ApiVersion;
00087 use Bio::EnsEMBL::Utils::ConfigRegistry;
00088 use Bio::EnsEMBL::DBSQL::DBAdaptor;
00089 use Bio::EnsEMBL::Utils::Argument qw(rearrange);
00090 use Bio::EnsEMBL::Utils::Scalar qw(assert_ref check_ref);
00091 use Bio::EnsEMBL::Utils::Exception qw(throw warning);
00092 use Bio::EnsEMBL::Taxonomy::DBSQL::TaxonomyNodeAdaptor;
00093 use List::MoreUtils qw(uniq);
00094 use Scalar::Util qw(looks_like_number);
00095 use DBI;
00096 use JSON;
00097 use LWP::Simple;
00098 use Carp;
00099
00100 my $default_cache_file = qw/lookup_cache.json/;
00101
00102 =head1 SUBROUTINES/METHODS
00103
00104 =head2 new
00105
00106 Description : Creates a new instance of this object.
00107 Arg [-REGISTRY] : (optional) Registry module to use (default is Bio::EnsEMBL::Registry)
00108 Arg [-NO_CACHE] : (optional) int 1
00109 This option will turn off the use of a local cache file for storing species details
00110 Arg [-CACHE_FILE] : (optional) String (default lookup_cache.json)
00111 This option allows use of a user specified local cache file
00112 Arg [-URL] : (optional)
00113 This option allows the use of a remote resource supplying species details in JSON format. This is not used if a local cache exists and -NO_CACHE is not set
00114 Arg [-FILE] : (optional)
00115 This option allows the use of a file supplying species details in JSON format. This is not used if a local cache exists and -NO_CACHE is not set
00116 Returntype : Instance of lookup
00117 Status : Stable
00118
00119 Example :
00120 my $lookup = Bio::EnsEMBL::LookUp::LocalLookUp->new(
00121 -REGISTRY => $reg);
00122 =cut
00123
00124 sub new {
00125 my ( $class, @args ) = @_;
00126 my $self = bless( {}, ref($class) || $class );
00127 my ( $reg, $url, $file, $nocache, $cache_file, $clear_cache, $skip_contigs ) =
00128 rearrange( [qw(registry url file no_cache cache_file clear_cache skip_contigs)],
00129 @args );
00130 if ( !defined $reg ) { $reg = 'Bio::EnsEMBL::Registry' }
00131 $self->{registry} = $reg;
00132 $self->{dbas_by_taxid} = {};
00133 $self->{dbas_by_name} = {};
00134 $self->{dbas_by_acc} = {};
00135 $self->{dbas_by_vacc} = {};
00136 $self->{dbas_by_gc} = {};
00137 $self->{dbas_by_vgc} = {};
00138 $self->{dbas} = {};
00139 $self->{dbc_meta} = {};
00140 $self->{skip_contigs} = $skip_contigs;
00141 $cache_file ||= $default_cache_file;
00142 $self->cache_file($cache_file);
00143
00144 if ( defined $clear_cache && $clear_cache == 1 ) {
00145 $self->clear_cache();
00146 }
00147 if ( !$nocache && !defined $file && -e $cache_file ) {
00148 $file = $cache_file;
00149 }
00150 if ( defined $file ) {
00151 $self->load_registry_from_file($file);
00152 }
00153 elsif ( defined $url ) {
00154 $self->load_registry_from_url($url);
00155 }
00156 else {
00157 $self->update_from_registry();
00158 }
00159 if ( !$nocache && !-e $cache_file ) {
00160 $self->write_registry_to_file($cache_file);
00161 }
00162 return $self;
00163 } ## end sub new
00164
00165 =head2 registry
00166
00167 Arg [1] : Registry module to use (default is Bio::EnsEMBL::Registry)
00168 Description : Sets and retrieves the Registry
00169 Returntype : Registry if set; otherwise undef
00170 Exceptions : if an attempt is made to set the value more than once
00171 Status : Stable
00172 =cut
00173
00174 sub registry {
00175 my ( $self, $registry ) = @_;
00176 if ( defined $registry ) {
00177 if ( exists $self->{registry} ) {
00178 throw('Cannot reset the Registry object; already defined ');
00179 }
00180 $self->{registry} = $registry;
00181 $self->update_from_registry();
00182 }
00183 return $self->{registry};
00184 }
00185
00186 =head2 cache_file
00187 Arg [1] : File to use for local caching
00188 Description : Sets and retrieves the local cache file
00189 Returntype : File name if set; otherwise undef
00190 Status : Stable
00191 =cut
00192
00193 sub cache_file {
00194 my ( $self, $cache_file ) = @_;
00195 if ( defined $cache_file ) {
00196 $self->{cache_file} = $cache_file;
00197 }
00198 return $self->{cache_file};
00199 }
00200
00201 =head2 update_from_registry
00202 Description : Update internal hashes of database adaptors by name/taxids from the registry. Invoke when registry has been updated independently.
00203 Arg : (optional) If set to 1, do not update from a DBAdaptor if already processed (allows updates of an existing LookUp instance)
00204 Exceptions : None
00205 =cut
00206
00207 sub update_from_registry {
00208 my ( $self, $update_only ) = @_;
00209 $self->_intern_db_connections();
00210 for my $dba (
00211 @{ $self->registry()->get_all_DBAdaptors( -group => 'core' ) } )
00212 {
00213 $self->_hash_dba( $dba, $update_only );
00214 $dba->dbc()->disconnect_if_idle();
00215 }
00216 return;
00217 }
00218
00219 =head2 taxonomy_adaptor
00220 Description : Get/set the taxonomy adaptor
00221 Arg : (optional) Bio::EnsEMBL::Taxonomy::DBSQL::DBAdaptor
00222 Caller : Internal
00223 Status : Stable
00224 Return : Bio::EnsEMBL::Taxonomy::DBSQL::DBAdaptor
00225 =cut
00226 sub taxonomy_adaptor {
00227 my ( $self, $adaptor ) = @_;
00228 if ( defined $adaptor ) {
00229 $self->{taxonomy_adaptor} = $adaptor;
00230 }
00231 elsif ( !defined $self->{taxonomy_adaptor} ) {
00232 $self->{taxonomy_adaptor} =
00233 Bio::EnsEMBL::Taxonomy::DBSQL::TaxonomyNodeAdaptor->new_public();
00234 }
00235 return $self->{taxonomy_adaptor};
00236 }
00237
00238 =head2 _register_dba
00239 Description : Add a single DBAdaptor to the registry and the internal hashes of details
00240 Arg : Bio::EnsEMBL::DBSQL::DBAdaptor
00241 Return : None
00242 =cut
00243
00244 sub _register_dba {
00245 my ( $self, $species, $dba ) = @_;
00246 $self->registry()->add_DBAdaptor( $species, "core", $dba );
00247 $self->_hash_dba($dba);
00248 return;
00249 }
00250
00251 sub _dba_id {
00252 my ( $self, $dba ) = @_;
00253 return $dba->dbc()->dbname() . '/' . $dba->species_id();
00254 }
00255
00256 sub _get_dbc_meta {
00257 my ( $self, $dbc ) = @_;
00258 my $name = $dbc->dbname();
00259 if ( !defined $self->{dbc_meta}{$name} ) {
00260 # load metadata in batch for the whole database
00261 $self->{dbc_meta}{$name} = {};
00262 # taxid
00263
00264 $dbc->sql_helper()->execute(
00265 -SQL =>
00266 q/select species_id,meta_value from meta where meta_key='species.taxonomy_id'/,
00267 -PARAMS => [],
00268 -CALLBACK => sub {
00269 my $row = shift @_;
00270 $self->{dbc_meta}{$name}{ $row->[0] }{taxid} = $row->[1];
00271 } );
00272 # aliases
00273 $dbc->sql_helper()->execute(
00274 -SQL =>
00275 'select species_id,meta_value from meta where meta_key=\'species.alias\'',
00276 -PARAMS => [],
00277 -CALLBACK => sub {
00278 my @row = @{ shift @_ };
00279 push @{ $self->{dbc_meta}{$name}{ $row[0] }{aliases} }, $row[1];
00280 } );
00281 # assembly.accession
00282 $dbc->sql_helper()->execute(
00283 -SQL =>
00284 'select species_id,meta_value from meta where meta_key=\'assembly.accession\'',
00285 -PARAMS => [],
00286 -CALLBACK => sub {
00287 my @row = @{ shift @_ };
00288 $self->{dbc_meta}{$name}{ $row[0] }{assembly_accession} =
00289 $row[1];
00290 } );
00291
00292 if(!defined $self->{skip_contigs}) {
00293 # seq_region_names and synonyms
00294 $dbc->sql_helper()->execute(
00295 -SQL =>
00296 q/select cs.species_id,sr.name,ss.synonym from coord_system cs
00297 join seq_region sr using (coord_system_id)
00298 left join seq_region_synonym ss using (seq_region_id)/,
00299 -PARAMS => [],
00300 -CALLBACK => sub {
00301 my @row = @{ shift @_ };
00302 push @{ $self->{dbc_meta}{$name}{ $row[0] }{accessions} },
00303 $row[1];
00304 if ( defined $row[2] ) {
00305 push @{ $self->{dbc_meta}{$name}{ $row[0] }{accessions} },
00306 $row[2];
00307 }
00308 } );
00309 }
00310 } ## end if ( !defined $self->{...})
00311 return $self->{dbc_meta}{$name};
00312 } ## end sub _get_dbc_meta
00313
00314 =head2 _hash_dba
00315 Description : Add details from a DBAdaptor to the internal hashes of details
00316 Arg : Bio::EnsEMBL::DBAdaptor
00317 Arg : (optional) If set to 1, do not update from a DBAdaptor if already processed (allows updates of an existing LookUp instance)
00318 =cut
00319
00320 sub _hash_dba {
00321 my ( $self, $dba, $update_only ) = @_;
00322 my $nom = $self->_dba_id($dba);
00323 if ( !defined $update_only || !defined $self->{dbas}{$nom} ) {
00324 my $dbc_meta = $self->_get_dbc_meta( $dba->dbc() );
00325 my $dba_meta = $dbc_meta->{ $dba->species_id() };
00326 $self->_hash_dba_from_values( $dba,
00327 [ $dba_meta->{taxid} ],
00328 $dba_meta->{aliases},
00329 $dba_meta->{accessions},
00330 $dba_meta->{assembly_accession} );
00331 }
00332 return;
00333 }
00334
00335 =head2 _hash_dba_from_values
00336 Description : Add supplied details to the internal hashes of details
00337 Arg : Bio::EnsEMBL::DBAdaptor to hash
00338 Arg : Arrayref of taxonomy IDs to use as keys
00339 Arg : Arrayref of aliases to use as keys
00340 Arg : Arrayref of ENA accessions to use as keys
00341 =cut
00342
00343 sub _hash_dba_from_values {
00344 my ( $self, $dba, $taxids, $aliases, $accessions, $vgc ) = @_;
00345
00346 for my $taxid ( @{$taxids} ) {
00347 if ( defined $taxid ) {
00348 push @{ $self->{dbas_by_taxid}{$taxid} }, $dba;
00349 }
00350 }
00351 for my $name ( uniq( @{$aliases} ) ) {
00352 push @{ $self->{dbas_by_name}{$name} }, $dba;
00353 }
00354 for my $acc ( uniq( @{$accessions} ) ) {
00355 push @{ $self->{dbas_by_vacc}{$acc} }, $dba;
00356 $acc =~ s/\.[0-9]+$
00357 push @{ $self->{dbas_by_acc}{$acc} }, $dba;
00358 }
00359 if ( defined $vgc ) {
00360 if ( defined $self->{dbas_by_vgc}{$vgc} ) {
00361 croak "DBA with GC accession $vgc already registered";
00362 }
00363 $self->{dbas_by_vgc}{$vgc} = $dba;
00364 $vgc =~ s/\.[0-9]+$
00365 if ( defined $self->{dbas_by_gc}{$vgc} ) {
00366 croak "DBA with GC set chain $vgc already registered";
00367 }
00368 $self->{dbas_by_gc}{$vgc} = $dba;
00369 }
00370 $self->{dbas}{ $self->_dba_id($dba) } = $dba;
00371 return;
00372 } ## end sub _hash_dba_from_values
00373
00374 sub _invert_dba_hash {
00375 my ($hash) = @_;
00376 my $inv_hash;
00377 while ( my ( $item, $dbas ) = each( %{$hash} ) ) {
00378 if ( ref $dbas ne 'ARRAY' ) {
00379 $dbas = [$dbas];
00380 }
00381 for my $dba ( @{$dbas} ) {
00382 my $dba_loc = _dba_to_locator($dba);
00383 push @{ $inv_hash->{$dba_loc} }, $item;
00384 }
00385 }
00386 return $inv_hash;
00387 }
00388
00389 =head2 _registry_to_hash
00390 Description : Generate a hash array structure for the current registry and species details for turning to JSON
00391 Returns : Arrayref of hashes
00392 =cut
00393
00394 sub _registry_to_hash {
00395 my ($self) = @_;
00396 # hash dbcs and dbas by locators
00397 my $dbc_hash;
00398 my $dba_hash;
00399 for my $dba ( values %{ $self->{dbas} } ) {
00400 my $dbc_loc = _dbc_to_locator( $dba->dbc() );
00401 $dbc_hash->{$dbc_loc} = $dba->dbc();
00402 push @{ $dba_hash->{$dbc_loc} }, $dba;
00403 }
00404 # create auxillary hashes
00405 my $acc_hash = _invert_dba_hash( $self->{dbas_by_vacc} );
00406 my $name_hash = _invert_dba_hash( $self->{dbas_by_name} );
00407 my $taxid_hash = _invert_dba_hash( $self->{dbas_by_taxid} );
00408 my $gc_hash = _invert_dba_hash( $self->{dbas_by_vgc} );
00409 # create array of hashes
00410 my $out_arr;
00411 while ( my ( $dbc_loc, $dbconn ) = each( %{$dbc_hash} ) ) {
00412 my $dbc_h = { driver => $dbconn->driver(),
00413 host => $dbconn->host(),
00414 port => $dbconn->port(),
00415 username => $dbconn->username(),
00416 password => $dbconn->password(),
00417 dbname => $dbconn->dbname(),
00418 species => [] };
00419 for my $dba ( @{ $dba_hash->{$dbc_loc} } ) {
00420 my $dba_loc = _dba_to_locator($dba);
00421 my $gc = $gc_hash->{$dba_loc};
00422 if ( ref $gc eq 'ARRAY' ) {
00423 $gc = $gc->[0];
00424 }
00425 push @{ $dbc_h->{species} },
00426 { species_id => $dba->species_id(),
00427 species => $dba->species(),
00428 taxids => $taxid_hash->{$dba_loc},
00429 aliases => $name_hash->{$dba_loc},
00430 accessions => $acc_hash->{$dba_loc},
00431 gc => $gc };
00432 }
00433 push @{$out_arr}, $dbc_h;
00434 }
00435 return $out_arr;
00436 } ## end sub _registry_to_hash
00437
00438 sub _registry_to_json {
00439 my ($self) = @_;
00440 my $out_arr = $self->_registry_to_hash();
00441 my $json;
00442 if ($out_arr) {
00443 $json = encode_json($out_arr);
00444 }
00445 else {
00446 carp('No DBAs found');
00447 }
00448 return $json;
00449 }
00450
00451 =head2 write_registry_to_file
00452 Description : Write the contents of the registry and species lists to a JSON file
00453 Arg : File name
00454 Return : None
00455 =cut
00456
00457 sub write_registry_to_file {
00458 my ( $self, $file ) = @_;
00459 my $json = $self->_registry_to_json();
00460 if ($json) {
00461 open( my $fh, '>', $file ) ||
00462 croak "Cannot open '${file}' for writing: $!";
00463 print $fh $json;
00464 close($fh);
00465 }
00466 return;
00467 }
00468
00469 sub clear_cache {
00470 my ($self) = @_;
00471 if ( defined $self->cache_file() && -e $self->cache_file() ) {
00472 unlink $self->cache_file();
00473 }
00474 return;
00475 }
00476
00477 =head2 _load_registry_from_json
00478 Description : load the registry from the supplied JSON string
00479 Arg : JSON string
00480 =cut
00481
00482 sub _load_registry_from_json {
00483 my ( $self, $json ) = @_;
00484 my $reg_arr = decode_json($json);
00485 for my $dbc_h ( @{$reg_arr} ) {
00486 my $dbc =
00487 Bio::EnsEMBL::DBSQL::DBConnection->new(
00488 -driver => $dbc_h->{driver},
00489 -host => $dbc_h->{host},
00490 -port => $dbc_h->{port},
00491 -user => $dbc_h->{username},
00492 -pass => $dbc_h->{password},
00493 -dbname => $dbc_h->{dbname}
00494 );
00495 for my $species ( @{ $dbc_h->{species} } ) {
00496 my $dba =
00497 Bio::EnsEMBL::DBSQL::DBAdaptor->new(
00498 -DBCONN => $dbc,
00499 -species => $species->{species},
00500 -species_id => $species->{species_id},
00501 -multispecies_db => 1,
00502 -group => 'core', );
00503 # no need to register - found automagically
00504 my $gc = $species->{gc};
00505 if ( ref $gc eq 'ARRAY' ) { $gc = $gc->[0]; }
00506 $self->_hash_dba_from_values( $dba,
00507 $species->{taxids},
00508 $species->{aliases},
00509 $species->{accessions},
00510 $species->{gc} );
00511 }
00512 } ## end for my $dbc_h ( @{$reg_arr...})
00513 $self->_intern_db_connections();
00514 return;
00515 } ## end sub _load_registry_from_json
00516
00517 sub load_registry_from_file {
00518 my ( $self, $file ) = @_;
00519 open( my $fh, '<', $file ) or croak "Cannot open file $file: $!";
00520 my $data = do { local $/ = undef; <$fh> };
00521 close($fh);
00522 if ( defined $data && $data ne '' ) {
00523 $self->_load_registry_from_json($data);
00524 }
00525 return;
00526 }
00527
00528 sub load_registry_from_url {
00529 my ( $self, $url ) = @_;
00530 my $json = get($url);
00531 croak "Could not retrieve JSON from $url: $@" unless defined $json;
00532 $self->_load_registry_from_json($json);
00533 return;
00534 }
00535
00536 =head2 dba_to_args
00537 Description : Dump the Args needed for contructing a DBA
00538 Arg : Bio::EnsEMBL::DBAdaptor
00539 Return : Arrayref of args
00540 =cut
00541
00542 sub dba_to_args {
00543 my ( $self, $dba ) = @_;
00544 my $dbc = $dba->dbc();
00545 my $args = [ -species => $dba->species(),
00546 -multispecies_db => $dba->is_multispecies(),
00547 -species_id => $dba->species_id(),
00548 -group => $dba->group(),
00549 -host => $dbc->host(),
00550 -port => $dbc->port(),
00551 -user => $dbc->username(),
00552 -pass => $dbc->password(),
00553 -driver => $dbc->driver(),
00554 -dbname => $dbc->dbname() ];
00555 return $args;
00556 }
00557
00558 =head2 _dba_to_locator
00559 Description : return a hash key for a DBAdaptor
00560 Arg : Bio::EnsEMBL::DBAdaptor
00561 =cut
00562
00563 sub _dba_to_locator {
00564 my ($dba) = @_;
00565 confess(
00566 "Arg must be Bio::EnsEMBL::DBSQL::DBAdaptor not " . ref($dba) )
00567 unless ref($dba) eq "Bio::EnsEMBL::DBSQL::DBAdaptor";
00568 my $locator =
00569 join( q{!-!}, $dba->species_id(), _dbc_to_locator( $dba->dbc() ) );
00570 return $locator;
00571 }
00572
00573 =head2 _dbc_to_locator
00574 Description : return a hash key for a DBConnection
00575 Arg : Bio::EnsEMBL::DBConnection
00576 =cut
00577
00578 sub _dbc_to_locator {
00579 my ($dbc) = @_;
00580 my $locator = join( q{!-!},
00581 $dbc->host(), $dbc->dbname(),
00582 $dbc->driver(), $dbc->port(),
00583 $dbc->username() );
00584 return $locator;
00585 }
00586
00587 =head2 _intern_db_connections()
00588 Description : Go through all available DBAdaptors of registry and ensure they use the same
00589 DBConnection instance.
00590 Exceptions : None
00591 Status : At Risk
00592 =cut
00593
00594 sub _intern_db_connections {
00595 my ($self) = @_;
00596 my $adaptors = $self->registry()->get_all_DBAdaptors();
00597 my %dbc_intern;
00598 foreach my $dba ( @{$adaptors} ) {
00599 my $dbc = $dba->dbc();
00600 my $locator = _dbc_to_locator($dbc);
00601 if ( exists $dbc_intern{$locator} ) {
00602 $dba->dbc( $dbc_intern{$locator} );
00603 }
00604 else {
00605 $dbc_intern{$locator} = $dba->dbc();
00606 }
00607 }
00608 return;
00609 }
00610
00611 =head2 get_all_DBConnections
00612 Description : Return all database connections used by the DBAs retrieved from the registry
00613 Arg : None
00614 Exceptions : None
00615 Return type : Arrayref of Bio::EnsEMBL::DBSQL::DBConnection
00616 =cut
00617
00618 sub get_all_DBConnections {
00619 my ($self) = @_;
00620 my $adaptors = $self->registry()->get_all_DBAdaptors();
00621 my %dbcs;
00622 foreach my $dba ( values %{ $self->{dbas} } ) {
00623 my $dbc = $dba->dbc();
00624 $dbcs{ _dbc_to_locator($dbc) } = $dbc;
00625 }
00626 return [ values %dbcs ];
00627 }
00628
00629 =head2 get_all_dbnames
00630 Description : Return all database names used by the DBAs retrieved from the registry
00631 Arg : None
00632 Exceptions : None
00633 Return type : Arrayref of strings
00634 =cut
00635
00636 sub get_all_dbnames {
00637 my ($self) = @_;
00638 my %dbnames;
00639 foreach my $dba ( values %{ $self->{dbas} } ) {
00640 my $dbc = $dba->dbc();
00641 $dbnames{ $dbc->dbname }++;
00642 }
00643 return [ keys %dbnames ];
00644 }
00645
00646 =head2 get_all
00647 Description : Return all database adaptors that have been retrieved from registry
00648 Arg : None
00649 Exceptions : None
00650 Return type : Arrayref of Bio::EnsEMBL::DBSQL::DatabaseAdaptor
00651 =cut
00652
00653 sub get_all {
00654 my ($self) = @_;
00655 return [ values %{ $self->{dbas} } ];
00656 }
00657
00658 sub get_all_by_taxon_branch {
00659 my ( $self, $root ) = @_;
00660 if ( ref($root) ne 'Bio::EnsEMBL::Taxonomy::TaxonomyNode' ) {
00661 if ( looks_like_number($root) ) {
00662 $root = $self->taxonomy_adaptor()->fetch_by_taxon_id($root);
00663 }
00664 else {
00665 ($root) =
00666 @{ $self->taxonomy_adaptor()->fetch_all_by_name($root) };
00667 }
00668 }
00669 if ( !defined $root ) {
00670 return [];
00671 }
00672 my @genomes = @{ $self->get_all_by_taxon_id( $root->taxon_id() ) };
00673 for my $node ( @{ $root->adaptor()->fetch_descendants($root) } ) {
00674 @genomes = ( @genomes,
00675 @{ $self->get_all_by_taxon_id( $node->taxon_id() ) } );
00676 }
00677 return \@genomes;
00678 }
00679
00680 =head2 get_all_by_taxon_id
00681 Description : Returns all database adaptors that have the supplied taxonomy ID
00682 Arg : Int
00683 Exceptions : None
00684 Return type : Arrayref of Bio::EnsEMBL::DBSQL::DatabaseAdaptor
00685 =cut
00686
00687 sub get_all_by_taxon_id {
00688 my ( $self, $id ) = @_;
00689 return $self->{dbas_by_taxid}{$id} || [];
00690 }
00691
00692 =head2 get_by_name_exact
00693 Description : Return all database adaptors that have the supplied string as an alias/name
00694 Arg : String
00695 Exceptions : None
00696 Return type : Arrayref of Bio::EnsEMBL::DBSQL::DatabaseAdaptor
00697 =cut
00698
00699 sub get_by_name_exact {
00700 my ( $self, $name ) = @_;
00701 return $self->{dbas_by_name}{$name};
00702 }
00703
00704 =head2 get_all_by_accession
00705 Description : Returns the database adaptor(s) that contains a seq_region with the supplied INSDC accession (or other seq_region name)
00706 Arg : Int
00707 Exceptions : None
00708 Return type : Arrayref of Bio::EnsEMBL::DBSQL::DatabaseAdaptor
00709 =cut
00710
00711 sub get_all_by_accession {
00712 my ( $self, $acc ) = @_;
00713 my $dba = $self->{dbas_by_vacc}{$acc};
00714 if ( !defined $dba ) {
00715 $acc =~ s/\.[0-9]+$
00716 $dba = $self->{dbas_by_acc}{$acc};
00717 }
00718 return $dba;
00719 }
00720
00721 =head2 get_by_assembly_accession
00722 Description : Returns the database adaptor that contains the assembly with the supplied INSDC assembly accession
00723 Arg : Int
00724 Exceptions : None
00725 Return type : Bio::EnsEMBL::DBSQL::DatabaseAdaptor
00726 =cut
00727
00728 sub get_by_assembly_accession {
00729 my ( $self, $acc ) = @_;
00730 my $dba = $self->{dbas_by_vgc}{$acc};
00731 if ( !defined $dba ) {
00732 $acc =~ s/\.[0-9]+$
00733 $dba = $self->{dbas_by_gc}{$acc};
00734 }
00735 return $dba;
00736 }
00737
00738 =head2 get_all_by_name_pattern
00739 Description : Return all database adaptors that have an alias/name that match the supplied regexp
00740 Arg : String
00741 Exceptions : None
00742 Return type : Arrayref of Bio::EnsEMBL::DBSQL::DatabaseAdaptor
00743 =cut
00744
00745 sub get_all_by_name_pattern {
00746 my ( $self, $pattern ) = @_;
00747 my %dbas;
00748 for my $name ( keys %{ $self->{dbas_by_name} } ) {
00749 if ( $name =~ m/$pattern/x ) {
00750 for my $dba ( @{ $self->{dbas_by_name}{$name} } ) {
00751 my $key = _dba_to_locator($dba);
00752 $dbas{$key} = $dba;
00753 }
00754 }
00755 }
00756 return [ values(%dbas) ];
00757 }
00758
00759 =head2 get_all_by_dbname
00760 Description : Returns all database adaptors that have the supplied dbname
00761 Arg : String
00762 Exceptions : None
00763 Return type : Arrayref of Bio::EnsEMBL::DBSQL::DatabaseAdaptor
00764 =cut
00765
00766 sub get_all_by_dbname {
00767 my ( $self, $dbname ) = @_;
00768
00769 my @filtered_dbas;
00770
00771 my $all = $self->get_all();
00772 foreach my $dba ( values %{ $self->{dbas} } ) {
00773 if ( $dba->dbc->dbname eq $dbname ) {
00774 push @filtered_dbas, $dba;
00775 }
00776 }
00777
00778 return \@filtered_dbas;
00779 }
00780
00781 =head2 get_all_taxon_ids
00782 Description : Return list of all taxon IDs registered with the helper
00783 Exceptions : None
00784 Return type : Arrayref of integers
00785 =cut
00786
00787 sub get_all_taxon_ids {
00788 my ($self) = @_;
00789 return [ keys( %{ $self->{dbas_by_taxid} } ) ];
00790 }
00791
00792 =head2 get_all_names
00793 Description : Return list of all species names registered with the helper
00794 Exceptions : None
00795 Return type : Arrayref of strings
00796 =cut
00797
00798 sub get_all_names {
00799 my ($self) = @_;
00800 return [ keys( %{ $self->{dbas_by_name} } ) ];
00801 }
00802
00803 =head2 get_all_accessions
00804 Description : Return list of all INSDC sequence accessions (or other seq_region names) registered with the helper
00805 Exceptions : None
00806 Return type : Arrayref of strings
00807 =cut
00808
00809 sub get_all_accessions {
00810 my ($self) = @_;
00811 return [ keys( %{ $self->{dbas_by_acc} } ) ];
00812 }
00813
00814 =head2 get_all_versioned_accessions
00815 Description : Return list of all versioned INSDC sequence accessions (or other seq_region names) registered with the helper
00816 Exceptions : None
00817 Return type : Arrayref of strings
00818 =cut
00819
00820 sub get_all_versioned_accessions {
00821 my ($self) = @_;
00822 return [ keys( %{ $self->{dbas_by_vacc} } ) ];
00823 }
00824
00825 =head2 get_all_assemblies
00826 Description : Return list of all INSDC assembly accessions registered with the helper
00827 Exceptions : None
00828 Return type : Arrayref of strings
00829 =cut
00830
00831 sub get_all_assemblies {
00832 my ($self) = @_;
00833 return [ keys( %{ $self->{dbas_by_gc} } ) ];
00834 }
00835
00836 =head2 get_all_versioned_assemblies
00837 Description : Return list of all versioned INSDC assembly accessions registered with the helper
00838 Exceptions : None
00839 Return type : Arrayref of strings
00840 =cut
00841
00842 sub get_all_versioned_assemblies {
00843 my ($self) = @_;
00844 return [ keys( %{ $self->{dbas_by_vgc} } ) ];
00845 }
00846
00847 =head2 register_all_dbs
00848 Description : Helper method to load the registry with all multispecies core databases on the supplied server
00849 Arg : Host
00850 Arg : Port
00851 Arg : User
00852 Arg : Password
00853 Arg : (optional) String with database regexp (default is _collection_core_[0-9]_eVersion_[0-9]+)
00854 Exceptions : None
00855 Return type : None
00856 =cut
00857
00858 sub register_all_dbs {
00859 my ( $class, $host, $port, $user, $pass, $regexp ) = @_;
00860
00861 if ( !$regexp ) {
00862 $regexp = '_core_[0-9]+_' . software_version() . '_[0-9]+';
00863 }
00864
00865 if ( !defined $host ) {
00866 croak "Host must be supplied for registration of databases";
00867 }
00868
00869 if ( !defined $port ) {
00870 croak
00871 "Port must be supplied for registration of databases on host $host";
00872 }
00873
00874 my $str = "DBI:mysql:host=$host;port=$port";
00875 my $dbh = DBI->connect( $str, $user, $pass ) ||
00876 croak "Could not connect to database $str (user=$user, pass=$pass)";
00877
00878 my @dbnames =
00879 grep { m/$regexp/xi }
00880 map { $_->[0] } @{ $dbh->selectall_arrayref('SHOW DATABASES') };
00881
00882 for my $db (@dbnames) {
00883 if ( $db =~ m/.*_collection_core_.*/ ) {
00884 _register_multispecies_core( $host, $port, $user, $pass, $db );
00885 }
00886 else {
00887 _register_singlespecies_core( $host, $port, $user, $pass, $db );
00888 }
00889 }
00890 return;
00891 } ## end sub register_all_dbs
00892
00893 =head2 _register_singlespecies_core
00894 Description : Register core dbas for all species in the supplied database
00895 =cut
00896
00897 sub _register_singlespecies_core {
00898 my ( $host, $port, $user, $pass, $db ) = @_;
00899 my ( $species, $num ) = (
00900 $db =~ /(^[a-z]+_[a-z0-9]+(?:_[a-z0-9]+)?) # species name
00901 _
00902 core # type
00903 _
00904 (?:\d+_)? # optional endbit for ensembl genomes
00905 (\d+) # databases release
00906 _
00907 /x );
00908 return
00909 Bio::EnsEMBL::DBSQL::DBAdaptor->new( -HOST => $host,
00910 -PORT => $port,
00911 -USER => $user,
00912 -PASS => $pass,
00913 -DBNAME => $db,
00914 -SPECIES => $species );
00915 }
00916
00917 =head2 _register_multispecies_core
00918 Description : Register core dbas for all species in the supplied database
00919 =cut
00920
00921 sub _register_multispecies_core {
00922 return _register_multispecies_x(
00923 'Bio::EnsEMBL::DBSQL::DBAdaptor',
00924 sub {
00925 my %args = @_;
00926 return Bio::EnsEMBL::DBSQL::DBAdaptor->new(%args);
00927 },
00928 @_ );
00929 }
00930
00931 =head2 _register_multispecies_x
00932 Description : Register specified dba type for all species in the supplied database
00933 =cut
00934
00935 sub _register_multispecies_x {
00936
00937 my ( $adaptor, $closure, $host, $port, $user, $pass, $instance,
00938 $species, $alias_arr_ref, $disconnect_when_idle )
00939 = @_;
00940 _runtime_include($adaptor);
00941 _check_name($instance);
00942 my %args =
00943 ( _login_hash( $host, $port, $user, $pass ), '-DBNAME', $instance );
00944 my @species_array = @{ _query_multispecies_db( $species, %args ) };
00945 my @dbas;
00946 foreach my $species_args (@species_array) {
00947 my %new_args = ( %args, %{$species_args}, '-MULTISPECIES_DB' => 1 );
00948 $new_args{-DISCONNECT_WHEN_INACTIVE} = 1 if $disconnect_when_idle;
00949 my $dba = $closure->(%new_args);
00950 if ( defined $species && defined $alias_arr_ref ) {
00951 _add_aliases( $species, $alias_arr_ref );
00952 }
00953 push( @dbas, $dba );
00954 }
00955 return \@dbas;
00956 }
00957
00958 =head2 _query_multispecies_db
00959 Description : Find all species in the multispecies database
00960 =cut
00961
00962 sub _query_multispecies_db {
00963 my ( $species, %db_args ) = @_;
00964 my @species_array;
00965 eval {
00966 my $dbh = Bio::EnsEMBL::DBSQL::DBConnection->new(%db_args);
00967 my $sql;
00968 my @params = ('species.db_name');
00969
00970 if ( defined $species ) {
00971 $sql =
00972 'select meta_value, species_id from meta where meta_key =? and meta_value =?';
00973 push( @params, $species );
00974 }
00975 else {
00976 $sql =
00977 'select distinct meta_value, species_id from meta where meta_key =? and species_id is not null';
00978 }
00979 my $sth = $dbh->prepare($sql);
00980 $sth->execute(@params);
00981 while ( my $row = $sth->fetchrow_arrayref() ) {
00982 my ( $species_name, $species_id ) = @{$row};
00983 push( @species_array,
00984 { '-SPECIES' => $species_name,
00985 '-SPECIES_ID' => $species_id } );
00986 }
00987 $sth->finish();
00988 };
00989 throw("Error detected: $@") if $@;
00990 return \@species_array;
00991 } ## end sub _query_multispecies_db
00992
00993 =head2 _add_aliases
00994 Description : Registry all aliases
00995 Arg : Species name
00996 Arg : Arrayref of alias strings
00997 =cut
00998
00999 sub _add_aliases {
01000 my ( $species_name, $alias_ref ) = @_;
01001 if ( defined $alias_ref ) {
01002 if ( ref($alias_ref) ne 'ARRAY' ) {
01003 $alias_ref = [$alias_ref];
01004 }
01005
01006 if ( scalar( @{$alias_ref} ) > 0 ) {
01007 Bio::EnsEMBL::Utils::ConfigRegistry->add_alias(
01008 -species => $species_name,
01009 -alias => $alias_ref );
01010 }
01011 }
01012 return;
01013 }
01014
01015 =head2 _runtime_include
01016 Description : Load the specified module (usually a DatabaseAdaptor)
01017 Arg : Module name
01018 =cut
01019
01020 sub _runtime_include {
01021 my ($full_module) = @_;
01022 ## no critic (ProhibitStringyEval)
01023 eval "use ${full_module}" ||
01024 throw("Cannot import module '${full_module}': $@")
01025 if $@;
01026 ## use critic
01027 return;
01028 }
01029
01030 =head2 _check_name
01031 Description : Check that the name is not longer than 64 characters
01032 Arg : Name to check
01033 =cut
01034
01035 sub _check_name {
01036 #Throws a wobbly if the db name is longer than 64 characters
01037 my ($name) = @_;
01038 my $length = length($name);
01039 throw(
01040 "Invalid length for database name used. Max is 64. The name ${name}- was ${length}"
01041 ) if $length > 64;
01042 return;
01043 }
01044
01045 =head2 _login_hash
01046 Description : Generate dbadaptor login hash from supplied Args
01047 Arg : Host
01048 Arg : Port
01049 Arg : User
01050 Arg : Password
01051 =cut
01052
01053 sub _login_hash {
01054 my ( $host, $port, $user, $pass ) = @_;
01055 my %details = ( -HOST => $host, -PORT => $port, -USER => $user );
01056 $details{-PASS} = $pass if defined $pass;
01057 return %details;
01058 }
01059
01060 1;
01061