Go to the documentation of this file.00001
00002 =head1 LICENSE
00003
00004 Copyright [2009-2014] EMBL-European Bioinformatics Institute
00005
00006 Licensed under the Apache License, Version 2.0 (the "License");
00007 you may not use this file except in compliance with the License.
00008 You may obtain a copy of the License at
00009
00010      http://www.apache.org/licenses/LICENSE-2.0
00011
00012 Unless required by applicable law or agreed to in writing, software
00013 distributed under the License is distributed on an "AS IS" BASIS,
00014 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015 See the License for the specific language governing permissions and
00016 limitations under the License.
00017
00018 =cut
00019
00020 =pod
00021
00022 =head1 CONTACT
00023
00024 Please email comments or questions to the public Ensembl
00025 developers list at <dev@ensembl.org>.
00026
00027 Questions may also be sent to the Ensembl help desk at
00028 <helpdesk@ensembl.org>.
00029
00030 =head1 NAME
00031
00032 Bio::EnsEMBL::LookUp::RemoteLookUp
00033
00034 =head1 SYNOPSIS
00035
00036 my $adaptor = Bio::EnsEMBL::MetaData::DBSQL::GenomeInfoAdaptor->build_ensembl_genomes_adaptor();
00037 my $lookup = Bio::EnsEMBL::LookUp::RemoteLookUp->new(-ADAPTOR=>$adaptor);
00038 my $dbas = $lookup->get_all();
00039 $dbas = $lookup->get_all_by_taxon_id(388919);
00040 $dbas = $lookup->get_all_by_name_pattern("Escherichia.*");
00041
00042 =head1 DESCRIPTION
00043
00044 This module is an implementation of Bio::EnsEMBL::LookUp that uses Bio::EnsEMBL::MetaData::DBSQL::GenomeInfoAdaptor to
00045 access a MySQL database containing information about Ensembl and Ensembl Genomes contents and then instantiate DBAdaptors.
00046
00047 To instantiate using the public Ensembl/EG servers for creating DBAdaptors:
00048
00049 my $lookup = Bio::EnsEMBL::LookUp::RemoteLookUp->new(-ADAPTOR=>$adaptor);
00050
00051 To instantiate to use a specific server on which core databases are located:
00052
00053 my $lookup = Bio::EnsEMBL::LookUp::RemoteLookUp->new(-USER=>$user, -HOST=>$host, -PORT=>$port, -ADAPTOR=>$adaptor);
00054
00055 Once constructed, the LookUp instance can be used as documented in Bio::EnsEMBL::LookUp.
00056
00057 =head1 SEE ALSO
00058
00059 Bio::EnsEMBL::LookUp
00060
00061 =head1 AUTHOR
00062
00063 Dan Staines
00064
00065 =cut
00066
00067 package Bio::EnsEMBL::LookUp::RemoteLookUp;
00068
00069 use warnings;
00070 use strict;
00071 use Bio::EnsEMBL::DBSQL::DBAdaptor;
00072 use Bio::EnsEMBL::Compara::DBSQL::DBAdaptor;
00073 use Bio::EnsEMBL::Utils::Argument qw(rearrange);
00074 use Bio::EnsEMBL::Utils::Exception qw(throw warning);
00075 use Bio::EnsEMBL::Utils::Scalar qw(assert_ref check_ref);
00076 use Bio::EnsEMBL::MetaData::DBSQL::GenomeInfoAdaptor;
00077 use Bio::EnsEMBL::MetaData::DBSQL::MySQLServerProvider;
00078 use Bio::EnsEMBL::MetaData::DBSQL::ParameterMySQLServerProvider;
00079 use Carp;
00080 use List::MoreUtils qw(uniq);
00081
00082 =head1 SUBROUTINES/METHODS
00083
=head2 new

  Arg [-ADAPTOR]  : Bio::EnsEMBL::MetaData::DBSQL::GenomeInfoAdaptor
                    (optional; when omitted, adaptor() builds one on first use)
  Arg [-REGISTRY] : Registry to obtain DBAdaptors from
                    (optional; defaults to the string 'Bio::EnsEMBL::Registry')
  Arg [-PROVIDER] : Bio::EnsEMBL::MetaData::DBSQL::MySQLServerProvider
                    (optional; when supplied, -HOST/-PORT/-USER/-PASS are
                    stored but not used to build a provider)
  Arg [-HOST]     : Host containing DBAdaptors
  Arg [-PORT]     : Port for DBAdaptors
  Arg [-USER]     : User for DBAdaptors
  Arg [-PASS]     : Password for DBAdaptors
  Description     : Creates a new instance of this object. If no provider is
                    given, a ParameterMySQLServerProvider is built from the
                    connection arguments when -HOST is defined; otherwise a
                    default MySQLServerProvider is used.
  Returntype      : Bio::EnsEMBL::LookUp::RemoteLookUp
  Exceptions      : None raised directly
  Status          : Stable
  Example         :
    my $lookup = Bio::EnsEMBL::LookUp::RemoteLookUp->new();

=cut

sub new {
  my ( $class, @args ) = @_;
  my $self = bless( {}, ref($class) || $class );
  ( $self->{_adaptor}, $self->{registry}, $self->{user},
    $self->{pass},     $self->{host},     $self->{port},
    $self->{provider} )
    = rearrange( [ 'ADAPTOR', 'REGISTRY', 'USER', 'PASS',
                   'HOST',    'PORT',     'PROVIDER' ],
                 @args );
  if ( !defined $self->{provider} ) {
    # provider is used to figure out where DBAs come from
    if ( defined $self->{host} ) {
      # we have a host, so use a fixed provider
      # (the key must be -PORT: the previous "-POR" spelling meant the
      # port supplied by the caller was never handed to the provider)
      $self->{provider} =
        Bio::EnsEMBL::MetaData::DBSQL::ParameterMySQLServerProvider->new(
                                                    -HOST => $self->{host},
                                                    -PORT => $self->{port},
                                                    -USER => $self->{user},
                                                    -PASS => $self->{pass}
        );
    }
    else {
      # default is the public provider
      $self->{provider} =
        Bio::EnsEMBL::MetaData::DBSQL::MySQLServerProvider->new();
    }
  }
  # per-instance cache of DBAdaptors that have already been built
  $self->{dba_cache} = {};
  $self->{registry} ||= q/Bio::EnsEMBL::Registry/;
  return $self;
} ## end sub new
00130
=head2 genome_to_dba

  Argument    : Bio::EnsEMBL::MetaData::GenomeInfo (may be undef)
  Description : Returns the Bio::EnsEMBL::DBSQL::DBAdaptor for the supplied
                genome, building it from the provider's connection arguments
                on first request and caching it by the genome's dbID.
                The adaptor group is always 'core'.
  Exceptions  : Whatever assert_ref raises for an argument of the wrong type
  Return type : Bio::EnsEMBL::DBSQL::DBAdaptor, or undef if no genome was given

=cut

sub genome_to_dba {
  my ( $self, $genome_info ) = @_;

  # Always hand back exactly one value (possibly undef), even when called
  # in list context, so callers collecting results get one slot per genome.
  my $dba;
  return $dba unless defined $genome_info;

  assert_ref( $genome_info, 'Bio::EnsEMBL::MetaData::GenomeInfo' );

  # serve from the cache when this genome has been seen before
  $dba = $self->_cache()->{ $genome_info->dbID() };
  return $dba if defined $dba;

  # start from the provider's connection details, then add the
  # genome-specific settings
  my $args = $self->{provider}->args_for_genome($genome_info);
  $args->{-DBNAME}     = $genome_info->dbname();
  $args->{-SPECIES}    = $genome_info->name();
  $args->{-SPECIES_ID} = $genome_info->species_id();

  # databases whose name contains "_collection_" are flagged multispecies
  my $is_collection = $genome_info->dbname() =~ m/_collection_/ ? 1 : 0;
  $args->{-MULTISPECIES_DB} = $is_collection;
  $args->{-GROUP}           = 'core';

  $dba = Bio::EnsEMBL::DBSQL::DBAdaptor->new( %{$args} );
  $self->_cache()->{ $genome_info->dbID() } = $dba;

  return $dba;
} ## end sub genome_to_dba
00162
=head2 genomes_to_dbas

  Argument    : array ref of Bio::EnsEMBL::MetaData::GenomeInfo (may be undef)
  Description : Converts each supplied genome into a DBAdaptor by calling
                genome_to_dba, preserving the input order.
  Exceptions  : None raised directly
  Return type : array ref of Bio::EnsEMBL::DBSQL::DBAdaptor (empty if no
                genomes were supplied)

=cut

sub genomes_to_dbas {
  my ( $self, $genomes ) = @_;

  # nothing supplied: return an empty list rather than undef
  return [] unless defined $genomes;

  return [ map { $self->genome_to_dba($_) } @{$genomes} ];
}
00180
=head2 compara_to_dba

  Argument    : Bio::EnsEMBL::MetaData::GenomeComparaInfo
  Description : Returns the Bio::EnsEMBL::Compara::DBSQL::DBAdaptor for the
                supplied compara info object, building it on first request
                and caching it by dbID under the 'compara' cache entry.
                The adaptor's species is the division when that is
                'Ensembl', and 'multi' for any other division.
  Exceptions  : Whatever assert_ref raises for an argument of the wrong type
  Return type : Bio::EnsEMBL::Compara::DBSQL::DBAdaptor

=cut

sub compara_to_dba {
  my ( $self, $genome_info ) = @_;
  assert_ref( $genome_info, 'Bio::EnsEMBL::MetaData::GenomeComparaInfo' );
  my $dba = $self->_cache()->{compara}->{ $genome_info->dbID() };
  if ( !defined $dba ) {
    my $div = $genome_info->division();

    # This used to read "!$div eq 'Ensembl'". Because "!" binds tighter
    # than "eq", that compared the negated value ('' or 1) with 'Ensembl'
    # and so was never true. "ne" expresses the comparison as written.
    if ( !defined $div || $div ne 'Ensembl' ) {
      $div = 'multi';
    }

    # Connection details come from the provider, as in genome_to_dba.
    # NOTE(review): assumes args_for_genome also accepts a
    # GenomeComparaInfo - confirm against the provider implementation.
    my $args = $self->{provider}->args_for_genome($genome_info);
    $args->{-DBNAME}  = $genome_info->dbname();
    $args->{-SPECIES} = $div;
    $args->{-GROUP}   = 'compara';
    $dba = Bio::EnsEMBL::Compara::DBSQL::DBAdaptor->new(%$args);

    $self->_cache()->{compara}->{ $genome_info->dbID() } = $dba;
  }
  return $dba;
}
00209
=head2 get_all_dbnames

  Argument    : None
  Description : Returns the distinct database names of all genomes returned
                by the adaptor's fetch_all.
  Exceptions  : None raised directly
  Return type : Arrayref of strings

=cut

sub get_all_dbnames {
  my ($self) = @_;
  my @dbnames = map { $_->dbname() } @{ $self->adaptor()->fetch_all() };

  # several genomes can share one database, so collapse duplicates
  return [ uniq(@dbnames) ];
}
00221
=head2 get_all

  Argument    : None
  Description : Returns database adaptors for every genome returned by the
                adaptor's fetch_all.
  Exceptions  : None raised directly
  Return type : Arrayref of Bio::EnsEMBL::DBSQL::DBAdaptor

=cut

sub get_all {
  my ($self) = @_;

  # list assignment keeps the fetch in list context, as an inline call would
  my ($genomes) = $self->adaptor()->fetch_all();
  return $self->genomes_to_dbas($genomes);
}
00233
=head2 get_all_by_taxon_branch

  Argument    : String (taxonomy ID of the branch root)
  Description : Returns database adaptors for the genomes that the adaptor's
                fetch_all_by_taxonomy_branch returns for the given node.
  Exceptions  : None raised directly
  Return type : Arrayref of Bio::EnsEMBL::DBSQL::DBAdaptor

=cut

sub get_all_by_taxon_branch {
  my ( $self, $taxid ) = @_;

  # list assignment keeps the fetch in list context, as an inline call would
  my ($genomes) = $self->adaptor()->fetch_all_by_taxonomy_branch($taxid);
  return $self->genomes_to_dbas($genomes);
}
00246
=head2 get_all_by_taxon_id

  Argument    : String (taxonomy ID)
  Description : Returns database adaptors for the genomes that the adaptor's
                fetch_all_by_taxonomy_id returns for the given ID.
  Exceptions  : None raised directly
  Return type : Arrayref of Bio::EnsEMBL::DBSQL::DBAdaptor

=cut

sub get_all_by_taxon_id {
  my ( $self, $taxid ) = @_;

  # list assignment keeps the fetch in list context, as an inline call would
  my ($genomes) = $self->adaptor()->fetch_all_by_taxonomy_id($taxid);
  return $self->genomes_to_dbas($genomes);
}
00259
=head2 get_by_name_exact

  Argument    : String (name or alias)
  Description : Returns the database adaptor for the genome that the
                adaptor's fetch_by_any_name returns for the given string.
  Exceptions  : None raised directly
  Return type : Bio::EnsEMBL::DBSQL::DBAdaptor, or undef when no genome
                was found

=cut

sub get_by_name_exact {
  my ( $self, $name ) = @_;

  # list assignment keeps the fetch in list context, as an inline call would
  my ($genome) = $self->adaptor()->fetch_by_any_name($name);
  return $self->genome_to_dba($genome);
}
00271
=head2 get_all_by_accession

  Argument    : String (INSDC sequence accession or other seq_region name)
  Description : Returns the database adaptor(s) containing a seq_region with
                the supplied accession. The accession is first looked up as
                given; if that finds nothing, the unversioned lookup is tried.
  Exceptions  : None raised directly
  Return type : Arrayref of Bio::EnsEMBL::DBSQL::DBAdaptor

=cut

sub get_all_by_accession {
  my ( $self, $acc ) = @_;

  my $adaptor = $self->adaptor();
  my $hits    = $adaptor->fetch_all_by_sequence_accession($acc);

  # no versioned match: fall back to the unversioned lookup
  unless ( defined $hits && @{$hits} ) {
    $hits = $adaptor->fetch_all_by_sequence_accession_unversioned($acc);
  }

  return $self->genomes_to_dbas($hits);
}
00288
=head2 get_by_assembly_accession

  Argument    : String (INSDC assembly accession)
  Description : Returns the database adaptor for the genome whose assembly
                has the supplied accession. The accession is first looked up
                as given; if that finds nothing, the unversioned lookup is
                tried.
  Exceptions  : None raised directly
  Return type : Bio::EnsEMBL::DBSQL::DBAdaptor, or undef when no genome
                was found

=cut

sub get_by_assembly_accession {
  my ( $self, $acc ) = @_;

  my $adaptor = $self->adaptor();
  my $match   = $adaptor->fetch_by_assembly_id($acc);

  # no versioned match: fall back to the unversioned lookup
  $match = $adaptor->fetch_by_assembly_id_unversioned($acc)
    unless defined $match;

  return $self->genome_to_dba($match);
}
00304
=head2 get_all_by_name_pattern

  Argument    : String (pattern passed to the adaptor's
                fetch_all_by_name_pattern)
  Description : Returns database adaptors for the genomes whose name or
                alias matches the supplied pattern.
  Exceptions  : None raised directly
  Return type : Arrayref of Bio::EnsEMBL::DBSQL::DBAdaptor

=cut

sub get_all_by_name_pattern {
  my ( $self, $name ) = @_;

  # list assignment keeps the fetch in list context, as an inline call would
  my ($genomes) = $self->adaptor()->fetch_all_by_name_pattern($name);
  return $self->genomes_to_dbas($genomes);
}
00317
=head2 get_all_by_dbname

  Argument    : String (database name)
  Description : Returns database adaptors for the genomes that the adaptor's
                fetch_all_by_dbname returns for the given name.
  Exceptions  : None raised directly
  Return type : Arrayref of Bio::EnsEMBL::DBSQL::DBAdaptor

=cut

sub get_all_by_dbname {
  my ( $self, $name ) = @_;

  # list assignment keeps the fetch in list context, as an inline call would
  my ($genomes) = $self->adaptor()->fetch_all_by_dbname($name);
  return $self->genomes_to_dbas($genomes);
}
00329
=head2 get_all_taxon_ids

  Argument    : None
  Description : Returns the distinct taxonomy IDs of all genomes returned by
                the adaptor's fetch_all.
  Exceptions  : None raised directly
  Return type : Arrayref of integers

=cut

sub get_all_taxon_ids {
  my ($self) = @_;
  my @taxon_ids =
    map { $_->taxonomy_id() } @{ $self->adaptor()->fetch_all() };

  # several genomes can share one taxon, so collapse duplicates
  return [ uniq(@taxon_ids) ];
}
00341
=head2 get_all_names

  Argument    : None
  Description : Returns the names of all genomes returned by the adaptor's
                fetch_all, in the order they were fetched. Duplicates are
                not removed.
  Exceptions  : None raised directly
  Return type : Arrayref of strings

=cut

sub get_all_names {
  my ($self) = @_;
  my @names;
  for my $genome ( @{ $self->adaptor()->fetch_all() } ) {
    push @names, $genome->name();
  }
  return \@names;
}
00352
=head2 get_all_accessions

  Argument    : None
  Description : Intended to return all INSDC sequence accessions (or other
                seq_region names). Not implemented by this class.
  Exceptions  : Always throws "Unimplemented method"
  Return type : Never returns

=cut

sub get_all_accessions {
  my ($self) = @_;
  throw("Unimplemented method");
}
00362
=head2 get_all_versioned_accessions

  Argument    : None
  Description : Intended to return all versioned INSDC sequence accessions
                (or other seq_region names). Not implemented by this class.
  Exceptions  : Always throws "Unimplemented method"
  Return type : Never returns

=cut

sub get_all_versioned_accessions {
  my ($self) = @_;
  throw("Unimplemented method");
}
00372
=head2 get_all_assemblies

  Argument    : None
  Description : Returns the distinct INSDC assembly accessions of all genomes
                returned by the adaptor's fetch_all, with any trailing
                ".<digits>" version suffix removed. Genomes without an
                assembly ID contribute an empty string.
  Exceptions  : None raised directly
  Return type : Arrayref of strings

=cut

sub get_all_assemblies {
  my ($self) = @_;

  # NOTE(review): the original statement was cut off part-way through the
  # substitution and could not compile. This body is reconstructed from the
  # visible regex and from get_all_versioned_assemblies; the use of uniq and
  # the '' default are assumptions - confirm against the upstream source.
  my @accessions;
  for my $genome ( @{ $self->adaptor()->fetch_all() } ) {

    # work on a copy so the genome object's own value is left untouched
    my $accession = $genome->assembly_id() || '';
    $accession =~ s/\.[0-9]+$//;    # drop the version suffix
    push @accessions, $accession;
  }

  # different versions of one assembly collapse to the same accession
  return [ uniq(@accessions) ];
}
00383
=head2 get_all_versioned_assemblies

  Argument    : None
  Description : Returns the distinct assembly IDs, exactly as stored, of all
                genomes returned by the adaptor's fetch_all. Genomes without
                an assembly ID contribute an empty string.
  Exceptions  : None raised directly
  Return type : Arrayref of strings

=cut

sub get_all_versioned_assemblies {
  my ($self) = @_;
  my @assembly_ids;
  for my $genome ( @{ $self->adaptor()->fetch_all() } ) {
    my $assembly_id = $genome->assembly_id();

    # a false (missing) ID is reported as the empty string
    push @assembly_ids, $assembly_id || '';
  }
  return [ uniq(@assembly_ids) ];
}
00396
=head1 INTERNAL METHODS

=head2 _cache

  Argument    : None
  Description : Returns the per-instance hash in which built DBAdaptors are
                stored. Core adaptors are keyed by dbID; compara adaptors
                sit under the 'compara' key, again keyed by dbID.
  Exceptions  : None
  Return type : Hashref
  Caller      : Internal
  Status      : Stable

=cut

sub _cache {
  my $self = shift;
  return $self->{dba_cache};
}
00410
=head2 adaptor

  Argument    : None
  Description : Returns the GenomeInfoAdaptor used for lookups. If none was
                supplied to the constructor, one is built on first use with
                build_ensembl_genomes_adaptor and kept for later calls.
  Exceptions  : None raised directly
  Return type : Bio::EnsEMBL::MetaData::DBSQL::GenomeInfoAdaptor
  Caller      : Internal
  Status      : Stable

=cut

sub adaptor {
  my $self = shift;

  # already set (by the constructor or an earlier call): reuse it
  return $self->{_adaptor} if defined $self->{_adaptor};

  # default to previous behaviour
  $self->{_adaptor} =
    Bio::EnsEMBL::MetaData::DBSQL::GenomeInfoAdaptor
    ->build_ensembl_genomes_adaptor();
  return $self->{_adaptor};
}
00429
00430 1;
00431