RemoteLookUp.pm
Go to the documentation of this file.
00001 
00002 =head1 LICENSE
00003 
00004 Copyright [2009-2014] EMBL-European Bioinformatics Institute
00005 
00006 Licensed under the Apache License, Version 2.0 (the "License");
00007 you may not use this file except in compliance with the License.
00008 You may obtain a copy of the License at
00009 
00010      http://www.apache.org/licenses/LICENSE-2.0
00011 
00012 Unless required by applicable law or agreed to in writing, software
00013 distributed under the License is distributed on an "AS IS" BASIS,
00014 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015 See the License for the specific language governing permissions and
00016 limitations under the License.
00017 
00018 =cut
00019 
00020 =pod
00021 
00022 =head1 CONTACT
00023 
00024   Please email comments or questions to the public Ensembl
00025   developers list at <dev@ensembl.org>.
00026 
00027   Questions may also be sent to the Ensembl help desk at
00028   <helpdesk@ensembl.org>.
00029 
00030 =head1 NAME
00031 
00032 Bio::EnsEMBL::LookUp::RemoteLookUp
00033 
00034 =head1 SYNOPSIS
00035 
00036 my $adaptor = Bio::EnsEMBL::MetaData::DBSQL::GenomeInfoAdaptor->build_ensembl_genomes_adaptor();
00037 my $lookup = Bio::EnsEMBL::LookUp::RemoteLookUp->new(-ADAPTOR=>$adaptor);
00038 my $dbas = $lookup->get_all();
00039 $dbas = $lookup->get_all_by_taxon_id(388919);
00040 $dbas = $lookup->get_all_by_name_pattern("Escherichia.*");
00041 
00042 =head1 DESCRIPTION
00043 
00044 This module is an implementation of Bio::EnsEMBL::LookUp that uses Bio::EnsEMBL::MetaData::DBSQL::GenomeInfoAdaptor to
00045 access a MySQL database containing information about Ensembl and Ensembl Genomes contents and then instantiate DBAdaptors.
00046 
00047 To instantiate using the public Ensembl/EG servers for creating DBAdaptors:
00048 
00049     my $lookup = Bio::EnsEMBL::LookUp::RemoteLookUp->new(-ADAPTOR=>$adaptor);
00050 
00051 To instantiate to use a specific server on which core databases are located:
00052 
00053     my $lookup = Bio::EnsEMBL::LookUp::RemoteLookUp->new(-USER=>$user, -HOST=>$host, -PORT=>$port, -ADAPTOR=>$adaptor);
00054 
00055 Once constructed, the LookUp instance can be used as documented in Bio::EnsEMBL::LookUp.
00056 
00057 =head1 SEE ALSO
00058 
00059 Bio::EnsEMBL::LookUp
00060 
00061 =head1 AUTHOR
00062 
00063 Dan Staines
00064 
00065 =cut
00066 
00067 package Bio::EnsEMBL::LookUp::RemoteLookUp;
00068 
00069 use warnings;
00070 use strict;
00071 use Bio::EnsEMBL::DBSQL::DBAdaptor;
00072 use Bio::EnsEMBL::Compara::DBSQL::DBAdaptor;
00073 use Bio::EnsEMBL::Utils::Argument qw(rearrange);
00074 use Bio::EnsEMBL::Utils::Exception qw(throw warning);
00075 use Bio::EnsEMBL::Utils::Scalar qw(assert_ref check_ref);
00076 use Bio::EnsEMBL::MetaData::DBSQL::GenomeInfoAdaptor;
00077 use Bio::EnsEMBL::MetaData::DBSQL::MySQLServerProvider;
00078 use Bio::EnsEMBL::MetaData::DBSQL::ParameterMySQLServerProvider;
00079 use Carp;
00080 use List::MoreUtils qw(uniq);
00081 
00082 =head1 SUBROUTINES/METHODS
00083 
=head2 new

  Arg [-ADAPTOR]    : Bio::EnsEMBL::MetaData::DBSQL::GenomeInfoAdaptor
                      (optional; adaptor() builds one on first use if omitted)
  Arg [-REGISTRY]   : Registry to obtain DBAdaptors from
                      (optional; defaults to the string 'Bio::EnsEMBL::Registry')
  Arg [-PROVIDER]   : Bio::EnsEMBL::MetaData::DBSQL::MySQLServerProvider
                      (optional; when given, -HOST/-PORT/-USER/-PASS are not
                      used to build a provider)
  Arg [-HOST]       : Host containing DBAdaptors
  Arg [-PORT]       : Port for DBAdaptors
  Arg [-USER]       : User for DBAdaptors
  Arg [-PASS]       : Password for DBAdaptors
  Description       : Creates a new instance of this object. If no -PROVIDER
                      is supplied, a ParameterMySQLServerProvider is built from
                      -HOST/-PORT/-USER/-PASS when -HOST is defined; otherwise
                      a MySQLServerProvider with no arguments is used.
  Returntype        : Bio::EnsEMBL::LookUp::RemoteLookUp
  Status            : Stable
  Example           :
  my $lookup = Bio::EnsEMBL::LookUp::RemoteLookUp->new();

=cut

sub new {
  my ( $class, @args ) = @_;
  my $self = bless( {}, ref($class) || $class );

  ( $self->{_adaptor}, $self->{registry}, $self->{user},
    $self->{pass},     $self->{host},     $self->{port},
    $self->{provider} )
    = rearrange( [ 'ADAPTOR', 'REGISTRY', 'USER', 'PASS',
                   'HOST',    'PORT',     'PROVIDER' ],
                 @args );

  if ( !defined $self->{provider} ) {
    # The provider decides which server each DBAdaptor is created against.
    if ( defined $self->{host} ) {
      # An explicit host was given, so every DBAdaptor uses these fixed
      # connection details. The key must be -PORT: a misspelled key would
      # mean the provider is never handed the requested port.
      $self->{provider} =
        Bio::EnsEMBL::MetaData::DBSQL::ParameterMySQLServerProvider->new(
                                                         -HOST => $self->{host},
                                                         -PORT => $self->{port},
                                                         -USER => $self->{user},
                                                         -PASS => $self->{pass}
        );
    }
    else {
      # No host supplied: default is the public provider.
      $self->{provider} =
        Bio::EnsEMBL::MetaData::DBSQL::MySQLServerProvider->new();
    }
  }

  # DBAdaptors built by this instance are cached here, keyed by dbID.
  $self->{dba_cache} = {};
  $self->{registry} ||= q/Bio::EnsEMBL::Registry/;
  return $self;
}
00130 
=head2 genome_to_dba

    Description : Build a core Bio::EnsEMBL::DBSQL::DBAdaptor instance for the supplied info object,
                  reusing the instance cached under the same dbID if one exists
    Argument    : Bio::EnsEMBL::MetaData::GenomeInfo (may be undef)
    Exceptions  : Raised by assert_ref if the argument is defined but is not a GenomeInfo
    Return type : Bio::EnsEMBL::DBSQL::DBAdaptor, or undef if no info object was supplied

=cut

sub genome_to_dba {
  my ( $self, $genome_info ) = @_;
  my $dba;

  # An undefined info object yields undef, so a failed lookup by a caller
  # can be passed straight through without a separate check.
  if ( defined $genome_info ) {
    assert_ref( $genome_info, 'Bio::EnsEMBL::MetaData::GenomeInfo' );

    my $cache = $self->_cache();
    my $id    = $genome_info->dbID();
    $dba = $cache->{$id};

    if ( !defined $dba ) {
      my $dbname = $genome_info->dbname();

      # Work on a copy of the provider's connection arguments so that the
      # hash handed back by the provider is never modified here (it may be
      # shared between calls).
      my %args = (
        %{ $self->{provider}->args_for_genome($genome_info) },
        -DBNAME     => $dbname,
        -SPECIES    => $genome_info->name(),
        -SPECIES_ID => $genome_info->species_id(),
        # databases with "_collection_" in their name are flagged multispecies
        -MULTISPECIES_DB => ( $dbname =~ m/_collection_/ ? 1 : 0 ),
        -GROUP           => 'core',
      );

      $dba = Bio::EnsEMBL::DBSQL::DBAdaptor->new(%args);
      $cache->{$id} = $dba;
    }
  }
  return $dba;
}
00162 
=head2 genomes_to_dbas

    Description : Convert each supplied info object into a DBAdaptor by calling genome_to_dba on it
    Argument    : array ref of Bio::EnsEMBL::MetaData::GenomeInfo (may be undef)
    Exceptions  : None raised directly
    Return type : array ref of Bio::EnsEMBL::DBSQL::DBAdaptor (empty if the argument is undef)

=cut

sub genomes_to_dbas {
  my ( $self, $genomes ) = @_;

  # no list supplied: hand back an empty result rather than undef
  return [] unless defined $genomes;

  my @adaptors = map { $self->genome_to_dba($_) } @{$genomes};
  return \@adaptors;
}
00180 
=head2 compara_to_dba

    Description : Build a Bio::EnsEMBL::Compara::DBSQL::DBAdaptor instance for the supplied info object,
                  reusing the instance cached under the same dbID if one exists
    Argument    : Bio::EnsEMBL::MetaData::GenomeComparaInfo
    Exceptions  : Raised by assert_ref if the argument is not a GenomeComparaInfo
    Return type : Bio::EnsEMBL::Compara::DBSQL::DBAdaptor

=cut

sub compara_to_dba {
  my ( $self, $genome_info ) = @_;
  assert_ref( $genome_info, 'Bio::EnsEMBL::MetaData::GenomeComparaInfo' );

  # compara adaptors are cached separately from core ones, under {compara}
  my $id  = $genome_info->dbID();
  my $dba = $self->_cache()->{compara}->{$id};

  if ( !defined $dba ) {
    # Species name for the adaptor: the division itself for 'Ensembl',
    # 'multi' for every other division.
    # NOTE(review): this used to be written "!$div eq 'Ensembl'", which
    # parses as "(!$div) eq 'Ensembl'" and can never be true. It is now
    # the "not equal" test that expression spells out - please confirm
    # this is the intended division-to-species mapping.
    my $species = $genome_info->division();
    if ( !defined $species || $species ne 'Ensembl' ) {
      $species = 'multi';
    }

    # Connection details come from the provider, as in genome_to_dba.
    # A copy is used so the provider's hash is never modified here.
    my %args = (
      %{ $self->{provider}->args_for_genome($genome_info) },
      -DBNAME  => $genome_info->dbname(),
      -SPECIES => $species,
      -GROUP   => 'compara',
    );

    $dba = Bio::EnsEMBL::Compara::DBSQL::DBAdaptor->new(%args);
    $self->_cache()->{compara}->{$id} = $dba;
  }
  return $dba;
}
00209 
=head2 get_all_dbnames

    Description : Return the distinct database names of all genomes returned by the adaptor's fetch_all
    Argument    : None
    Exceptions  : None raised directly
    Return type : Arrayref of strings

=cut

sub get_all_dbnames {
  my ($self) = @_;
  my $genomes = $self->adaptor()->fetch_all();
  my @dbnames = map { $_->dbname() } @{$genomes};

  # several genomes can share one database, so report each name once
  return [ uniq(@dbnames) ];
}
00221 
=head2 get_all

    Description : Return a database adaptor for every genome returned by the adaptor's fetch_all
    Argument    : None
    Exceptions  : None raised directly
    Return type : Arrayref of Bio::EnsEMBL::DBSQL::DBAdaptor

=cut

sub get_all {
  my ($self) = @_;
  my ($genomes) = $self->adaptor()->fetch_all();
  return $self->genomes_to_dbas($genomes);
}
00233 
=head2 get_all_by_taxon_branch

    Description : Return database adaptors for the genomes the adaptor's
                  fetch_all_by_taxonomy_branch returns for the supplied taxon node
    Argument    : String (taxonomy ID of the node)
    Exceptions  : None raised directly
    Return type : Arrayref of Bio::EnsEMBL::DBSQL::DBAdaptor

=cut

sub get_all_by_taxon_branch {
  my ( $self, $taxid ) = @_;
  my ($genomes) = $self->adaptor()->fetch_all_by_taxonomy_branch($taxid);
  return $self->genomes_to_dbas($genomes);
}
00246 
=head2 get_all_by_taxon_id

    Description : Return database adaptors for the genomes the adaptor's
                  fetch_all_by_taxonomy_id returns for the supplied taxonomy ID
    Argument    : String (taxonomy ID)
    Exceptions  : None raised directly
    Return type : Arrayref of Bio::EnsEMBL::DBSQL::DBAdaptor

=cut

sub get_all_by_taxon_id {
  my ( $self, $taxid ) = @_;
  my ($genomes) = $self->adaptor()->fetch_all_by_taxonomy_id($taxid);
  return $self->genomes_to_dbas($genomes);
}
00259 
=head2 get_by_name_exact

    Description : Return the database adaptor for the genome that the adaptor's
                  fetch_by_any_name finds for the supplied alias/name
    Argument    : String
    Exceptions  : None raised directly
    Return type : Whatever genome_to_dba returns for the genome found
                  (a single Bio::EnsEMBL::DBSQL::DBAdaptor, not an arrayref)

=cut

sub get_by_name_exact {
  my ( $self, $name ) = @_;
  my ($genome) = $self->adaptor()->fetch_by_any_name($name);
  return $self->genome_to_dba($genome);
}
00271 
=head2 get_all_by_accession

    Description : Return the database adaptor(s) for genomes containing a seq_region with the supplied
                  INSDC accession (or other seq_region name). The accession is first looked up with
                  fetch_all_by_sequence_accession; if that finds nothing,
                  fetch_all_by_sequence_accession_unversioned is tried
    Argument    : String
    Exceptions  : None raised directly
    Return type : Arrayref of Bio::EnsEMBL::DBSQL::DBAdaptor

=cut

sub get_all_by_accession {
  my ( $self, $acc ) = @_;
  my $genomes = $self->adaptor()->fetch_all_by_sequence_accession($acc);

  my $found = defined $genomes && scalar( @{$genomes} ) > 0;
  unless ($found) {
    # first lookup found nothing: retry with the unversioned lookup
    $genomes =
      $self->adaptor()->fetch_all_by_sequence_accession_unversioned($acc);
  }
  return $self->genomes_to_dbas($genomes);
}
00288 
=head2 get_by_assembly_accession

    Description : Return the database adaptor for the genome whose assembly has the supplied INSDC
                  assembly accession. The accession is first looked up with fetch_by_assembly_id;
                  if that returns undef, fetch_by_assembly_id_unversioned is tried
    Argument    : String
    Exceptions  : None raised directly
    Return type : Whatever genome_to_dba returns for the genome found
                  (a single Bio::EnsEMBL::DBSQL::DBAdaptor)

=cut

sub get_by_assembly_accession {
  my ( $self, $acc ) = @_;
  my $genome = $self->adaptor()->fetch_by_assembly_id($acc);

  unless ( defined $genome ) {
    # first lookup found nothing: retry with the unversioned lookup
    $genome = $self->adaptor()->fetch_by_assembly_id_unversioned($acc);
  }
  return $self->genome_to_dba($genome);
}
00304 
=head2 get_all_by_name_pattern

    Description : Return database adaptors for the genomes the adaptor's
                  fetch_all_by_name_pattern returns for the supplied regexp
    Argument    : String (pattern matched against alias/name)
    Exceptions  : None raised directly
    Return type : Arrayref of Bio::EnsEMBL::DBSQL::DBAdaptor

=cut

sub get_all_by_name_pattern {
  my ( $self, $name ) = @_;
  my ($genomes) = $self->adaptor()->fetch_all_by_name_pattern($name);
  return $self->genomes_to_dbas($genomes);
}
00317 
=head2 get_all_by_dbname

    Description : Return database adaptors for the genomes the adaptor's
                  fetch_all_by_dbname returns for the supplied database name
    Argument    : String
    Exceptions  : None raised directly
    Return type : Arrayref of Bio::EnsEMBL::DBSQL::DBAdaptor

=cut

sub get_all_by_dbname {
  my ( $self, $name ) = @_;
  my ($genomes) = $self->adaptor()->fetch_all_by_dbname($name);
  return $self->genomes_to_dbas($genomes);
}
00329 
=head2 get_all_taxon_ids

    Description : Return the distinct taxonomy IDs of all genomes returned by the adaptor's fetch_all
    Exceptions  : None raised directly
    Return type : Arrayref of taxonomy IDs

=cut

sub get_all_taxon_ids {
  my ($self) = @_;
  my $genomes   = $self->adaptor()->fetch_all();
  my @taxon_ids = map { $_->taxonomy_id() } @{$genomes};

  # several genomes can share a taxonomy ID, so report each one once
  return [ uniq(@taxon_ids) ];
}
00341 
=head2 get_all_names

    Description : Return the name of every genome returned by the adaptor's fetch_all
                  (one entry per genome, in the order returned; not de-duplicated)
    Exceptions  : None raised directly
    Return type : Arrayref of strings

=cut

sub get_all_names {
  my ($self) = @_;
  my $genomes = $self->adaptor()->fetch_all();
  my @names   = map { $_->name() } @{$genomes};
  return \@names;
}
00352 
=head2 get_all_accessions

    Description : Intended to return all INSDC sequence accessions (or other seq_region names);
                  not implemented in this class
    Exceptions  : Always throws "Unimplemented method"
    Return type : None (never returns)

=cut

sub get_all_accessions {
  my ($self) = @_;
  throw("Unimplemented method");
}
00362 
=head2 get_all_versioned_accessions

    Description : Intended to return all versioned INSDC sequence accessions (or other seq_region names);
                  not implemented in this class
    Exceptions  : Always throws "Unimplemented method"
    Return type : None (never returns)

=cut

sub get_all_versioned_accessions {
  my ($self) = @_;
  throw("Unimplemented method");
}
00372 
=head2 get_all_assemblies

    Description : Return the distinct INSDC assembly accessions with any trailing ".<digits>"
                  version suffix removed, derived from get_all_versioned_assemblies
    Exceptions  : None raised directly
    Return type : Arrayref of strings

=cut

sub get_all_assemblies {
  my ($self) = @_;
  my @unversioned;
  for my $versioned ( @{ $self->get_all_versioned_assemblies() } ) {
    # Strip the version from a copy: s/// returns a success flag rather
    # than the edited string, and the loop variable aliases the source list.
    ( my $accession = $versioned ) =~ s/\.[0-9]+$//;
    push @unversioned, $accession;
  }

  # different versions of one assembly collapse to the same accession
  return [ uniq(@unversioned) ];
}
00383 
=head2 get_all_versioned_assemblies

    Description : Return the distinct assembly IDs of all genomes returned by the adaptor's fetch_all.
                  A genome whose assembly_id is false (e.g. undef) contributes an empty string
    Exceptions  : None raised directly
    Return type : Arrayref of strings

=cut

sub get_all_versioned_assemblies {
  my ($self) = @_;
  my $genomes = $self->adaptor()->fetch_all();
  my @assembly_ids = map {
    my $id = $_->assembly_id();
    $id ? $id : '';
  } @{$genomes};
  return [ uniq(@assembly_ids) ];
}
00396 
=head1 INTERNAL METHODS

=head2 _cache

    Description : Return the hash in which this instance caches the DBAdaptors it has built
    Exceptions  : None
    Return type : Hashref of Bio::EnsEMBL::DBSQL::DBAdaptor
    Caller      : Internal
    Status      : Stable

=cut

sub _cache {
  my $self = shift;
  return $self->{dba_cache};
}
00410 
=head2 adaptor

    Description : Return the GenomeInfoAdaptor used by this instance. If none has been set,
                  one is built with GenomeInfoAdaptor->build_ensembl_genomes_adaptor() and kept
    Exceptions  : None raised directly
    Return type : Bio::EnsEMBL::MetaData::DBSQL::GenomeInfoAdaptor
    Status      : Stable

=cut

sub adaptor {
  my ($self) = @_;
  return $self->{_adaptor} if defined $self->{_adaptor};

  # nothing supplied by the caller: default to previous behaviour
  my $default =
    Bio::EnsEMBL::MetaData::DBSQL::GenomeInfoAdaptor
    ->build_ensembl_genomes_adaptor();
  $self->{_adaptor} = $default;
  return $self->{_adaptor};
}
00429 
00430 1;
00431