LookUp.pm
Go to the documentation of this file.
00001 
00002 =head1 LICENSE
00003 
00004 Copyright [2009-2014] EMBL-European Bioinformatics Institute
00005 
00006 Licensed under the Apache License, Version 2.0 (the "License");
00007 you may not use this file except in compliance with the License.
00008 You may obtain a copy of the License at
00009 
00010      http://www.apache.org/licenses/LICENSE-2.0
00011 
00012 Unless required by applicable law or agreed to in writing, software
00013 distributed under the License is distributed on an "AS IS" BASIS,
00014 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015 See the License for the specific language governing permissions and
00016 limitations under the License.
00017 
00018 =cut
00019 
00020 =pod
00021 
00022 =head1 CONTACT
00023 
00024   Please email comments or questions to the public Ensembl
00025   developers list at <dev@ensembl.org>.
00026 
00027   Questions may also be sent to the Ensembl help desk at
00028   <helpdesk@ensembl.org>.
00029 
00030 =head1 NAME
00031 
00032 Bio::EnsEMBL::LookUp
00033 
00034 =head1 SYNOPSIS
00035 
00036 # default creation using latest public release of Ensembl Genomes
00037 my $lookup = Bio::EnsEMBL::LookUp->new();
00038 
00039 =head1 DESCRIPTION
00040 
00041 This module is a helper that provides additional methods to aid navigating a registry of >30000 species across >200 databases. 
00042 It does not replace the Registry but provides some additional methods for finding species e.g. by searching for species that 
00043 have an alias that matches a regular expression, or species which are derived from a specific ENA/INSDC accession, or species
00044 that belong to a particular part of the taxonomy. 
00045 
00046 There are a number of ways of creating a lookup but the simplest is to use the default setting of the latest publicly 
00047 available Ensembl Genomes databases: 
00048 
00049     my $lookup = Bio::EnsEMBL::LookUp->new();
00050 
00051 Once a lookup has been created, there are various methods to retrieve DBAdaptors for species of interest:
00052 
00053 1. To find species by name - all DBAdaptors for species with a name or alias matching the supplied string:
00054 
00055     $dbas = $lookup->get_by_name_exact('Escherichia coli str. K-12 substr. MG1655');
00056 
00057 2. To find species by name pattern - all DBAdaptors for species with a name or alias matching the supplied regexp:
00058 
00059     $dbas = $lookup->get_all_by_name_pattern('Escherichia coli .*');
00060 
00061 3. To find species with the supplied taxonomy ID:
00062 
00063     $dbas = $lookup->get_all_by_taxon_id(388919);
00064     
00065 4. To find DBAs for all descendants of a node:
00066 
00067     $dbas = $lookup->get_all_by_taxon_branch(511145);
00068 
00069 The retrieved DBAdaptors can then be used as normal e.g.
00070 
00071     for my $gene (@{$dba->get_GeneAdaptor()->fetch_all_by_biotype('protein_coding')}) {
00072         print $gene->external_name."\n";
00073     }
00074 
00075 If a DBAdaptor is not likely to be used again, it should be disconnected to avoid running out of connections 
00076 (a disconnected DBAdaptor can be used again without having to be explicitly reconnected):
00077     
00078     $dba->dbc()->disconnect_if_idle();
00079 
00080 Once retrieved, the arguments needed for constructing a DBAdaptor directly can be dumped for later use e.g.
00081 
00082     my $args = $lookup->dba_to_args($dba);
00083     ... store and retrieve $args for use in another script ... 
00084     my $resurrected_dba = Bio::EnsEMBL::DBSQL::DBAdaptor->new(@$args);
00085 
00086 =head2 Local implementation
00087 
00088 The default implementation of LookUp is a remoting implementation that uses a MySQL database backend
00089 to look up genome information. The previous implementation loaded an internal hash from either a JSON file
00090 (remote or local) or by processing the contents of the Registry. 
00091 
00092 This implementation is still available, but has been renamed Bio::EnsEMBL::LookUp::LocalLookUp and should
00093 be constructed directly.
00094   
00095 =head1 AUTHOR
00096 
00097 dstaines
00098 
00099 =head1 MAINTAINER
00100 
00101 $Author$
00102 
00103 =head1 VERSION
00104 
00105 $Revision$
00106 
00107 =cut
00108 
00109 package Bio::EnsEMBL::LookUp;
00110 
00111 use warnings;
00112 use strict;
00113 use Bio::EnsEMBL::Utils::Argument qw(rearrange);
00114 use Bio::EnsEMBL::Utils::Exception qw(throw warning);
00115 use DBI;
00116 use JSON;
00117 use LWP::Simple;
00118 use Carp;
00119 use Data::Dumper;
00120 use Bio::EnsEMBL::LookUp::LocalLookUp;
00121 use Bio::EnsEMBL::LookUp::RemoteLookUp;
# Default cache file name. Written as a plain string literal: qw// builds a
# list and would only yield this value through scalar-context assignment.
my $default_cache_file = 'lookup_cache.json';
00123 
00124 =head1 SUBROUTINES/METHODS
00125 
00126 =head2 new
00127 
00128   Description       : Creates a new instance of LookUp, by default using Bio::EnsEMBL::LookUp::RemoteLookUp
00129   Returntype        : Instance of lookup
00130   Status            : Stable
00131 
00132   Example           : 
00133   my $lookup = Bio::EnsEMBL::LookUp->new();
00134 =cut
00135 
sub new {
  my ($class, @args) = @_;

  # Pull the recognised named arguments out of the argument list.
  my ($lookup, $registry, $url, $file) =
    rearrange([qw(lookup registry url file)], @args);

  my $self = bless({ lookup   => $lookup,
                     registry => $registry,
                     url      => $url,
                     file     => $file },
                   ref($class) || $class);

  # Supplying a registry, url or file selects the previous (local)
  # implementation; this replaces any lookup object passed in explicitly.
  my $legacy_requested = grep { defined } ($url, $registry, $file);
  if ($legacy_requested) {
    warning(
q/Direct construction of local or url\/file-based LookUp deprecated.  
    Use Bio::EnsEMBL::LookUp->new() for new remoting implementation or
    use Bio::EnsEMBL::LookUp::LocalLookUp->new() directly for previous implementation/
    );
    $self->{lookup} = Bio::EnsEMBL::LookUp::LocalLookUp->new(@args);
  }

  # Still no delegate: fall back to the remote implementation, handing it
  # the original, unmodified argument list.
  unless (defined $self->{lookup}) {
    $self->{lookup} = Bio::EnsEMBL::LookUp::RemoteLookUp->new(@args);
  }

  return $self;
}
00159 
# Deprecated forwarding wrapper: emits a warning and then calls
# Bio::EnsEMBL::LookUp::LocalLookUp->register_all_dbs with the same five
# positional arguments (host, port, user, pass, regexp).
# Returns nothing; the result of the forwarded call is discarded.
sub register_all_dbs {
  shift;    # the invocant is not used
  my ($host, $port, $user, $pass, $regexp) = @_;

  my $notice =
    q/register_all_dbs is now part of Bio::EnsEMBL::LookUp::LocalLookUp 
  and should be invoked directly/;
  warning($notice);

  Bio::EnsEMBL::LookUp::LocalLookUp->register_all_dbs(
    $host, $port, $user, $pass, $regexp
  );
  return;
}
00169 
# Perl stores the fully-qualified name of the missing method here before
# invoking AUTOLOAD. Declared with 'our' (the 'vars' pragma is obsolete).
our $AUTOLOAD;

# Forwards any method that is not defined in this package to the delegate
# object held in $self->{lookup}. The arguments are passed through
# unchanged and the delegate's return value is returned as-is.
# Croaks when invoked without an object (class-method or plain-function
# call), since there is then no delegate to forward to.
sub AUTOLOAD {
   my ( $self, @args ) = @_;
   # Strip the package qualifier, leaving the bare method name.
   (my $method = $AUTOLOAD) =~ s/^.*::(\w+)$/$1/ ;
   # Without this guard a non-object call fails on the hash dereference
   # below with an opaque strict-refs error.
   croak(qq{"$method" must be called on an object instance, }
         . q{not as a class method or function})
     unless ref $self;
   return $self->{lookup}->$method(@args);
}

# Explicit no-op destructor so that object destruction is not routed
# through AUTOLOAD to the delegate.
sub DESTROY { }    # required due to AUTOLOAD
00177 
# Returns the arguments with duplicates removed, keeping the first
# occurrence of each value and the original order. Values are compared by
# their string form, because they are used as hash keys.
sub uniq {
    my %seen;
    # The post-increment is false only the first time a value is met.
    return grep { !$seen{$_}++ } @_;
}
00189 
00190 1;
00191