=head1 LICENSE

Copyright [2009-2014] EMBL-European Bioinformatics Institute

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

=cut

=pod

=head1 CONTACT

Please email comments or questions to the public Ensembl
developers list at <dev@ensembl.org>.

Questions may also be sent to the Ensembl help desk at
<helpdesk@ensembl.org>.

=head1 NAME

Bio::EnsEMBL::LookUp

=head1 SYNOPSIS

  # default creation using latest public release of Ensembl Genomes
  my $lookup = Bio::EnsEMBL::LookUp->new();

=head1 DESCRIPTION

This module is a helper that provides additional methods to aid navigating a registry of >30000 species across >200 databases.
It does not replace the Registry but provides some additional methods for finding species e.g. by searching for species that
have an alias that matches a regular expression, or species which are derived from a specific ENA/INSDC accession, or species
that belong to a particular part of the taxonomy.

There are a number of ways of creating a lookup but the simplest is to use the default setting of the latest publicly
available Ensembl Genomes databases:

  my $lookup = Bio::EnsEMBL::LookUp->new();

Once a lookup has been created, there are various methods to retrieve DBAdaptors for species of interest:

1. To find species by name - all DBAdaptors for species with a name or alias matching the supplied string:

  $dbas = $lookup->get_by_name_exact('Escherichia coli str. K-12 substr. MG1655');

2. To find species by name pattern - all DBAdaptors for species with a name or alias matching the supplied regexp:

  $dbas = $lookup->get_by_name_exact('Escherichia coli .*');

3. To find species with the supplied taxonomy ID:

  $dbas = $lookup->get_all_by_taxon_id(388919);

4. To find DBAs for all descendants of a node:

  $dbas = $lookup->get_all_by_taxon_branch(511145);

The retrieved DBAdaptors can then be used as normal e.g.

  for my $gene (@{$dba->get_GeneAdaptor()->fetch_all_by_biotype('protein_coding')}) {
    print $gene->external_name."\n";
  }

If a DBAdaptor is not likely to be used again, it should be disconnected to avoid running out of connections
(a disconnected DBAdaptor can be used again without having to be explicitly reconnected):

  $dba->dbc()->disconnect_if_idle();

Once retrieved, the arguments needed for constructing a DBAdaptor directly can be dumped for later use e.g.

  my $args = $lookup->dba_to_args($dba);
  ... store and retrieve $args for use in another script ...
  my $resurrected_dba = Bio::EnsEMBL::DBSQL::DBAdaptor->new(@$args);

=head2 Local implementation

The default implementation of LookUp is a remoting implementation that uses a MySQL database backend
to look up genome information.
The previous implementation loaded an internal hash from either a JSON file
(remote or local) or by processing the contents of the Registry.

This implementation is still available, but has been renamed Bio::EnsEMBL::LookUp::LocalLookUp and should
be constructed directly.

=head1 AUTHOR

dstaines

=head1 MAINTAINER

$Author$

=head1 VERSION

$Revision$

=cut

package Bio::EnsEMBL::LookUp;

use warnings;
use strict;
use Bio::EnsEMBL::Utils::Argument qw(rearrange);
use Bio::EnsEMBL::Utils::Exception qw(throw warning);
use DBI;
use JSON;
use LWP::Simple;
use Carp;
use Data::Dumper;
use Bio::EnsEMBL::LookUp::LocalLookUp;
use Bio::EnsEMBL::LookUp::RemoteLookUp;

# Not referenced anywhere in this file; kept so the file-level state is unchanged.
my $default_cache_file = 'lookup_cache.json';

=head1 SUBROUTINES/METHODS

=head2 new

  Description : Creates a new instance of LookUp, by default using Bio::EnsEMBL::LookUp::RemoteLookUp.
                The named arguments lookup, registry, url and file are extracted with rearrange().
                If any of registry, url or file is defined, a deprecation warning is issued and a
                Bio::EnsEMBL::LookUp::LocalLookUp is built from the full argument list (replacing any
                supplied lookup). Otherwise a supplied lookup is used as-is, and if none was supplied
                a Bio::EnsEMBL::LookUp::RemoteLookUp is built from the full argument list.
  Returntype  : Instance of lookup
  Status      : Stable

  Example     :
  my $lookup = Bio::EnsEMBL::LookUp->new();

=cut

sub new {
    my ($class, @args) = @_;
    my $self = bless({}, ref($class) || $class);

    ($self->{lookup}, $self->{registry}, $self->{url}, $self->{file}) =
      rearrange([qw(lookup registry url file)], @args);

    # Any of the legacy arguments selects the old local implementation.
    if (   defined $self->{url}
        || defined $self->{registry}
        || defined $self->{file})
    {
        warning(
q/Direct construction of local or url\/file-based LookUp deprecated.
Use Bio::EnsEMBL::LookUp->new() for new remoting implementation or
use Bio::EnsEMBL::LookUp::LocalLookUp->new() directly for previous implementation/
        );
        $self->{lookup} = Bio::EnsEMBL::LookUp::LocalLookUp->new(@args);
    }

    # No delegate supplied and no legacy arguments: use the remote default.
    if (!defined $self->{lookup}) {
        $self->{lookup} = Bio::EnsEMBL::LookUp::RemoteLookUp->new(@args);
    }

    return $self;
}

=head2 register_all_dbs

  Description : Deprecated forwarding stub. Issues a warning and calls
                Bio::EnsEMBL::LookUp::LocalLookUp->register_all_dbs with the supplied
                host, port, user, pass and regexp.
  Returntype  : None
  Status      : Deprecated - invoke Bio::EnsEMBL::LookUp::LocalLookUp->register_all_dbs directly

=cut

sub register_all_dbs {
    my ($class, $host, $port, $user, $pass, $regexp) = @_;
    warning(
q/register_all_dbs is now part of Bio::EnsEMBL::LookUp::LocalLookUp
and should be invoked directly/);
    Bio::EnsEMBL::LookUp::LocalLookUp->register_all_dbs($host, $port,
        $user, $pass, $regexp);
    return;
}

=head2 AUTOLOAD

  Description : Forwards any method not defined in this package to the delegate held in
                $self->{lookup}, passing the arguments through and returning whatever the
                delegate returns.
  Exceptions  : Croaks if invoked on something that is not a reference (e.g. as a class
                method) or if no delegate has been set.

=cut

our $AUTOLOAD;

sub AUTOLOAD {
    my ($self, @args) = @_;

    # Strip the package qualifier to recover the bare method name.
    (my $method = $AUTOLOAD) =~ s/^.*::(\w+)$/$1/;

    # Without this guard a class-method call or an unset delegate dies with an
    # opaque strict-refs / "method on undefined value" error.
    if (!ref($self) || !defined $self->{lookup}) {
        croak("Cannot delegate '$method': no underlying lookup available "
            . "(method must be invoked on an instance created with new())");
    }

    return $self->{lookup}->$method(@args);
}

sub DESTROY { }    # required due to AUTOLOAD

=head2 uniq

  Description : Plain function (not a method) returning its arguments with duplicates
                removed; the first occurrence of each value is kept, in the original order.
                Values are compared by their string form.
  Returntype  : List

=cut

sub uniq {
    my @out  = ();
    my $keys = {};
    for my $v (@_) {
        if (!exists $keys->{$v}) {
            push @out, $v;
            $keys->{$v} = 1;
        }
    }
    return @out;
}

1;