GenomeOrganismInfoAdaptor.pm
Go to the documentation of this file.
00001 
00002 =head1 LICENSE
00003 
00004 Copyright [1999-2014] EMBL-European Bioinformatics Institute
00005 
00006 Licensed under the Apache License, Version 2.0 (the "License");
00007 you may not use this file except in compliance with the License.
00008 You may obtain a copy of the License at
00009 
00010      http://www.apache.org/licenses/LICENSE-2.0
00011 
00012 Unless required by applicable law or agreed to in writing, software
00013 distributed under the License is distributed on an "AS IS" BASIS,
00014 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015 See the License for the specific language governing permissions and
00016 limitations under the License.
00017 
00018 =cut
00019 
00020 =pod
00021 
00022 =head1 NAME
00023 
00024 Bio::EnsEMBL::MetaData::DBSQL::GenomeOrganismInfoAdaptor
00025 
00026 =head1 SYNOPSIS
00027 
00028 # metadata_db is an instance of MetaDataDBAdaptor
00029 my $adaptor = $metadata_db->get_GenomeOrganismInfoAdaptor();
00030 my $organism = $adaptor->fetch_by_name('homo_sapiens');
00031 
00032 =head1 DESCRIPTION
00033 
00034 Adaptor for storing and retrieving GenomeOrganismInfo objects from the MySQL ensembl_metadata database
00035 
00036 =head1 SEE ALSO
00037 
00038 Bio::EnsEMBL::MetaData::GenomeOrganismInfo
00039 
00040 =head1 Author
00041 
00042 Dan Staines
00043 
00044 =cut
00045 
00046 package Bio::EnsEMBL::MetaData::DBSQL::GenomeOrganismInfoAdaptor;
00047 
00048 use strict;
00049 use warnings;
00050 
00051 use base qw/Bio::EnsEMBL::MetaData::DBSQL::BaseInfoAdaptor/;
00052 
00053 use Carp qw(cluck croak);
00054 use Bio::EnsEMBL::Utils::Argument qw( rearrange );
00055 use Bio::EnsEMBL::MetaData::GenomeOrganismInfo;
00056 use Bio::EnsEMBL::Taxonomy::DBSQL::TaxonomyDBAdaptor;
00057 use List::MoreUtils qw(natatime);
00058 use Scalar::Util qw(looks_like_number);
00059 
00060 =head1 METHODS
00061 =cut
00062 
=head2 store
  Arg        : Bio::EnsEMBL::MetaData::GenomeOrganismInfo
  Description: Persist the supplied organism. When the object carries no
               dbID the organism table is first searched by name; if a row
               is found (or the object already had a dbID) the row is
               rewritten through update(), otherwise a new row is inserted
               along with the object's aliases and publications.
  Returntype : none
  Exceptions : none
  Caller     : general
  Status     : Stable
=cut

sub store {
  my ( $self, $organism ) = @_;

  # An object without a dbID may still correspond to an existing row:
  # look it up by name and adopt that row's key if there is one.
  unless ( defined $organism->dbID() ) {
    my $matches =
      $self->dbc()->sql_helper()->execute_simple(
                        -SQL => "select organism_id from organism where name=?",
                        -PARAMS => [ $organism->name() ] );
    my ($existing_id) = @{$matches};
    if ( defined $existing_id ) {
      $organism->dbID($existing_id);
      $organism->adaptor($self);
    }
  }

  # Known row: update() rewrites the row and its child tables.
  if ( defined $organism->dbID() ) {
    $self->update($organism);
    return;
  }

  # New row: values are listed in the order of the insert's placeholders.
  my @column_values = ( $organism->name(),        $organism->display_name(),
                        $organism->strain(),      $organism->serotype(),
                        $organism->taxonomy_id(), $organism->species_taxonomy_id(),
                        $organism->is_reference() );

  # Copies the key generated by MySQL onto the object once the insert ran.
  my $record_insert_id = sub {
    my ( $sth, $dbh, $rv ) = @_;
    $organism->dbID( $dbh->{mysql_insertid} );
  };

  $self->dbc()->sql_helper()->execute_update(
    -SQL =>
      q/insert into organism(name,display_name,strain,serotype,taxonomy_id,
species_taxonomy_id,is_reference)
        values(?,?,?,?,?,?,?)/,
    -PARAMS   => \@column_values,
    -CALLBACK => $record_insert_id );

  # Child tables need the dbID, so they are written after the insert.
  $self->_store_aliases($organism);
  $self->_store_publications($organism);
  $organism->adaptor($self);
  $self->_store_cached_obj($organism);
  return;
} ## end sub store
00111 
=head2 update
  Arg        : Bio::EnsEMBL::MetaData::GenomeOrganismInfo
  Description: Write the current state of a previously stored organism back
               to its row, then replace its aliases and publications
  Returntype : none
  Exceptions : croaks if the object has no dbID
  Caller     : general
  Status     : Stable
=cut

sub update {
  my ( $self, $organism ) = @_;
  croak "Cannot update an object that has not already been stored"
    unless defined $organism->dbID();

  # Values follow the order of the placeholders; the row key comes last
  # because it feeds the where clause.
  my @bind_values = ( $organism->name(),         $organism->display_name(),
                      $organism->strain(),       $organism->serotype(),
                      $organism->taxonomy_id(),  $organism->species_taxonomy_id(),
                      $organism->is_reference(), $organism->dbID() );

  $self->dbc()->sql_helper()->execute_update(
    -SQL =>
q/update organism set name=?,display_name=?,strain=?,serotype=?,taxonomy_id=?,species_taxonomy_id=?,
is_reference=? where organism_id=?/,
    -PARAMS => \@bind_values );

  # Child tables are rewritten wholesale by the _store_* helpers.
  $self->_store_aliases($organism);
  $self->_store_publications($organism);
  return;
}
00140 
=head2 fetch_all_by_taxonomy_id
  Arg        : Taxonomy ID
  Arg        : (optional) if 1, expand children of organism info
  Description: Fetch organism info for all organisms whose taxonomy_id
               equals the supplied ID
  Returntype : arrayref of Bio::EnsEMBL::MetaData::GenomeOrganismInfo
  Exceptions : none
  Caller     : general
  Status     : Stable
=cut

sub fetch_all_by_taxonomy_id {
  my ( $self, $id, $keen ) = @_;

  # column => value pairs handed to the generic fetcher
  my %criteria = ( taxonomy_id => $id );
  return $self->_fetch_generic_with_args( \%criteria, $keen );
}
00155 
=head2 fetch_all_by_taxonomy_ids
  Arg        : Arrayref of taxonomy IDs
  Arg        : (optional) if 1, expand children of organism info
  Description: Fetch organism info for the specified taxonomy nodes (batch).
               The supplied IDs are first reduced to those present in the
               organism table, then fetched in batches of 1000 so that the
               IN list stays bounded. An undefined or empty list yields an
               empty result without touching the database.
  Returntype : arrayref of Bio::EnsEMBL::MetaData::GenomeOrganismInfo
  Exceptions : none
  Caller     : general
  Status     : Stable
=cut

sub fetch_all_by_taxonomy_ids {
  my ( $self, $ids, $keen ) = @_;

  # nothing requested, nothing to look up
  return [] if !defined $ids || !@{$ids};

  # filter list down to the IDs the organism table actually contains;
  # undefined entries (from the caller or from NULL columns) are skipped
  my %wanted = map { $_ => 1 } grep { defined $_ } @{$ids};
  my @tids =
    grep { defined $_ && exists $wanted{$_} }
    @{ $self->dbc()->sql_helper()
      ->execute_simple( -SQL => q/select distinct taxonomy_id from organism/ )
    };

  my @organisms = ();

  # splice consumes @tids 1000 at a time; each batch is bound through
  # placeholders rather than interpolated into the statement
  while ( my @batch = splice( @tids, 0, 1000 ) ) {
    my $sql =
      _get_base_sql() . ' where taxonomy_id in (' .
      join( ',', ('?') x @batch ) . ')';
    push @organisms, @{ $self->_fetch_generic( $sql, \@batch, $keen ) };
  }
  return \@organisms;
}
00184 
=head2 fetch_all_by_taxonomy_branch
  Arg        : Bio::EnsEMBL::Taxonomy::TaxonomyNode, or a taxon ID, or a
               taxon name to be resolved through taxonomy_adaptor()
  Arg        : (optional) if 1, expand children of organism info
  Description: Fetch organism info for specified taxonomy node and its children
  Returntype : arrayref of Bio::EnsEMBL::MetaData::GenomeOrganismInfo
  Exceptions : croaks if no root is supplied or if the supplied ID/name
               cannot be resolved to a taxonomy node
  Caller     : general
  Status     : Stable
=cut

sub fetch_all_by_taxonomy_branch {
  my ( $self, $root, $keen ) = @_;
  croak "A taxonomy node, taxon ID or taxon name is required"
    if !defined $root;

  my $node = $root;

  # isa() rather than an exact ref() comparison so subclasses of
  # TaxonomyNode are used as-is instead of being treated as a name
  my $is_node = Scalar::Util::blessed($node) &&
    $node->isa('Bio::EnsEMBL::Taxonomy::TaxonomyNode');

  if ( !$is_node ) {
    if ( looks_like_number($node) ) {
      $node = $self->taxonomy_adaptor()->fetch_by_taxon_id($node);
    }
    else {
      # a name may match several nodes; the first match is used
      ($node) = @{ $self->taxonomy_adaptor()->fetch_all_by_name($node) || [] };
    }
    croak "Could not find a taxonomy node for '$root'" if !defined $node;
  }

  # the root itself plus everything below it
  my @taxids =
    ( $node->taxon_id(), @{ $node->adaptor()->fetch_descendant_ids($node) } );

  # pass the expansion flag on instead of silently dropping it
  return $self->fetch_all_by_taxonomy_ids( \@taxids, $keen );
}
00208 
=head2 fetch_by_display_name
  Arg        : Display name of organism
  Arg        : (optional) if 1, expand children of organism info
  Description: Fetch info for the organism whose display_name equals the
               supplied value
  Returntype : Bio::EnsEMBL::MetaData::GenomeOrganismInfo
  Exceptions : none
  Caller     : general
  Status     : Stable
=cut

sub fetch_by_display_name {
  my ( $self, $display_name, $keen ) = @_;

  # column => value pairs handed to the generic fetcher
  my %criteria = ( display_name => $display_name );
  return $self->_first_element(
                    $self->_fetch_generic_with_args( \%criteria, $keen ) );
}
00224 
=head2 fetch_by_name
  Arg        : Name of organism
  Arg        : (optional) if 1, expand children of organism info
  Description: Fetch info for the organism whose name equals the supplied
               value
  Returntype : Bio::EnsEMBL::MetaData::GenomeOrganismInfo
  Exceptions : none
  Caller     : general
  Status     : Stable
=cut

sub fetch_by_name {
  my ( $self, $name, $keen ) = @_;

  # column => value pairs handed to the generic fetcher
  my %criteria = ( name => $name );
  return $self->_first_element(
                    $self->_fetch_generic_with_args( \%criteria, $keen ) );
}
00239 
=head2 fetch_by_any_name
  Arg        : Name of organism (name, display name or alias)
  Arg        : (optional) if 1, expand children of organism info
  Description: Fetch info for specified organism, trying fetch_by_name,
               fetch_by_display_name and fetch_by_alias in that order and
               returning the first defined result
  Returntype : Bio::EnsEMBL::MetaData::GenomeOrganismInfo
  Exceptions : none
  Caller     : general
  Status     : Stable
=cut

sub fetch_by_any_name {
  my ( $self, $name, $keen ) = @_;

  # lookups listed from most to least specific; stop at the first hit
  my @lookups = qw(fetch_by_name fetch_by_display_name fetch_by_alias);

  my $info;
  for my $lookup (@lookups) {
    $info = $self->$lookup( $name, $keen );
    last if defined $info;
  }
  return $info;
}
00260 
=head2 fetch_all_by_name_pattern
  Arg        : Regular expression to match against organism names
  Arg        : (optional) if 1, expand children of organism info
  Description: Fetch info for all organisms whose display_name or name
               matches the supplied pattern (SQL REGEXP)
  Returntype : Arrayref of Bio::EnsEMBL::MetaData::GenomeOrganismInfo
  Exceptions : none
  Caller     : general
  Status     : Stable
=cut

sub fetch_all_by_name_pattern {
  my ( $self, $name, $keen ) = @_;

  my $sql =
    _get_base_sql() . q/ where display_name REGEXP ? or name REGEXP ? /;

  # the same pattern is bound once for each column tested
  my @bind_values = ( $name, $name );
  return $self->_fetch_generic( $sql, \@bind_values, $keen );
}
00277 
=head2 fetch_by_alias
  Arg        : Alias of organism
  Arg        : (optional) if 1, expand children of organism info
  Description: Fetch info for the organism that has the supplied alias in
               the organism_alias table
  Returntype : Bio::EnsEMBL::MetaData::GenomeOrganismInfo
  Exceptions : none
  Caller     : general
  Status     : Stable
=cut

sub fetch_by_alias {
  my ( $self, $name, $keen ) = @_;

  # aliases live in their own table, keyed by organism_id
  my $sql =
    _get_base_sql() .
    q/ join organism_alias using (organism_id) where alias=?/;

  return $self->_first_element(
                          $self->_fetch_generic( $sql, [$name], $keen ) );
}
00297 
=head1 INTERNAL METHODS

=head2 _fetch_publications
  Arg        : Bio::EnsEMBL::MetaData::GenomeOrganismInfo
  Description: Read the organism's rows from organism_publication and set
               them on the supplied object
  Returntype : none
  Exceptions : croaks if the object has no dbID
  Caller     : internal
  Status     : Stable
=cut

sub _fetch_publications {
  my ( $self, $org ) = @_;

  my $organism_id = $org->dbID();
  if ( !defined $organism_id ) {
    croak "Cannot fetch publications for an object that has not been stored";
  }

  my $helper       = $self->dbc()->sql_helper();
  my $publications = $helper->execute_simple(
     -SQL => 'select publication from organism_publication where organism_id=?',
     -PARAMS => [$organism_id] );

  $org->publications($publications);
  return;
}
00319 
=head2 _fetch_aliases
  Arg        : Bio::EnsEMBL::MetaData::GenomeOrganismInfo
  Description: Read the organism's rows from organism_alias and set them on
               the supplied object
  Returntype : none
  Exceptions : croaks if the object has no dbID
  Caller     : internal
  Status     : Stable
=cut

sub _fetch_aliases {
  my ( $self, $org ) = @_;

  my $organism_id = $org->dbID();
  if ( !defined $organism_id ) {
    croak
      "Cannot fetch aliases for a GenomeInfo object that has not been stored";
  }

  my $helper        = $self->dbc()->sql_helper();
  my $stored_aliases = $helper->execute_simple(
                 -SQL => 'select alias from organism_alias where organism_id=?',
                 -PARAMS => [$organism_id] );

  $org->aliases($stored_aliases);
  return;
}
00340 
=head2 _fetch_children
  Arg        : Bio::EnsEMBL::MetaData::GenomeOrganismInfo
  Description: Load the child data (aliases, then publications) of the
               supplied info object
  Returntype : none
  Exceptions : none
  Caller     : internal
  Status     : Stable
=cut

sub _fetch_children {
  my ( $self, $md ) = @_;

  # one loader per child table, run in this order
  for my $loader (qw(_fetch_aliases _fetch_publications)) {
    $self->$loader($md);
  }
  return;
}
00356 
=head2 _store_aliases
  Arg        : Bio::EnsEMBL::MetaData::GenomeOrganismInfo
  Description: Stores the aliases for the supplied object. Existing rows in
               organism_alias for the organism are deleted first, then one
               row is inserted per alias currently set on the object.
  Returntype : None
  Exceptions : croaks if the object has no dbID
  Caller     : internal
  Status     : Stable
=cut

sub _store_aliases {
  my ( $self, $organism ) = @_;

  # Without a dbID the delete would match nothing and the inserts would
  # carry a NULL organism_id, so refuse up front (as _fetch_aliases does).
  my $organism_id = $organism->dbID();
  croak "Cannot store aliases for an object that has not been stored"
    if !defined $organism_id;

  my $helper = $self->dbc()->sql_helper();

  # replace rather than merge: clear the old rows first
  $helper->execute_update(
                      -SQL => q/delete from organism_alias where organism_id=?/,
                      -PARAMS => [$organism_id] );

  my $aliases = $organism->aliases();
  return if !defined $aliases;

  for my $alias ( @{$aliases} ) {
    $helper->execute_update(
      -SQL => q/insert into organism_alias(organism_id,alias)
        values(?,?)/,
      -PARAMS => [ $organism_id, $alias ] );
  }
  return;
}
00383 
=head2 _store_publications
  Arg        : Bio::EnsEMBL::MetaData::GenomeOrganismInfo
  Description: Stores the publications for the supplied object. Existing
               rows in organism_publication for the organism are deleted
               first, then one row is inserted per publication currently
               set on the object.
  Returntype : None
  Exceptions : croaks if the object has no dbID
  Caller     : internal
  Status     : Stable
=cut

sub _store_publications {
  my ( $self, $organism ) = @_;

  # Without a dbID the delete would match nothing and the inserts would
  # carry a NULL organism_id, so refuse up front (as _fetch_publications
  # does).
  my $organism_id = $organism->dbID();
  croak "Cannot store publications for an object that has not been stored"
    if !defined $organism_id;

  my $helper = $self->dbc()->sql_helper();

  # replace rather than merge: clear the old rows first
  $helper->execute_update(
                -SQL => q/delete from organism_publication where organism_id=?/,
                -PARAMS => [$organism_id] );

  my $publications = $organism->publications();
  return if !defined $publications;

  for my $pub ( @{$publications} ) {
    $helper->execute_update(
      -SQL => q/insert into organism_publication(organism_id,publication)
        values(?,?)/,
      -PARAMS => [ $organism_id, $pub ] );
  }
  return;
}
00410 
=head2 taxonomy_adaptor
  Arg        : (optional) Bio::EnsEMBL::Taxonomy::DBSQL::TaxonomyNodeAdaptor
  Description: Get/set the taxonomy adaptor. When called as a getter with
               nothing cached, a taxonomy DBAdaptor is looked up in the
               registry ("multi", "taxonomy"); failing that, one is created
               for the 'ncbi_taxonomy' database using this adaptor's own
               connection details. The node adaptor obtained from it is
               cached on the object.
  Returntype : Bio::EnsEMBL::Taxonomy::DBSQL::TaxonomyNodeAdaptor
  Exceptions : none
  Caller     : internal
  Status     : Stable
=cut

sub taxonomy_adaptor {
  my ( $self, $adaptor ) = @_;

  # setter: remember the supplied adaptor and hand it back
  if ( defined $adaptor ) {
    $self->{taxonomy_adaptor} = $adaptor;
    return $self->{taxonomy_adaptor};
  }

  # getter: reuse whatever was cached by an earlier call
  return $self->{taxonomy_adaptor} if defined $self->{taxonomy_adaptor};

  my $tax_dba;
  eval {
    # try and get from the registry; a failure here just means we fall
    # through to building our own connection below
    $tax_dba = Bio::EnsEMBL::Registry->get_DBAdaptor( "multi", "taxonomy" );
  };

  if ( !defined $tax_dba ) {
    # can't find, so try and create alongside metadata
    my $dbc = $self->db()->dbc();
    $tax_dba =
      Bio::EnsEMBL::Taxonomy::DBSQL::TaxonomyDBAdaptor->new(
                                                   -USER   => $dbc->user(),
                                                   -PORT   => $dbc->port(),
                                                   -PASS   => $dbc->pass(),
                                                   -HOST   => $dbc->host(),
                                                   -DBNAME => 'ncbi_taxonomy' );
  }

  $self->{taxonomy_adaptor} = $tax_dba->get_TaxonomyNodeAdaptor()
    if defined $tax_dba;

  return $self->{taxonomy_adaptor};
} ## end sub taxonomy_adaptor
00449 
00450 # internal implementation
00451 
=head2 _get_base_sql
  Description: SQL select (without a where clause) returning the organism
               columns, with organism_id aliased to dbID. The fetch methods
               append their own join/where clauses to it.
  Returntype : String
  Exceptions : none
  Caller     : internal
  Status     : Stable
=cut

sub _get_base_sql {
  return
q/select organism_id as dbID, name, display_name, taxonomy_id, species_taxonomy_id, strain, serotype, is_reference from organism/;
}
00458 
=head2 _get_id_field
  Description: Name of the organism table column that holds an object's
               dbID
  Returntype : String
  Exceptions : none
  Caller     : internal
  Status     : Stable
=cut

sub _get_id_field {
  return q{organism_id};
}
00462 
=head2 _get_obj_class
  Description: Name of the class of the objects this adaptor handles
  Returntype : String
  Exceptions : none
  Caller     : internal
  Status     : Stable
=cut

sub _get_obj_class {
  return q{Bio::EnsEMBL::MetaData::GenomeOrganismInfo};
}
00466 
00467 1;