00001
00002 =head1 LICENSE
00003
00004 Copyright [1999-2014] EMBL-European Bioinformatics Institute
00005
00006 Licensed under the Apache License, Version 2.0 (the "License");
00007 you may not use this file except in compliance with the License.
00008 You may obtain a copy of the License at
00009
     http://www.apache.org/licenses/LICENSE-2.0
00011
00012 Unless required by applicable law or agreed to in writing, software
00013 distributed under the License is distributed on an "AS IS" BASIS,
00014 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015 See the License for the specific language governing permissions and
00016 limitations under the License.
00017
00018 =cut
00019
00020 =pod
00021
00022 =head1 NAME
00023
00024 Bio::EnsEMBL::MetaData::DBSQL::GenomeOrganismInfoAdaptor
00025
00026 =head1 SYNOPSIS
00027
00028 # metadata_db is an instance of MetaDataDBAdaptor
00029 my $adaptor = $metadata_db->get_GenomeOrganismInfoAdaptor();
my $organism = $adaptor->fetch_by_name('homo_sapiens');
00031
00032 =head1 DESCRIPTION
00033
00034 Adaptor for storing and retrieving GenomeOrganismInfo objects from MySQL ensembl_metadata database
00035
00036 =head1 SEE ALSO
00037
00038 Bio::EnsEMBL::MetaData::GenomeOrganismInfo
00039
00040 =head1 Author
00041
00042 Dan Staines
00043
00044 =cut
00045
00046 package Bio::EnsEMBL::MetaData::DBSQL::GenomeOrganismInfoAdaptor;
00047
00048 use strict;
00049 use warnings;
00050
00051 use base qw/Bio::EnsEMBL::MetaData::DBSQL::BaseInfoAdaptor/;
00052
00053 use Carp qw(cluck croak);
00054 use Bio::EnsEMBL::Utils::Argument qw( rearrange );
00055 use Bio::EnsEMBL::MetaData::GenomeOrganismInfo;
00056 use Bio::EnsEMBL::Taxonomy::DBSQL::TaxonomyDBAdaptor;
00057 use List::MoreUtils qw(natatime);
00058 use Scalar::Util qw(looks_like_number);
00059
00060 =head1 METHODS
00061 =cut
00062
=head2 store

  Arg        : Bio::EnsEMBL::MetaData::GenomeOrganismInfo
  Description: Store the supplied object. An object without a dbID is first
               looked up in the organism table by name; when a row is found
               its ID and this adaptor are set on the object. Any object that
               then has a dbID is passed to update(); otherwise a new row is
               inserted, followed by its aliases and publications.
  Returntype : none
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub store {
  my ( $self, $organism ) = @_;

  if ( !defined $organism->dbID() ) {
    # No dbID yet: adopt the ID of a stored organism with the same name.
    my $known_ids =
      $self->dbc()->sql_helper()->execute_simple(
                      -SQL    => "select organism_id from organism where name=?",
                      -PARAMS => [ $organism->name() ] );
    my ($known_id) = @{$known_ids};
    if ( defined $known_id ) {
      $organism->dbID($known_id);
      $organism->adaptor($self);
    }
  }

  # Whatever carries a dbID at this point already has a row to update.
  if ( defined $organism->dbID() ) {
    $self->update($organism);
    return;
  }

  # Bind values, in the column order of the insert statement.
  my @row = ( $organism->name(),        $organism->display_name(),
              $organism->strain(),      $organism->serotype(),
              $organism->taxonomy_id(), $organism->species_taxonomy_id(),
              $organism->is_reference() );

  $self->dbc()->sql_helper()->execute_update(
    -SQL =>
      q/insert into organism(name,display_name,strain,serotype,taxonomy_id,
 species_taxonomy_id,is_reference)
 values(?,?,?,?,?,?,?)/,
    -PARAMS   => \@row,
    -CALLBACK => sub {
      my ( $sth, $dbh, $rv ) = @_;
      # Pick up the ID that was generated for the new row.
      $organism->dbID( $dbh->{mysql_insertid} );
    } );

  $self->_store_aliases($organism);
  $self->_store_publications($organism);
  $organism->adaptor($self);
  $self->_store_cached_obj($organism);
  return;
} ## end sub store
00111
=head2 update

  Arg        : Bio::EnsEMBL::MetaData::GenomeOrganismInfo
  Description: Update the supplied object (must be previously stored): the
               organism row identified by its dbID is overwritten with the
               object's current values, then its aliases and publications
               are stored again
  Returntype : none
  Exceptions : croaks if the object has no dbID
  Caller     : general
  Status     : Stable

=cut

sub update {
  my ( $self, $organism ) = @_;

  croak "Cannot update an object that has not already been stored"
    if !defined $organism->dbID();

  # Bind values in placeholder order; the trailing dbID feeds the where clause.
  my @binds = ( $organism->name(),         $organism->display_name(),
                $organism->strain(),       $organism->serotype(),
                $organism->taxonomy_id(),  $organism->species_taxonomy_id(),
                $organism->is_reference(), $organism->dbID() );

  $self->dbc()->sql_helper()->execute_update(
    -SQL =>
      q/update organism set name=?,display_name=?,strain=?,serotype=?,taxonomy_id=?,species_taxonomy_id=?,
 is_reference=? where organism_id=?/,
    -PARAMS => \@binds );

  $self->_store_aliases($organism);
  $self->_store_publications($organism);
  return;
}
00140
=head2 fetch_all_by_taxonomy_id

  Arg        : Taxonomy ID
  Arg        : (optional) if 1, expand children of genome info
  Description: Fetch organism info for the specified taxonomy node
  Returntype : arrayref of Bio::EnsEMBL::MetaData::GenomeOrganismInfo
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub fetch_all_by_taxonomy_id {
  my ( $self, $id, $keen ) = @_;

  # Single-column filter handed to the generic fetch routine.
  my %criteria = ( taxonomy_id => $id );
  return $self->_fetch_generic_with_args( \%criteria, $keen );
}
00155
=head2 fetch_all_by_taxonomy_ids

  Arg        : Arrayref of Taxonomy ID
  Arg        : (optional) if 1, expand children of genome info
  Description: Fetch organism info for the specified taxonomy nodes (batch).
               The requested IDs are first reduced to those that occur in
               the organism table, then fetched in batches of 1000.
  Returntype : arrayref of Bio::EnsEMBL::MetaData::GenomeOrganismInfo
               (empty when no IDs are supplied or none are in use)
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub fetch_all_by_taxonomy_ids {
  my ( $self, $ids, $keen ) = @_;

  # Nothing requested: answer without touching the database.
  return [] if !defined $ids || !@{$ids};

  # filter list down to the taxonomy IDs actually present in the organism
  # table; rows with a NULL taxonomy_id can never match and are skipped
  my %wanted = map { $_ => 1 } grep { defined } @{$ids};
  my $stored =
    $self->dbc()->sql_helper()
    ->execute_simple( -SQL => q/select distinct taxonomy_id from organism/ );
  my @tids = grep { defined $_ && exists $wanted{$_} } @{$stored};

  my @genomes;
  my $it = natatime( 1000, @tids );
  while ( my @vals = $it->() ) {
    # @vals are values read back from organism.taxonomy_id above, not the
    # caller's raw input, which is why they are interpolated directly.
    my $sql =
      _get_base_sql() . ' where taxonomy_id in (' . join( ',', @vals ) . ')';
    push @genomes, @{ $self->_fetch_generic( $sql, [], $keen ) };
  }
  return \@genomes;
}
00184
=head2 fetch_all_by_taxonomy_branch

  Arg        : Bio::EnsEMBL::Taxonomy::TaxonomyNode, or a numeric taxon ID,
               or a taxon name to be resolved through taxonomy_adaptor()
  Arg        : (optional) if 1, expand children of genome info
  Description: Fetch organism info for specified taxonomy node and its children
  Returntype : arrayref of Bio::EnsEMBL::MetaData::GenomeOrganismInfo
  Exceptions : croaks if no root is supplied or it cannot be resolved to a
               taxonomy node
  Caller     : general
  Status     : Stable

=cut

sub fetch_all_by_taxonomy_branch {
  my ( $self, $root, $keen ) = @_;

  croak "A taxonomy node, taxon ID or taxon name is required"
    if !defined $root;

  # isa() rather than an exact ref() comparison so that subclasses of
  # TaxonomyNode are used as they are instead of being treated as names.
  my $node_class = 'Bio::EnsEMBL::Taxonomy::TaxonomyNode';
  if ( !( Scalar::Util::blessed($root) && $root->isa($node_class) ) ) {
    my $query = $root;
    if ( looks_like_number($query) ) {
      $root = $self->taxonomy_adaptor()->fetch_by_taxon_id($query);
    }
    else {
      # several nodes may share a name; the first one returned is used
      ($root) = @{ $self->taxonomy_adaptor()->fetch_all_by_name($query) };
    }
    croak "Could not find taxonomy node for '$query'" if !defined $root;
  }

  my @taxids =
    ( $root->taxon_id(), @{ $root->adaptor()->fetch_descendant_ids($root) } );

  # $keen is forwarded so that the caller's request to expand children is
  # not silently dropped.
  return $self->fetch_all_by_taxonomy_ids( \@taxids, $keen );
}
00208
=head2 fetch_by_display_name

  Arg        : Display name of organism
  Arg        : (optional) if 1, expand children of genome info
  Description: Fetch info for the organism whose display_name matches
  Returntype : Bio::EnsEMBL::MetaData::GenomeOrganismInfo
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub fetch_by_display_name {
  my ( $self, $display_name, $keen ) = @_;

  my %criteria = ( display_name => $display_name );
  return $self->_first_element(
                      $self->_fetch_generic_with_args( \%criteria, $keen ) );
}
00224
=head2 fetch_by_name

  Arg        : Name of organism
  Arg        : (optional) if 1, expand children of genome info
  Description: Fetch info for the organism whose name matches
  Returntype : Bio::EnsEMBL::MetaData::GenomeOrganismInfo
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub fetch_by_name {
  my ( $self, $name, $keen ) = @_;

  my %criteria = ( name => $name );
  return $self->_first_element(
                      $self->_fetch_generic_with_args( \%criteria, $keen ) );
}
00239
=head2 fetch_by_any_name

  Arg        : Name of organism (name, display name or alias)
  Arg        : (optional) if 1, expand children of genome info
  Description: Fetch info for specified organism, trying fetch_by_name,
               fetch_by_display_name and fetch_by_alias in that order and
               stopping at the first that returns a defined result
  Returntype : Bio::EnsEMBL::MetaData::GenomeOrganismInfo
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub fetch_by_any_name {
  my ( $self, $name, $keen ) = @_;

  my $found;
  for my $lookup (qw(fetch_by_name fetch_by_display_name fetch_by_alias)) {
    $found = $self->$lookup( $name, $keen );
    last if defined $found;
  }
  return $found;
}
00260
=head2 fetch_all_by_name_pattern

  Arg        : Regular expression to match against organism names
  Arg        : (optional) if 1, expand children of genome info
  Description: Fetch info for all organisms whose display_name or name
               matches the pattern (SQL REGEXP)
  Returntype : Arrayref of Bio::EnsEMBL::MetaData::GenomeOrganismInfo
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub fetch_all_by_name_pattern {
  my ( $self, $name, $keen ) = @_;

  my $sql =
    _get_base_sql() . q/ where display_name REGEXP ? or name REGEXP ? /;

  # The same pattern is bound once for each of the two placeholders.
  my @binds = ( $name, $name );
  return $self->_fetch_generic( $sql, \@binds, $keen );
}
00277
=head2 fetch_by_alias

  Arg        : Alias of organism
  Arg        : (optional) if 1, expand children of genome info
  Description: Fetch info for the organism that has the specified alias in
               organism_alias
  Returntype : Bio::EnsEMBL::MetaData::GenomeOrganismInfo
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub fetch_by_alias {
  my ( $self, $name, $keen ) = @_;

  # Base organism select joined to the alias table on organism_id.
  my $sql =
    _get_base_sql() . q/ join organism_alias using (organism_id) where alias=?/;
  return $self->_first_element(
                           $self->_fetch_generic( $sql, [$name], $keen ) );
}
00297
=head1 INTERNAL METHODS

=head2 _fetch_publications

  Arg        : Bio::EnsEMBL::MetaData::GenomeOrganismInfo
  Description: Read the organism's rows from organism_publication and set
               them as the publications of the supplied object
  Returntype : none
  Exceptions : croaks if the object has no dbID
  Caller     : internal
  Status     : Stable

=cut

sub _fetch_publications {
  my ( $self, $org ) = @_;

  if ( !defined $org->dbID() ) {
    croak "Cannot fetch publications for an object that has not been stored";
  }

  my %query = (
    -SQL => 'select publication from organism_publication where organism_id=?',
    -PARAMS => [ $org->dbID() ] );
  my $pubs = $self->dbc()->sql_helper()->execute_simple(%query);

  $org->publications($pubs);
  return;
}
00319
=head2 _fetch_aliases

  Arg        : Bio::EnsEMBL::MetaData::GenomeOrganismInfo
  Description: Read the organism's rows from organism_alias and set them as
               the aliases of the supplied object
  Returntype : none
  Exceptions : croaks if the object has no dbID
  Caller     : internal
  Status     : Stable

=cut

sub _fetch_aliases {
  my ( $self, $org ) = @_;

  if ( !defined $org->dbID() ) {
    croak
      "Cannot fetch aliases for a GenomeInfo object that has not been stored";
  }

  my %query = ( -SQL    => 'select alias from organism_alias where organism_id=?',
                -PARAMS => [ $org->dbID() ] );
  my $aliases = $self->dbc()->sql_helper()->execute_simple(%query);

  $org->aliases($aliases);
  return;
}
00340
=head2 _fetch_children

  Arg        : Bio::EnsEMBL::MetaData::GenomeOrganismInfo
  Description: Fetch all children of the specified info object: its aliases,
               then its publications
  Returntype : none
  Exceptions : none
  Caller     : internal
  Status     : Stable

=cut

sub _fetch_children {
  my ( $self, $md ) = @_;

  # Each loader attaches one kind of child data to the object.
  for my $loader (qw(_fetch_aliases _fetch_publications)) {
    $self->$loader($md);
  }
  return;
}
00356
=head2 _store_aliases

  Arg        : Bio::EnsEMBL::MetaData::GenomeOrganismInfo
  Description: Stores the aliases for the supplied object. All existing
               organism_alias rows for the organism are deleted and one row
               is inserted per alias currently held by the object.
  Returntype : None
  Exceptions : croaks if the object has no dbID
  Caller     : internal
  Status     : Stable

=cut

sub _store_aliases {
  my ( $self, $organism ) = @_;

  # Without a dbID the delete would match nothing and the inserts would carry
  # a NULL organism_id, so refuse up front like the _fetch_* methods do.
  my $organism_id = $organism->dbID();
  croak "Cannot store aliases for an object that has not been stored"
    if !defined $organism_id;

  my $helper = $self->dbc()->sql_helper();

  # Replace, not merge: drop the stored aliases before writing the current set.
  $helper->execute_update(
                     -SQL    => q/delete from organism_alias where organism_id=?/,
                     -PARAMS => [$organism_id] );

  my $aliases = $organism->aliases();
  return if !defined $aliases;

  for my $alias ( @{$aliases} ) {
    $helper->execute_update(
      -SQL => q/insert into organism_alias(organism_id,alias)
 values(?,?)/,
      -PARAMS => [ $organism_id, $alias ] );
  }
  return;
}
00383
=head2 _store_publications

  Arg        : Bio::EnsEMBL::MetaData::GenomeOrganismInfo
  Description: Stores the publications for the supplied object. All existing
               organism_publication rows for the organism are deleted and
               one row is inserted per publication currently held by the
               object.
  Returntype : None
  Exceptions : croaks if the object has no dbID
  Caller     : internal
  Status     : Stable

=cut

sub _store_publications {
  my ( $self, $organism ) = @_;

  # Without a dbID the delete would match nothing and the inserts would carry
  # a NULL organism_id, so refuse up front like the _fetch_* methods do.
  my $organism_id = $organism->dbID();
  croak "Cannot store publications for an object that has not been stored"
    if !defined $organism_id;

  my $helper = $self->dbc()->sql_helper();

  # Replace, not merge: drop the stored publications before writing the
  # current set.
  $helper->execute_update(
               -SQL    => q/delete from organism_publication where organism_id=?/,
               -PARAMS => [$organism_id] );

  my $publications = $organism->publications();
  return if !defined $publications;

  for my $pub ( @{$publications} ) {
    $helper->execute_update(
      -SQL => q/insert into organism_publication(organism_id,publication)
 values(?,?)/,
      -PARAMS => [ $organism_id, $pub ] );
  }
  return;
}
00410
=head2 taxonomy_adaptor

  Arg        : (optional) Bio::EnsEMBL::Taxonomy::DBSQL::TaxonomyNodeAdaptor
  Description: Get/set the taxonomy adaptor. When none has been set, a
               taxonomy database adaptor is requested from the registry
               ("multi", "taxonomy"); if that yields nothing, one is created
               for the database 'ncbi_taxonomy' using the user, port,
               password and host of this adaptor's own connection. The node
               adaptor obtained from it is kept for later calls.
  Returntype : Bio::EnsEMBL::Taxonomy::DBSQL::TaxonomyNodeAdaptor
  Exceptions : none
  Caller     : internal
  Status     : Stable

=cut

sub taxonomy_adaptor {
  my ( $self, $adaptor ) = @_;

  # setter
  if ( defined $adaptor ) {
    $self->{taxonomy_adaptor} = $adaptor;
    return $self->{taxonomy_adaptor};
  }

  # getter with a value already in place
  return $self->{taxonomy_adaptor} if defined $self->{taxonomy_adaptor};

  # try and get from the registry; any failure there is deliberately ignored
  my $tax_dba;
  eval {
    $tax_dba = Bio::EnsEMBL::Registry->get_DBAdaptor( "multi", "taxonomy" );
  };

  if ( !defined $tax_dba ) {
    # can't find, so try and create alongside metadata
    my $dbc = $self->db()->dbc();
    $tax_dba =
      Bio::EnsEMBL::Taxonomy::DBSQL::TaxonomyDBAdaptor->new(
                                                  -USER   => $dbc->user(),
                                                  -PORT   => $dbc->port(),
                                                  -PASS   => $dbc->pass(),
                                                  -HOST   => $dbc->host(),
                                                  -DBNAME => 'ncbi_taxonomy' );
  }

  if ( defined $tax_dba ) {
    $self->{taxonomy_adaptor} = $tax_dba->get_TaxonomyNodeAdaptor();
  }
  return $self->{taxonomy_adaptor};
} ## end sub taxonomy_adaptor
00449
00450 # internal implementation
00451
# Select clause shared by the organism fetch methods; organism_id is aliased
# to dbID in the result set.
my $base_organism_fetch_sql = join q{ },
  'select',
  join( q{, },
        'organism_id as dbID',
        qw(name display_name taxonomy_id species_taxonomy_id strain serotype is_reference)
  ),
  'from organism';

# Returns the base select statement (no where clause) for organism rows.
sub _get_base_sql {
  return $base_organism_fetch_sql;
}
00458
# Returns the name of the ID column of the organism table.
sub _get_id_field {
  my $id_column = 'organism_id';
  return $id_column;
}
00462
# Returns the name of the class of the objects this adaptor handles.
sub _get_obj_class {
  my $class_name = 'Bio::EnsEMBL::MetaData::GenomeOrganismInfo';
  return $class_name;
}
00466
00467 1;