00001
00002 =head1 LICENSE
00003
00004 Copyright [1999-2014] EMBL-European Bioinformatics Institute
00005
00006 Licensed under the Apache License, Version 2.0 (the "License");
00007 you may not use this file except in compliance with the License.
00008 You may obtain a copy of the License at
00009
     http://www.apache.org/licenses/LICENSE-2.0
00011
00012 Unless required by applicable law or agreed to in writing, software
00013 distributed under the License is distributed on an "AS IS" BASIS,
00014 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015 See the License for the specific language governing permissions and
00016 limitations under the License.
00017
00018 =cut
00019
00020 =pod
00021
00022 =head1 NAME
00023
00024 Bio::EnsEMBL::MetaData::DBSQL::GenomeAssemblyInfoAdaptor
00025
00026 =head1 SYNOPSIS
00027
00028 # metadata_db is an instance of MetaDataDBAdaptor
00029 my $adaptor = $metadata_db->get_GenomeAssemblyInfoAdaptor();
00030 my $assembly = $adaptor->fetch_by_assembly_accession('GCA_000001405.15');
00031
00032 =head1 DESCRIPTION
00033
00034 Adaptor for storing and retrieving GenomeAssemblyInfo objects from MySQL ensembl_metadata database
00035
00036 =head1 SEE ALSO
00037
00038 Bio::EnsEMBL::MetaData::GenomeAssemblyInfo
00039 Bio::EnsEMBL::MetaData::GenomeOrganismInfo
00040
00041 =head1 Author
00042
00043 Dan Staines
00044
00045 =cut
00046
00047 package Bio::EnsEMBL::MetaData::DBSQL::GenomeAssemblyInfoAdaptor;
00048
00049 use strict;
00050 use warnings;
00051
00052 use base qw/Bio::EnsEMBL::MetaData::DBSQL::BaseInfoAdaptor/;
00053
00054 use Carp qw(cluck croak);
00055 use Bio::EnsEMBL::Utils::Argument qw( rearrange );
00056 use Scalar::Util qw(looks_like_number);
00057 use Bio::EnsEMBL::DBSQL::DBConnection;
00058 use Bio::EnsEMBL::MetaData::GenomeAssemblyInfo;
00059 use List::MoreUtils qw(natatime);
00060
00061 =head1 METHODS
00062 =cut
00063
=head2 store

  Arg        : Bio::EnsEMBL::MetaData::GenomeAssemblyInfo
  Description: Store the supplied assembly. The associated organism is
               stored first if it has no dbID. If the assembly itself has
               no dbID, the assembly table is searched by organism_id and
               assembly_name; when a row is found the object adopts that
               dbID and the row is updated, otherwise a new row is inserted
               together with the assembly's sequences.
  Returntype : none
  Exceptions : croaks if the assembly has no associated organism
  Caller     : general
  Status     : Stable

=cut

sub store {
  my ( $self, $assembly ) = @_;

  my $organism = $assembly->organism();
  if ( !defined $organism ) {
    # croak is what this module imports from Carp; throw() was never
    # imported here, so calling it produced an "Undefined subroutine" error
    # instead of this message.
    croak "Assembly must be associated with an organism";
  }

  # The organism row must exist before the assembly row can reference it.
  if ( !defined $organism->dbID() ) {
    $self->db()->get_GenomeOrganismInfoAdaptor()->store($organism);
  }

  if ( !defined $assembly->dbID() ) {
    # Find out whether this assembly is already stored for the organism.
    my ($dbID) = @{
      $self->dbc()->sql_helper()->execute_simple(
        -SQL =>
"select assembly_id from assembly where organism_id=? and assembly_name=?",
        -PARAMS => [ $organism->dbID(), $assembly->assembly_name() ] ) };
    if ( defined $dbID ) {
      $assembly->dbID($dbID);
      $assembly->adaptor($self);
    }
  }

  if ( defined $assembly->dbID() ) {
    $self->update($assembly);
  }
  else {
    $self->dbc()->sql_helper()->execute_update(
      -SQL =>
q/insert into assembly(assembly_accession,assembly_name,assembly_level,base_count,organism_id) values(?,?,?,?,?)/,
      -PARAMS => [ $assembly->assembly_accession(),
                   $assembly->assembly_name(),
                   $assembly->assembly_level(),
                   $assembly->base_count(),
                   $organism->dbID() ],
      -CALLBACK => sub {
        my ( $sth, $dbh, $rv ) = @_;
        # Pick up the auto-generated key of the row just inserted.
        $assembly->dbID( $dbh->{mysql_insertid} );
      } );
    $self->_store_sequences($assembly);
    $assembly->adaptor($self);
    $self->_store_cached_obj($assembly);
  }
  return;
} ## end sub store
00119
=head2 update

  Arg        : Bio::EnsEMBL::MetaData::GenomeAssemblyInfo
  Description: Write the accession, name, level, base count and organism
               of a previously stored assembly back to its row in the
               assembly table
  Returntype : none
  Exceptions : croaks if the supplied object has no dbID
  Caller     : general
  Status     : Stable

=cut

sub update {
  my ( $self, $assembly ) = @_;

  croak "Cannot update an object that has not already been stored"
    unless defined $assembly->dbID();

  my $helper = $self->dbc()->sql_helper();

  # Bind values in the order of the placeholders in the statement below;
  # the row key comes last because it is used in the where clause.
  my @row_values = ( $assembly->assembly_accession(),
                     $assembly->assembly_name(),
                     $assembly->assembly_level(),
                     $assembly->base_count(),
                     $assembly->organism()->dbID(),
                     $assembly->dbID() );

  $helper->execute_update(
    -SQL =>
q/update assembly set assembly_accession=?,assembly_name=?,assembly_level=?,base_count=?,organism_id=? where assembly_id=?/,
    -PARAMS => \@row_values );

  return;
}
00144
=head2 fetch_all_by_sequence_accession

  Arg        : INSDC sequence accession e.g. U00096.1 or U00096
  Arg        : (optional) if 1, expand children of genome info
  Description: Fetch assemblies for the specified sequence accession.
               An accession ending in a dot followed by digits is treated
               as versioned and delegated to
               fetch_all_by_sequence_accession_versioned; anything else
               goes to fetch_all_by_sequence_accession_unversioned.
  Returntype : whatever the delegated method returns
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub fetch_all_by_sequence_accession {
  my ( $self, $id, $keen ) = @_;

  # A trailing ".<digits>" marks a versioned accession.
  my $has_version = $id =~ m/\.[0-9]+$/;

  return $self->fetch_all_by_sequence_accession_versioned( $id, $keen )
    if $has_version;

  return $self->fetch_all_by_sequence_accession_unversioned( $id, $keen );
}
00164
=head2 fetch_all_by_sequence_accession_unversioned

  Arg        : INSDC sequence accession without version e.g. U00096
  Arg        : (optional) if 1, expand children of genome info
  Description: Fetch assemblies that have a sequence whose accession or
               name starts with the supplied accession followed by a dot
               (matched with SQL "like")
  Returntype : result of _fetch_generic (presumably an arrayref of
               Bio::EnsEMBL::MetaData::GenomeAssemblyInfo - confirm
               against the base adaptor)
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub fetch_all_by_sequence_accession_unversioned {
  my ( $self, $id, $keen ) = @_;

  my $sql = $self->_get_base_sql() .
' where assembly_id in (select distinct(assembly_id) from assembly_sequence where acc like ? or name like ?)';

  # The same "<accession>.<anything>" pattern is bound for acc and name.
  my $acc_pattern  = $id . '.%';
  my $name_pattern = $id . '.%';

  return $self->_fetch_generic( $sql, [ $acc_pattern, $name_pattern ],
                                $keen );
}
00184
=head2 fetch_all_by_sequence_accession_versioned

  Arg        : INSDC sequence accession with version e.g. U00096.1
  Arg        : (optional) if 1, expand children of genome info
  Description: Fetch assemblies that have a sequence whose accession or
               name is exactly the supplied value
  Returntype : result of _fetch_generic (presumably an arrayref of
               Bio::EnsEMBL::MetaData::GenomeAssemblyInfo - confirm
               against the base adaptor)
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub fetch_all_by_sequence_accession_versioned {
  my ( $self, $id, $keen ) = @_;

  my $sql = $self->_get_base_sql() .
' where assembly_id in (select distinct(assembly_id) from assembly_sequence where acc=? or name=?)';

  # The value is bound twice: once for acc and once for name.
  my @bind_values = ( $id, $id );

  return $self->_fetch_generic( $sql, \@bind_values, $keen );
}
00204
=head2 fetch_by_assembly_accession

  Arg        : INSDC assembly accession e.g. GCA_000001405.15
  Arg        : (optional) if 1, expand children of genome info
  Description: Fetch the assembly whose assembly_accession column equals
               the supplied value; the result of _fetch_generic is reduced
               with _first_element
  Returntype : result of _first_element (presumably a single
               Bio::EnsEMBL::MetaData::GenomeAssemblyInfo - confirm
               against the base adaptor)
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub fetch_by_assembly_accession {
  my ( $self, $id, $keen ) = @_;

  my $sql = $self->_get_base_sql . ' where assembly_accession=?';

  # Collected in list context, exactly as when passed straight through as
  # an argument list.
  my @found = $self->_fetch_generic( $sql, [$id], $keen );

  return $self->_first_element(@found);
}
00224
=head2 fetch_all_by_assembly_set_chain

  Arg        : INSDC assembly set chain (unversioned assembly accession)
  Arg        : (optional) if 1, expand children of genome info
  Description: Fetch assemblies whose assembly_accession starts with the
               supplied set chain followed by a dot (matched with SQL
               "like")
  Returntype : result of _fetch_generic (presumably an arrayref of
               Bio::EnsEMBL::MetaData::GenomeAssemblyInfo - confirm
               against the base adaptor)
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub fetch_all_by_assembly_set_chain {
  my ( $self, $id, $keen ) = @_;

  my $sql = $self->_get_base_sql . ' where assembly_accession like ?';

  # "<set chain>.<anything>" covers every version of the set chain.
  my @bind_values = ( $id . '.%' );

  return $self->_fetch_generic( $sql, \@bind_values, $keen );
}
00242
=head2 fetch_all_by_organism

  Arg        : Bio::EnsEMBL::MetaData::GenomeOrganismInfo object (or an
               instance of a subclass), or an organism dbID
  Arg        : (optional) if 1, expand children of genome info
  Description: Fetch assemblies whose organism_id equals the dbID of the
               supplied organism (or the supplied ID itself)
  Returntype : result of _fetch_generic (presumably an arrayref of
               Bio::EnsEMBL::MetaData::GenomeAssemblyInfo - confirm
               against the base adaptor)
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub fetch_all_by_organism {
  my ( $self, $organism_id, $keen ) = @_;

  # blessed() + isa() rather than comparing ref() with the class name, so
  # that instances of subclasses are also resolved to their dbID instead of
  # being bound to the query as an object reference.
  if ( Scalar::Util::blessed($organism_id) &&
       $organism_id->isa('Bio::EnsEMBL::MetaData::GenomeOrganismInfo') )
  {
    $organism_id = $organism_id->dbID();
  }

  return
    $self->_fetch_generic( $self->_get_base_sql() . ' where organism_id = ?',
                           [$organism_id], $keen );
}
00262
=head1 INTERNAL METHODS

=head2 _store_sequences

  Arg        : Bio::EnsEMBL::MetaData::GenomeAssemblyInfo
  Description: Replace the rows of assembly_sequence belonging to the
               supplied assembly. Existing rows are deleted first; if the
               assembly has sequences (an arrayref of hashrefs with "name"
               and "acc" keys) they are inserted in batches of up to 1000
               rows. A false "acc" is stored as NULL.
  Returntype : none
  Exceptions : none
  Caller     : internal
  Status     : Stable

=cut

sub _store_sequences {
  my ( $self, $assembly ) = @_;

  my $helper      = $self->dbc()->sql_helper();
  my $assembly_id = $assembly->dbID();

  $helper->execute_update(
                 -SQL => q/delete from assembly_sequence where assembly_id=?/,
                 -PARAMS => [$assembly_id] );

  my $sequences = $assembly->sequences();
  return if !defined $sequences;

  # Work on a copy so that splice does not empty the caller's list.
  my @pending = @{$sequences};
  while ( my @batch = splice( @pending, 0, 1000 ) ) {

    # One "(?,?,?)" group per row: values are bound rather than pasted
    # into the statement, so names/accessions containing quotes are safe.
    my $sql =
      'insert ignore into assembly_sequence(assembly_id,name,acc) values ' .
      join( ',', ('(?,?,?)') x scalar(@batch) );

    my @bind_values = map {
      ( $assembly_id,
        ( defined $_->{name} ? $_->{name} : '' ),
        ( $_->{acc}          ? $_->{acc}  : undef ) )
    } @batch;

    $helper->execute_update( -SQL => $sql, -PARAMS => \@bind_values );
  }
  return;
}
00296
=head2 _fetch_sequences

  Arg        : Bio::EnsEMBL::MetaData::GenomeAssemblyInfo
  Description: Read the name and acc columns of every assembly_sequence
               row belonging to the supplied object and hand the rows
               (fetched as hashrefs) to its sequences() method
  Returntype : none
  Exceptions : croaks if the supplied object has no dbID
  Caller     : internal
  Status     : Stable

=cut

sub _fetch_sequences {
  my ( $self, $genome ) = @_;

  if ( !defined $genome->dbID() ) {
    croak
"Cannot fetch sequences for a GenomeAssemblyInfo object that has not been stored";
  }

  my $helper = $self->dbc()->sql_helper();
  my $rows =
    $helper->execute(
         -USE_HASHREFS => 1,
         -SQL => 'select name,acc from assembly_sequence where assembly_id=?',
         -PARAMS => [ $genome->dbID() ] );

  $genome->sequences($rows);
  return;
}
00319
=head2 _fetch_organism

  Arg        : Bio::EnsEMBL::MetaData::GenomeAssemblyInfo
  Description: If the supplied object carries an organism_id entry, fetch
               the organism with that dbID through the
               GenomeOrganismInfoAdaptor and attach it via organism().
               Does nothing when organism_id is not defined.
  Returntype : none
  Exceptions : none
  Caller     : internal
  Status     : Stable

=cut

sub _fetch_organism {
  my ( $self, $md ) = @_;

  # organism_id is read straight from the object's hash, not via an accessor.
  my $organism_id = $md->{organism_id};
  return if !defined $organism_id;

  my $organism_adaptor = $self->db()->get_GenomeOrganismInfoAdaptor();
  $md->organism( $organism_adaptor->fetch_by_dbID($organism_id) );
  return;
}
00328
=head2 _fetch_children

  Arg        : Bio::EnsEMBL::MetaData::GenomeAssemblyInfo
  Description: Populate the child data of the supplied object: first its
               sequences (_fetch_sequences), then its organism
               (_fetch_organism)
  Returntype : none
  Exceptions : none
  Caller     : internal
  Status     : Stable

=cut

sub _fetch_children {
  my ( $self, $md ) = @_;

  # Loaders run in this fixed order: sequences first, organism second.
  for my $loader (qw(_fetch_sequences _fetch_organism)) {
    $self->$loader($md);
  }
  return;
}
00344
# Base select statement shared by the fetch methods of this adaptor. It
# reads from the assembly table and aliases assembly_id to dbID. (The
# variable name mentions "organism", but the query targets assembly.)
my $base_organism_fetch_sql =
  'select ' .
  join( ', ',
        'assembly_id as dbID', 'organism_id',
        'assembly_accession',  'assembly_name',
        'assembly_level',      'base_count' ) .
  ' from assembly';

# Returns the base select statement; callers append their own where clause.
sub _get_base_sql {
  return $base_organism_fetch_sql;
}
00351
# Name of the primary key column of the assembly table (presumably consumed
# by the base adaptor when fetching by dbID - confirm there).
sub _get_id_field {
  my $id_column = q{assembly_id};
  return $id_column;
}
00355
# Class name of the objects this adaptor handles (presumably used by the
# base adaptor to instantiate fetched rows - confirm there).
sub _get_obj_class {
  my $object_class = q{Bio::EnsEMBL::MetaData::GenomeAssemblyInfo};
  return $object_class;
}
00359
00360 1;