00001
00002 =head1 LICENSE
00003
00004 Copyright [1999-2014] EMBL-European Bioinformatics Institute
00005
00006 Licensed under the Apache License, Version 2.0 (the "License");
00007 you may not use this file except in compliance with the License.
00008 You may obtain a copy of the License at
00009
00010 http://www.apache.org/licenses/LICENSE-2.0
00011
00012 Unless required by applicable law or agreed to in writing, software
00013 distributed under the License is distributed on an "AS IS" BASIS,
00014 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015 See the License for the specific language governing permissions and
00016 limitations under the License.
00017
00018 =head1 CONTACT
00019
00020 Please email comments or questions to the public Ensembl
00021 developers list at <dev@ensembl.org>.
00022
00023 Questions may also be sent to the Ensembl help desk at
00024 <helpdesk@ensembl.org>.
00025
00026 =head1 NAME
00027
00028 Bio::EnsEMBL::MetaData::GenomeAssemblyInfo
00029
00030 =head1 SYNOPSIS
00031
00032 my $assembly_info =
00033 Bio::EnsEMBL::MetaData::GenomeAssemblyInfo->new(
00034 -ASSEMBLY_NAME => $assembly_name,
00035 -ASSEMBLY_ACCESSION => $assembly_accession,
00036 -ASSEMBLY_LEVEL => $assembly_level,
00037 -ORGANISM => $organism
00038 );
00039
00040 =head1 DESCRIPTION
00041
00042 Object encapsulating information about a particular assembly. Release-independent.
00043
00044 =head1 SEE ALSO
00045
00046 Bio::EnsEMBL::MetaData::BaseInfo
00047 Bio::EnsEMBL::MetaData::GenomeOrganismInfo
00048 Bio::EnsEMBL::MetaData::DBSQL::GenomeAssemblyInfoAdaptor
00049
00050 =head1 Author
00051
00052 Dan Staines
00053
00054 =cut
00055
00056 package Bio::EnsEMBL::MetaData::GenomeAssemblyInfo;
00057 use strict;
00058 use warnings;
00059 use base qw/Bio::EnsEMBL::MetaData::BaseInfo/;
00060 use Bio::EnsEMBL::Utils::Argument qw(rearrange);
00061 use Bio::EnsEMBL::MetaData::GenomeOrganismInfo;
00062
00063 =head1 CONSTRUCTOR
00064 =head2 new
00065 Arg [-ASSEMBLY_NAME] :
00066 string - name of the assembly
00067 Arg [-ASSEMBLY_ACCESSION] :
00068 string - INSDC assembly accession
00069 Arg [-ASSEMBLY_LEVEL] :
00070 string - highest assembly level (chromosome, supercontig etc.)
00071 Arg [-BASE_COUNT] :
00072 long - total number of bases in toplevel
00073 Arg [-ORGANISM] :
00074 Bio::EnsEMBL::MetaData::GenomeOrganismInfo - organism to which this assembly belongs
00075 Arg [-NAME] :
00076 string - human readable version of the name of the organism
00077 Arg [-SPECIES] :
00078 string - computable version of the name of the organism (lower case, no spaces)
00079 Arg [-TAXONOMY_ID] :
00080 string - NCBI taxonomy identifier
00081 Arg [-SPECIES_TAXONOMY_ID] :
00082 string - NCBI taxonomy identifier of species to which this organism belongs
00083 Arg [-STRAIN]:
00084 string - name of strain to which organism belongs
00085 Arg [-SEROTYPE]:
00086 string - name of serotype to which organism belongs
00087 Arg [-IS_REFERENCE]:
00088 bool - 1 if this organism is the reference for its species
00089 Example : $info = Bio::EnsEMBL::MetaData::GenomeAssemblyInfo->new(...);
00090 Description: Creates a new info object
00091 Returntype : Bio::EnsEMBL::MetaData::GenomeAssemblyInfo
00092 Exceptions : none
00093 Caller : general
00094 Status : Stable
00095
00096 =cut
00097
# Constructor.
#
# Delegates to the parent constructor with the full argument list, then pulls
# the assembly-level fields out of the named arguments. If no -ORGANISM object
# was supplied, a GenomeOrganismInfo is built from the organism-level
# arguments (-NAME, -DISPLAY_NAME, -TAXONOMY_ID, -SPECIES_TAXONOMY_ID,
# -STRAIN, -SEROTYPE, -IS_REFERENCE) and attached instead.
#
# NOTE(review): the POD for this constructor lists -SPECIES, which is not
# among the keys read here, and omits -DISPLAY_NAME, which is read - confirm
# which is intended.
#
# Returns the new Bio::EnsEMBL::MetaData::GenomeAssemblyInfo instance.
sub new {
  my ( $class, @args ) = @_;
  my $self = $class->SUPER::new(@args);

  # Organism-level values; only consumed when an organism has to be built.
  my ( $name, $display_name, $taxonomy_id, $species_taxonomy_id, $strain,
       $serotype, $is_reference );

  # Each key is requested exactly once. Previously 'ORGANISM' appeared twice
  # and $self->{organism} was assigned twice in the same list assignment; the
  # later slot always wins, so the trailing duplicate was at best redundant
  # and would clobber the first value whenever the two slots differed.
  ( $self->{assembly_name},  $self->{assembly_accession},
    $self->{assembly_level}, $self->{base_count},
    $self->{organism},       $name,
    $display_name,           $taxonomy_id,
    $species_taxonomy_id,    $strain,
    $serotype,               $is_reference )
    = rearrange( [ 'ASSEMBLY_NAME',       'ASSEMBLY_ACCESSION',
                   'ASSEMBLY_LEVEL',      'BASE_COUNT',
                   'ORGANISM',            'NAME',
                   'DISPLAY_NAME',        'TAXONOMY_ID',
                   'SPECIES_TAXONOMY_ID', 'STRAIN',
                   'SEROTYPE',            'IS_REFERENCE' ],
                 @args );

  if ( !defined $self->{organism} ) {
    my $organism =
      Bio::EnsEMBL::MetaData::GenomeOrganismInfo->new(
                                 -NAME                => $name,
                                 -DISPLAY_NAME        => $display_name,
                                 -TAXONOMY_ID         => $taxonomy_id,
                                 -SPECIES_TAXONOMY_ID => $species_taxonomy_id,
                                 -STRAIN              => $strain,
                                 -SEROTYPE            => $serotype,
                                 -IS_REFERENCE        => $is_reference );

    # Share this object's adaptor (if it has one) with the new organism.
    my $adaptor = $self->adaptor();
    $organism->adaptor($adaptor) if defined $adaptor;

    $self->organism($organism);
  }

  return $self;
} ## end sub new
00137
00138 =head1 ATTRIBUTE METHODS
00139 =head2 assembly_name
00140 Arg : (optional) assembly_name to set
00141 Description: Gets/sets name of assembly
00142 Returntype : string
00143 Exceptions : none
00144 Caller : general
00145 Status : Stable
00146 =cut
00147
# Combined getter/setter for the assembly name.
# A defined argument replaces the stored value; an undefined or missing
# argument leaves it untouched. Always returns the stored value.
sub assembly_name {
  my $self  = shift;
  my $value = shift;

  if ( defined $value ) {
    $self->{assembly_name} = $value;
  }

  return $self->{assembly_name};
}
00153
00154 =head2 assembly_accession
00155 Arg : (optional) assembly_accession to set
00156 Description: Gets/sets INSDC accession for assembly
00157 Returntype : string
00158 Exceptions : none
00159 Caller : general
00160 Status : Stable
00161 =cut
00162
# Combined getter/setter for the assembly accession.
# A defined argument replaces the stored value; an undefined or missing
# argument leaves it untouched. Always returns the stored value.
sub assembly_accession {
  my $self  = shift;
  my $value = shift;

  if ( defined $value ) {
    $self->{assembly_accession} = $value;
  }

  return $self->{assembly_accession};
}
00169
00170 =head2 assembly_level
00171 Arg : (optional) assembly_level to set
00172 Description: Gets/sets highest level of assembly (chromosome, supercontig etc.)
00173 Returntype : string
00174 Exceptions : none
00175 Caller : general
00176 Status : Stable
00177 =cut
00178
# Combined getter/setter for the assembly level.
# A defined argument replaces the stored value; an undefined or missing
# argument leaves it untouched. Always returns the stored value.
sub assembly_level {
  my $self  = shift;
  my $value = shift;

  if ( defined $value ) {
    $self->{assembly_level} = $value;
  }

  return $self->{assembly_level};
}
00184
00185 =head2 base_count
00186 Arg : (optional) base_count to set
00187 Description: Gets/sets total number of bases in assembled genome
00188 Returntype : integer
00189 Exceptions : none
00190 Caller : general
00191 Status : Stable
00192 =cut
00193
# Combined getter/setter for the base count.
# Any defined argument (including 0) replaces the stored value; an undefined
# or missing argument leaves it untouched. Always returns the stored value.
sub base_count {
  my $self  = shift;
  my $value = shift;

  if ( defined $value ) {
    $self->{base_count} = $value;
  }

  return $self->{base_count};
}
00199
00200 =head2 sequences
00201 Arg : (optional) arrayref of sequences to set
00202 Description: Gets/sets array of hashrefs describing sequences from the assembly. Elements are hashrefs with name and acc as keys
00203 Returntype : Arrayref
00204 Exceptions : none
00205 Caller : general
00206 Status : Stable
00207 =cut
00208
# Getter/setter for the 'sequences' slot.
# A defined argument is stored first. _load_child is then always invoked with
# the slot name and '_fetch_sequences' (presumably to populate the slot on
# demand when it is empty - confirm in the base class), and the stored value
# is returned.
sub sequences {
  my $self      = shift;
  my $sequences = shift;

  $self->{sequences} = $sequences if defined $sequences;

  $self->_load_child( 'sequences', '_fetch_sequences' );

  return $self->{sequences};
}
00217
00218 =head2 organism
00219 Arg : (optional) organism object to set
00220 Description: Gets/sets organism to which genome belongs
00221 Returntype : Bio::EnsEMBL::MetaData::GenomeOrganismInfo
00222 Exceptions : none
00223 Caller : general
00224 Status : Stable
00225 =cut
00226
# Getter/setter for the 'organism' slot.
# A defined argument is stored first. _load_child is then always invoked with
# the slot name and '_fetch_organism' (presumably to populate the slot on
# demand when it is empty - confirm in the base class), and the stored value
# is returned.
sub organism {
  my $self     = shift;
  my $organism = shift;

  $self->{organism} = $organism if defined $organism;

  $self->_load_child( 'organism', '_fetch_organism' );

  return $self->{organism};
}
00235
00236 =head2 display_name
00237 Description: Gets display_name
00238 Returntype : string
00239 Exceptions : none
00240 Caller : general
00241 Status : Stable
00242 =cut
00243
# Read-only convenience accessor: returns whatever display_name() yields on
# the object returned by organism().
sub display_name {
  my $self     = shift;
  my $organism = $self->organism();
  return $organism->display_name();
}
00248
00249 =head2 strain
00250 Description: Gets strain of genome
00251 Returntype : string
00252 Exceptions : none
00253 Caller : general
00254 Status : Stable
00255 =cut
00256
# Read-only convenience accessor: returns whatever strain() yields on the
# object returned by organism().
sub strain {
  my $self     = shift;
  my $organism = $self->organism();
  return $organism->strain();
}
00261
00262 =head2 serotype
00263 Description: Gets serotype
00264 Returntype : string
00265 Exceptions : none
00266 Caller : general
00267 Status : Stable
00268 =cut
00269
# Read-only convenience accessor: returns whatever serotype() yields on the
# object returned by organism().
sub serotype {
  my $self     = shift;
  my $organism = $self->organism();
  return $organism->serotype();
}
00274
00275 =head2 name
00276 Description: Gets name for genome
00277 Returntype : string
00278 Exceptions : none
00279 Caller : general
00280 Status : Stable
00281 =cut
00282
# Read-only convenience accessor: returns whatever name() yields on the
# object returned by organism().
sub name {
  my $self     = shift;
  my $organism = $self->organism();
  return $organism->name();
}
00287
00288 =head2 taxonomy_id
00289 Description: Gets NCBI taxonomy ID
00290 Returntype : string
00291 Exceptions : none
00292 Caller : general
00293 Status : Stable
00294 =cut
00295
# Read-only convenience accessor: returns whatever taxonomy_id() yields on
# the object returned by organism().
sub taxonomy_id {
  my $self     = shift;
  my $organism = $self->organism();
  return $organism->taxonomy_id();
}
00300
00301 =head2 species_taxonomy_id
00302 Description: Gets NCBI taxonomy ID for species to which this organism belongs
00303 Returntype : string
00304 Exceptions : none
00305 Caller : general
00306 Status : Stable
00307 =cut
00308
# Read-only convenience accessor: returns whatever species_taxonomy_id()
# yields on the object returned by organism().
sub species_taxonomy_id {
  my $self     = shift;
  my $organism = $self->organism();
  return $organism->species_taxonomy_id();
}
00313
00314 =head2 is_reference
00315 Description: Gets whether this is a reference for the species
00316 Returntype : bool
00317 Exceptions : none
00318 Caller : general
00319 Status : Stable
00320 =cut
00321
# Read-only convenience accessor: returns whatever is_reference() yields on
# the object returned by organism().
sub is_reference {
  my $self     = shift;
  my $organism = $self->organism();
  return $organism->is_reference();
}
00326
00327 =head1 UTILITY METHODS
00328 =head2 to_string
00329 Description: Render as plain string
00330 Returntype : string
00331 Exceptions : none
00332 Caller : general
00333 Status : Stable
00334 =cut
00335
# Render the object as a short '/'-separated string.
#
# The historical rendering joins division(), method() and set_name() (with
# '-' substituted when set_name() is false). None of those accessors is
# defined in this class, so unless they are inherited the call would die with
# "Can't locate object method". To stay backward-compatible the historical
# form is used whenever all three accessors are available; otherwise the
# assembly's own name and accession are rendered, with '-' standing in for a
# missing or empty value.
#
# NOTE(review): division/method/set_name look like they belong to a
# different info class - confirm whether the base class provides them.
#
# Returns a string.
sub to_string {
  my ($self) = @_;

  if (    $self->can('division')
       && $self->can('method')
       && $self->can('set_name') )
  {
    return
      join( '/',
            $self->division(), $self->method(), ( $self->set_name() || '-' ) );
  }

  my $assembly_name      = $self->assembly_name();
  my $assembly_accession = $self->assembly_accession();

  return
    join( '/',
          ( defined $assembly_name && length $assembly_name
            ? $assembly_name
            : '-' ),
          ( defined $assembly_accession && length $assembly_accession
            ? $assembly_accession
            : '-' ) );
}
00342
00343 =head2 _preload
00344 Description: Ensure all children are loaded (used for hash transformation)
00345 Returntype : none
00346 Exceptions : none
00347 Caller : Internal
00348 Status : Stable
00349 =cut
00350
# Internal hook: calls sequences() purely for its side effect and discards
# the result. Returns nothing.
sub _preload {
  my $self = shift;

  $self->sequences();

  return;
}
00356
00357 =head2 _unload
00358 Description: Remove all children (used after hash transformation to ensure object is minimised)
00359 Returntype : none
00360 Exceptions : none
00361 Caller : dump_metadata.pl
00362 Status : Stable
00363 =cut
00364
# Internal hook: resets the 'sequences' slot to undef, leaving every other
# field untouched. Returns nothing.
sub _unload {
  my $self = shift;

  $self->{'sequences'} = undef;

  return;
}
00370
00371 1;
00372