GenomeAssemblyInfo.pm
Go to the documentation of this file.
00001 
00002 =head1 LICENSE
00003 
00004 Copyright [1999-2014] EMBL-European Bioinformatics Institute
00005 
00006 Licensed under the Apache License, Version 2.0 (the "License");
00007 you may not use this file except in compliance with the License.
00008 You may obtain a copy of the License at
00009 
00010      http://www.apache.org/licenses/LICENSE-2.0
00011 
00012 Unless required by applicable law or agreed to in writing, software
00013 distributed under the License is distributed on an "AS IS" BASIS,
00014 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015 See the License for the specific language governing permissions and
00016 limitations under the License.
00017 
00018 =head1 CONTACT
00019 
00020   Please email comments or questions to the public Ensembl
00021   developers list at <dev@ensembl.org>.
00022 
00023   Questions may also be sent to the Ensembl help desk at
00024   <helpdesk@ensembl.org>.
00025   
00026 =head1 NAME
00027 
00028 Bio::EnsEMBL::MetaData::GenomeAssemblyInfo
00029 
00030 =head1 SYNOPSIS
00031 
00032       my $assembly_info =
00033         Bio::EnsEMBL::MetaData::GenomeAssemblyInfo->new(
00034                                               -ASSEMBLY_NAME      => $assembly_name,
00035                                               -ASSEMBLY_ACCESSION => $assembly_accession,
00036                                               -ASSEMBLY_LEVEL     => $assembly_level,
00037                                               -ORGANISM           => $organism
00038         );
00039 
00040 =head1 DESCRIPTION
00041 
00042 Object encapsulating information about a particular assembly. Release-independent.
00043 
00044 =head1 SEE ALSO
00045 
00046 Bio::EnsEMBL::MetaData::BaseInfo
00047 Bio::EnsEMBL::MetaData::GenomeOrganismInfo
00048 Bio::EnsEMBL::MetaData::DBSQL::GenomeAssemblyInfoAdaptor
00049 
00050 =head1 Author
00051 
00052 Dan Staines
00053 
00054 =cut
00055 
00056 package Bio::EnsEMBL::MetaData::GenomeAssemblyInfo;
00057 use strict;
00058 use warnings;
00059 use base qw/Bio::EnsEMBL::MetaData::BaseInfo/;
00060 use Bio::EnsEMBL::Utils::Argument qw(rearrange);
00061 use Bio::EnsEMBL::MetaData::GenomeOrganismInfo;
00062 
00063 =head1 CONSTRUCTOR
=head2 new

  Arg [-ASSEMBLY_NAME] :
        string - name of the assembly
  Arg [-ASSEMBLY_ACCESSION] :
        string - INSDC assembly accession
  Arg [-ASSEMBLY_LEVEL] :
        string - highest assembly level (chromosome, supercontig etc.)
  Arg [-BASE_COUNT] :
        long - total number of bases in toplevel
  Arg [-ORGANISM] :
        Bio::EnsEMBL::MetaData::GenomeOrganismInfo - organism to which this
        assembly belongs. When supplied, the organism-level arguments below
        are not used by this constructor.
  Arg [-NAME] :
        string - name of the organism (forwarded to GenomeOrganismInfo)
  Arg [-DISPLAY_NAME] :
        string - display name of the organism (forwarded to GenomeOrganismInfo)
  Arg [-TAXONOMY_ID] :
        string - NCBI taxonomy identifier
  Arg [-SPECIES_TAXONOMY_ID] :
        string - NCBI taxonomy identifier of species to which this organism belongs
  Arg [-STRAIN] :
        string - name of strain to which organism belongs
  Arg [-SEROTYPE] :
        string - name of serotype to which organism belongs
  Arg [-IS_REFERENCE] :
        bool - 1 if this organism is the reference for its species
  Example    : $info = Bio::EnsEMBL::MetaData::GenomeAssemblyInfo->new(...);
  Description: Creates a new info object. The full argument list is first
               handed to the parent constructor. If no -ORGANISM is given, a
               GenomeOrganismInfo is built from the organism-level arguments
               and attached to the new object.
  Returntype : Bio::EnsEMBL::MetaData::GenomeAssemblyInfo
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub new {
  my ( $class, @args ) = @_;
  my $self = $class->SUPER::new(@args);

  # Organism-level values are only needed transiently, to build a
  # GenomeOrganismInfo when the caller did not pass one in.
  my ( $name, $display_name, $taxonomy_id, $species_taxonomy_id, $strain,
       $serotype, $is_reference );

  # Each key is requested exactly once; the order of the targets on the
  # left must match the order of the keys on the right.
  ( $self->{assembly_name},  $self->{assembly_accession},
    $self->{assembly_level}, $self->{base_count},
    $self->{organism},       $name,
    $display_name,           $taxonomy_id,
    $species_taxonomy_id,    $strain,
    $serotype,               $is_reference )
    = rearrange( [ 'ASSEMBLY_NAME',       'ASSEMBLY_ACCESSION',
                   'ASSEMBLY_LEVEL',      'BASE_COUNT',
                   'ORGANISM',            'NAME',
                   'DISPLAY_NAME',        'TAXONOMY_ID',
                   'SPECIES_TAXONOMY_ID', 'STRAIN',
                   'SEROTYPE',            'IS_REFERENCE' ],
                 @args );

  if ( !defined $self->{organism} ) {
    my $organism =
      Bio::EnsEMBL::MetaData::GenomeOrganismInfo->new(
                                   -NAME                => $name,
                                   -DISPLAY_NAME        => $display_name,
                                   -TAXONOMY_ID         => $taxonomy_id,
                                   -SPECIES_TAXONOMY_ID => $species_taxonomy_id,
                                   -STRAIN              => $strain,
                                   -SEROTYPE            => $serotype,
                                   -IS_REFERENCE        => $is_reference );

    # Share this object's adaptor (if it has one) with the new organism.
    $organism->adaptor( $self->adaptor() ) if defined $self->adaptor();
    $self->organism($organism);
  }

  return $self;
} ## end sub new
00137 
00138 =head1 ATTRIBUTE METHODS
=head2 assembly_name

  Arg        : (optional) string - assembly name to set; an undefined
               argument leaves the stored value unchanged
  Description: Gets/sets the name of the assembly
  Returntype : string
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub assembly_name {
  my $self     = shift;
  my $new_name = shift;

  # Only a defined argument acts as a setter.
  if ( defined $new_name ) {
    $self->{assembly_name} = $new_name;
  }

  return $self->{assembly_name};
}
00153 
=head2 assembly_accession

  Arg        : (optional) string - assembly accession to set; an undefined
               argument leaves the stored value unchanged
  Description: Gets/sets the INSDC accession for the assembly
  Returntype : string
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub assembly_accession {
  my $self      = shift;
  my $accession = shift;

  # Only a defined argument acts as a setter.
  if ( defined $accession ) {
    $self->{assembly_accession} = $accession;
  }

  return $self->{assembly_accession};
}
00169 
=head2 assembly_level

  Arg        : (optional) string - assembly level to set; an undefined
               argument leaves the stored value unchanged
  Description: Gets/sets the highest level of the assembly
               (chromosome, supercontig etc.)
  Returntype : string
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub assembly_level {
  my $self  = shift;
  my $level = shift;

  # Only a defined argument acts as a setter.
  if ( defined $level ) {
    $self->{assembly_level} = $level;
  }

  return $self->{assembly_level};
}
00184 
=head2 base_count

  Arg        : (optional) integer - base count to set; an undefined
               argument leaves the stored value unchanged
  Description: Gets/sets the total number of bases in the assembled genome
  Returntype : integer
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub base_count {
  my $self  = shift;
  my $count = shift;

  # The test is for definedness, so a count of 0 is still stored.
  if ( defined $count ) {
    $self->{base_count} = $count;
  }

  return $self->{base_count};
}
00199 
=head2 sequences

  Arg        : (optional) arrayref of sequences to set; an undefined
               argument leaves the stored value unchanged
  Description: Gets/sets array of hashrefs describing sequences from the
               assembly. Elements are hashrefs with name and acc as keys.
               Every call, getter or setter, invokes
               _load_child('sequences', '_fetch_sequences') before returning.
               NOTE(review): _load_child is not defined in this file;
               presumably it is inherited and fills the slot on demand -
               confirm.
  Returntype : Arrayref
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub sequences {
  my $self          = shift;
  my $new_sequences = shift;

  $self->{sequences} = $new_sequences if defined $new_sequences;

  # Runs on every call, after any explicit assignment above.
  $self->_load_child( 'sequences', '_fetch_sequences' );

  return $self->{sequences};
}
00217 
=head2 organism

  Arg        : (optional) organism object to set; an undefined argument
               leaves the stored value unchanged
  Description: Gets/sets organism to which genome belongs. Every call,
               getter or setter, invokes
               _load_child('organism', '_fetch_organism') before returning.
               NOTE(review): _load_child is not defined in this file;
               presumably it is inherited and fills the slot on demand -
               confirm.
  Returntype : Bio::EnsEMBL::MetaData::GenomeOrganismInfo
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub organism {
  my $self         = shift;
  my $new_organism = shift;

  $self->{organism} = $new_organism if defined $new_organism;

  # Runs on every call, after any explicit assignment above.
  $self->_load_child( 'organism', '_fetch_organism' );

  return $self->{organism};
}
00235 
=head2 display_name

  Description: Gets display_name; read-only, delegated to the object
               returned by organism()
  Returntype : string
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub display_name {
  my $self     = shift;
  my $organism = $self->organism();
  return $organism->display_name();
}
00248 
=head2 strain

  Description: Gets strain of genome; read-only, delegated to the object
               returned by organism()
  Returntype : string
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub strain {
  my $self     = shift;
  my $organism = $self->organism();
  return $organism->strain();
}
00261 
=head2 serotype

  Description: Gets serotype; read-only, delegated to the object returned
               by organism()
  Returntype : string
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub serotype {
  my $self     = shift;
  my $organism = $self->organism();
  return $organism->serotype();
}
00274 
=head2 name

  Description: Gets name for genome; read-only, delegated to the object
               returned by organism()
  Returntype : string
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub name {
  my $self     = shift;
  my $organism = $self->organism();
  return $organism->name();
}
00287 
=head2 taxonomy_id

  Description: Gets NCBI taxonomy ID; read-only, delegated to the object
               returned by organism()
  Returntype : string
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub taxonomy_id {
  my $self     = shift;
  my $organism = $self->organism();
  return $organism->taxonomy_id();
}
00300 
=head2 species_taxonomy_id

  Description: Gets NCBI taxonomy ID for species to which this organism
               belongs; read-only, delegated to the object returned by
               organism()
  Returntype : string
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub species_taxonomy_id {
  my $self     = shift;
  my $organism = $self->organism();
  return $organism->species_taxonomy_id();
}
00313 
=head2 is_reference

  Description: Gets whether this is a reference for the species; read-only,
               delegated to the object returned by organism()
  Returntype : bool
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub is_reference {
  my $self     = shift;
  my $organism = $self->organism();
  return $organism->is_reference();
}
00326 
00327 =head1 UTILITY METHODS
=head2 to_string

  Description: Render as plain string. The result is the values of
               division(), method() and set_name() joined with '/'; a false
               set_name() is rendered as '-'.
               NOTE(review): division(), method() and set_name() are not
               defined in this file; presumably they are inherited - confirm
               that they exist for assembly objects.
  Returntype : string
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub to_string {
  my $self = shift;

  # Components are gathered in output order; '-' stands in for a false
  # (undefined, empty or zero) set name.
  my @parts =
    ( $self->division(), $self->method(), ( $self->set_name() || '-' ) );

  return join '/', @parts;
}
00342 
=head2 _preload

  Description: Ensure all children are loaded (used for hash transformation).
               This calls sequences() and discards its result; no other
               accessor is called here.
  Returntype : none
  Exceptions : none
  Caller     : Internal
  Status     : Stable

=cut

sub _preload {
  my $self = shift;

  # Called only for its side effect; the return value is ignored.
  $self->sequences();

  return;
}
00356 
=head2 _unload

  Description: Remove all children (used after hash transformation to ensure
               object is minimised). This resets the sequences slot to undef;
               no other slot is touched here.
  Returntype : none
  Exceptions : none
  Caller     : dump_metadata.pl
  Status     : Stable

=cut

sub _unload {
  my $self = shift;

  # The key is kept, only its value is cleared.
  $self->{sequences} = undef;

  return;
}
00370 
00371 1;
00372