GenomeInfo.pm
Go to the documentation of this file.
00001 
00002 =head1 LICENSE
00003 
00004 Copyright [1999-2016] EMBL-European Bioinformatics Institute
00005 
00006 Licensed under the Apache License, Version 2.0 (the "License");
00007 you may not use this file except in compliance with the License.
00008 You may obtain a copy of the License at
00009 
00010      http://www.apache.org/licenses/LICENSE-2.0
00011 
00012 Unless required by applicable law or agreed to in writing, software
00013 distributed under the License is distributed on an "AS IS" BASIS,
00014 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015 See the License for the specific language governing permissions and
00016 limitations under the License.
00017 
00018 =head1 CONTACT
00019 
00020   Please email comments or questions to the public Ensembl
00021   developers list at <dev@ensembl.org>.
00022 
00023   Questions may also be sent to the Ensembl help desk at
00024   <helpdesk@ensembl.org>.
00025 
00026 =head1 NAME
00027 
00028 Bio::EnsEMBL::MetaData::GenomeInfo
00029 
00030 =head1 SYNOPSIS
00031 
00032   my $genome = Bio::EnsEMBL::MetaData::GenomeInfo->new(
00033       -species    => $dba->species(),
00034       -species_id => $dba->species_id(),
00035       -division   => $meta->get_division() || 'Ensembl',
00036       -dbname     => $dbname);
00037       
00038   print Dumper($genome->to_hash());
00039 
00040 =head1 DESCRIPTION
00041 
00042 Object encapsulating meta information about a genome in Ensembl Genomes. 
00043 
00044 Can be used to render information about a genome e.g.
00045 
00046 print $genome->name()." (".$genome->species.")\n";
00047 print "Sequences: ".scalar(@{$genome->sequences()})."\n";
00048 if($genome->has_variations()) {
00049     print "Variations: \n";
00050     # variations is a hash with type as the key
00051     while(my ($type,$value) = each %{$genome->variations()}) {
00052         print "- $type\n";
00053     }
00054 }
00055 print "Compara analyses: ".scalar(@{$genome->compara()})."\n";
00056 
00057 =head1 SEE ALSO
00058 
00059 Bio::EnsEMBL::MetaData::BaseInfo
00060 Bio::EnsEMBL::MetaData::GenomeAssemblyInfo
00061 Bio::EnsEMBL::MetaData::GenomeOrganismInfo
00062 Bio::EnsEMBL::MetaData::GenomeComparaInfo
00063 Bio::EnsEMBL::MetaData::DataReleaseInfo
00064 Bio::EnsEMBL::MetaData::DatabaseInfo
00065 Bio::EnsEMBL::MetaData::EventInfo
00066 Bio::EnsEMBL::MetaData::DBSQL::GenomeInfoAdaptor
00067 
00068 =head1 AUTHOR
00069 
00070 Dan Staines
00071 
00072 =cut
00073 
00074 use strict;
00075 use warnings;
00076 
00077 package Bio::EnsEMBL::MetaData::GenomeInfo;
00078 use base qw/Bio::EnsEMBL::MetaData::BaseInfo/;
00079 use Bio::EnsEMBL::Utils::Argument qw(rearrange);
00080 use Bio::EnsEMBL::Utils::Exception qw/throw/;
00081 use Bio::EnsEMBL::MetaData::DataReleaseInfo;
00082 use Bio::EnsEMBL::MetaData::GenomeAssemblyInfo;
00083 use Bio::EnsEMBL::MetaData::GenomeOrganismInfo;
00084 
=head1 CONSTRUCTOR

=head2 new

  Arg [-DISPLAY_NAME]  :
       string - human readable version of the name of the genome
  Arg [-NAME]    :
       string - computable version of the name of the genome (lower case, no spaces)
  Arg [-DBNAME] :
       string - name of the core database in which the genome can be found
  Arg [-SPECIES_ID]  :
       int - identifier of the species within the core database for this genome
  Arg [-TAXONOMY_ID] :
        string - NCBI taxonomy identifier
  Arg [-SPECIES_TAXONOMY_ID] :
        string - NCBI taxonomy identifier of species to which this genome belongs
  Arg [-ASSEMBLY_NAME] :
        string - name of the assembly
  Arg [-ASSEMBLY_ID] :
        string - INSDC assembly accession
  Arg [-ASSEMBLY_LEVEL] :
        string - highest assembly level (chromosome, supercontig etc.)
  Arg [-GENEBUILD]:
        string - identifier for genebuild
  Arg [-DIVISION]:
        string - name of Ensembl Genomes division (e.g. EnsemblBacteria, EnsemblPlants)
  Arg [-STRAIN]:
        string - name of strain to which genome belongs
  Arg [-SEROTYPE]:
        string - name of serotype to which genome belongs
  Arg [-IS_REFERENCE]:
        bool - 1 if this genome is the reference for its species
  Arg [-ASSEMBLY]:
        pre-built assembly object; when supplied no assembly is constructed
        from the individual assembly/organism arguments
  Arg [-ORGANISM]:
        organism object, passed through to the assembly that is constructed
  Arg [-DATA_RELEASE]:
        data release object stored on the genome

  Example    : $info = Bio::EnsEMBL::MetaData::GenomeInfo->new(...);
  Description: Creates a new info object. If -DBNAME is given a database is
               registered via add_database(). If no assembly is available
               after construction, a GenomeAssemblyInfo is built from the
               assembly and organism arguments.
  Returntype : Bio::EnsEMBL::MetaData::GenomeInfo
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub new {
  my ( $class, @args ) = @_;
  my $self = $class->SUPER::new(@args);

  # Argument names in the order the values are unpacked below.
  my @arg_order = qw(
    NAME           DISPLAY_NAME
    DBNAME         SPECIES_ID
    TAXONOMY_ID    SPECIES_TAXONOMY_ID
    ASSEMBLY_NAME  ASSEMBLY_ID
    ASSEMBLY_LEVEL GENEBUILD
    DIVISION       STRAIN
    SEROTYPE       IS_REFERENCE
    ASSEMBLY       ORGANISM
    DATA_RELEASE
  );

  my ( $name,          $display_name, $dbname,
       $species_id,    $taxonomy_id,  $species_taxonomy_id,
       $assembly_name, $assembly_id,  $assembly_level,
       $genebuild,     $division,     $strain,
       $serotype,      $is_reference, $assembly,
       $organism,      $data_release )
    = rearrange( \@arg_order, @args );

  # These four are stored directly on the object (undef when not supplied).
  $self->{genebuild}    = $genebuild;
  $self->{division}     = $division;
  $self->{assembly}     = $assembly;
  $self->{data_release} = $data_release;

  $self->add_database( $dbname, $species_id ) if defined $dbname;

  # Nothing more to do when an assembly is already available.
  return $self if defined $self->assembly();

  my $built_assembly =
    Bio::EnsEMBL::MetaData::GenomeAssemblyInfo->new(
                                   -ASSEMBLY_NAME       => $assembly_name,
                                   -ASSEMBLY_ID         => $assembly_id,
                                   -ASSEMBLY_LEVEL      => $assembly_level,
                                   -DISPLAY_NAME        => $display_name,
                                   -NAME                => $name,
                                   -STRAIN              => $strain,
                                   -SEROTYPE            => $serotype,
                                   -TAXONOMY_ID         => $taxonomy_id,
                                   -SPECIES_TAXONOMY_ID => $species_taxonomy_id,
                                   -IS_REFERENCE        => $is_reference,
                                   -ORGANISM            => $organism );

  # Share this genome's adaptor with the new assembly when there is one.
  if ( defined $self->adaptor() ) {
    $built_assembly->adaptor( $self->adaptor() );
  }
  $self->assembly($built_assembly);

  return $self;
} ## end sub new
00173 
=head1 ATTRIBUTE METHODS

=head2 dbname

  Description: Gets name of the core database from which the genome comes,
               as reported by the core database object found by _get_core()
  Returntype : string
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub dbname {
  my $self = shift;
  my $core = $self->_get_core();
  return $core->dbname();
}
00188 
=head2 species_id

  Description: Gets species_id of the genome within its core database, as
               reported by the core database object found by _get_core()
  Returntype : string
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub species_id {
  my $self = shift;
  my $core = $self->_get_core();
  return $core->species_id();
}
00201 
=head2 databases

  Arg        : (optional) Arrayref of DatabaseInfo objects to set
  Description: Gets/sets databases associated with the genome. The stored
               value is passed through the inherited _load_child() helper
               (key 'databases', fetch method '_fetch_databases') before
               being returned.
  Returntype : Arrayref of Bio::EnsEMBL::MetaData::DatabaseInfo
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub databases {
  my ( $self, $databases ) = @_;
  $self->{databases} = $databases if defined $databases;
  $self->_load_child( 'databases', '_fetch_databases' );
  return $self->{databases};
}
00219 
=head2 data_release

  Arg        : (optional) data_release object to set
  Description: Gets/sets data_release to which genome belongs. The stored
               value is passed through the inherited _load_child() helper
               (key 'data_release', fetch method '_fetch_data_release')
               before being returned.
  Returntype : Bio::EnsEMBL::MetaData::DataReleaseInfo
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub data_release {
  my ( $self, $data_release ) = @_;
  if ( defined $data_release ) {
    $self->{data_release} = $data_release;
  }
  $self->_load_child( 'data_release', '_fetch_data_release' );
  return $self->{data_release};
}
00235 
=head2 organism

  Arg        : (optional) organism object to set
  Description: Gets/sets organism to which genome belongs. The organism is
               held by the assembly, so the call (including the optional
               argument) is forwarded to assembly()->organism().
  Returntype : Bio::EnsEMBL::MetaData::GenomeOrganismInfo
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub organism {
  my ( $self, $organism ) = @_;
  my $assembly = $self->assembly();
  return $assembly->organism($organism);
}
00249 
=head2 display_name

  Arg        : (optional) name, forwarded to the organism
  Description: Gets readable name by delegating to organism()->display_name();
               any argument supplied is passed along unchanged
  Returntype : string
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub display_name {
  my ( $self, $name ) = @_;
  my $organism = $self->organism();
  return $organism->display_name($name);
}
00262 
=head2 strain

  Description: Gets strain of genome by delegating to organism()->strain()
  Returntype : string
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub strain {
  my $self     = shift;
  my $organism = $self->organism();
  return $organism->strain();
}
00275 
=head2 serotype

  Description: Gets serotype by delegating to organism()->serotype()
  Returntype : string
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub serotype {
  my $self     = shift;
  my $organism = $self->organism();
  return $organism->serotype();
}
00288 
=head2 name

  Description: Gets unique, compute-safe name for genome by delegating to
               organism()->name()
  Returntype : string
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub name {
  my $self     = shift;
  my $organism = $self->organism();
  return $organism->name();
}
00301 
=head2 taxonomy_id

  Description: Gets NCBI taxonomy ID by delegating to
               organism()->taxonomy_id()
  Returntype : string
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub taxonomy_id {
  my $self     = shift;
  my $organism = $self->organism();
  return $organism->taxonomy_id();
}
00314 
=head2 species_taxonomy_id

  Description: Gets NCBI taxonomy ID of species to which this belongs by
               delegating to organism()->species_taxonomy_id()
  Returntype : string
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub species_taxonomy_id {
  my $self     = shift;
  my $organism = $self->organism();
  return $organism->species_taxonomy_id();
}
00327 
=head2 assembly

  Arg        : (optional) assembly to set
  Description: Gets/sets assembly object. The stored value is passed through
               the inherited _load_child() helper (key 'assembly', fetch
               method '_fetch_assembly') before being returned.
  Returntype : Bio::EnsEMBL::MetaData::GenomeAssemblyInfo
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub assembly {
  my ( $self, $assembly ) = @_;
  if ( defined $assembly ) {
    $self->{assembly} = $assembly;
  }
  $self->_load_child( 'assembly', '_fetch_assembly' );
  return $self->{assembly};
}
00343 
=head2 assembly_name

  Description: Gets name of assembly by delegating to
               assembly()->assembly_name()
  Returntype : string
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub assembly_name {
  my $self     = shift;
  my $assembly = $self->assembly();
  return $assembly->assembly_name();
}
00356 
=head2 assembly_accession

  Description: Gets INSDC accession for assembly by delegating to
               assembly()->assembly_accession()
  Returntype : string
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub assembly_accession {
  my $self     = shift;
  my $assembly = $self->assembly();
  return $assembly->assembly_accession();
}
00369 
=head2 assembly_level

  Description: Gets highest level of assembly (chromosome, supercontig etc.)
               by delegating to assembly()->assembly_level()
  Returntype : string
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub assembly_level {
  my $self     = shift;
  my $assembly = $self->assembly();
  return $assembly->assembly_level();
}
00382 
=head2 genebuild

  Arg        : (optional) genebuild to set
  Description: Gets/sets identifier for genebuild. An undefined argument
               leaves the stored value untouched.
  Returntype : string
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub genebuild {
  my ( $self, $genebuild ) = @_;
  if ( defined $genebuild ) {
    $self->{genebuild} = $genebuild;
  }
  return $self->{genebuild};
}
00397 
=head2 division

  Arg        : (optional) division to set
  Description: Gets/sets Ensembl Genomes division. An undefined argument
               leaves the stored value untouched.
  Returntype : string
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub division {
  my ( $self, $division ) = @_;
  if ( defined $division ) {
    $self->{division} = $division;
  }
  return $self->{division};
}
00412 
=head2 is_reference

  Arg        : (optional) value of is_reference
  Description: Gets/sets whether this is a reference for the species. The
               call, including the optional argument, is forwarded to
               organism()->is_reference().
  Returntype : bool
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub is_reference {
  my ( $self, $is_ref ) = @_;
  my $organism = $self->organism();
  return $organism->is_reference($is_ref);
}
00426 
=head2 db_size

  Arg        : (optional) db_size to set
  Description: Gets/sets size of database containing core. An undefined
               argument leaves the stored value untouched.
  Returntype : string
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub db_size {
  my ( $self, $arg ) = @_;
  if ( defined $arg ) {
    $self->{db_size} = $arg;
  }
  return $self->{db_size};
}
00441 
=head2 base_count

  Description: Gets total number of bases in assembled genome by delegating
               to assembly()->base_count()
  Returntype : integer
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub base_count {
  my $self     = shift;
  my $assembly = $self->assembly();
  return $assembly->base_count();
}
00454 
=head2 aliases

  Description: Gets aliases by which the genome is also known, by delegating
               to organism()->aliases()
  Returntype : Arrayref of aliases
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub aliases {
  my $self     = shift;
  my $organism = $self->organism();
  return $organism->aliases();
}
00467 
=head2 compara

  Arg        : (optional) arrayref of GenomeComparaInfo objects to set
  Description: Gets/sets GenomeComparaInfo describing comparative analyses
               applied to the genome. Setting a new list also resets the
               cached has_peptide_compara, has_synteny,
               has_genome_alignments and has_pan_compara flags to undef so
               they are recomputed on next use. The stored value is passed
               through the inherited _load_child() helper (key 'compara',
               fetch method '_fetch_comparas') before being returned.
  Returntype : Arrayref of Bio::EnsEMBL::MetaData::GenomeComparaInfo
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub compara {
  my ( $self, $compara ) = @_;
  if ( defined $compara ) {
    $self->{compara} = $compara;

    # Invalidate every flag derived from the compara list.
    @{$self}{qw(has_peptide_compara has_synteny
                has_genome_alignments has_pan_compara)} = ();
  }
  $self->_load_child( 'compara', '_fetch_comparas' );
  return $self->{compara};
}
00489 
=head2 sequences

  Description: Gets array of hashrefs describing sequences from the assembly,
               by delegating to assembly()->sequences(). Elements are
               hashrefs with name and acc as keys.
  Returntype : Arrayref
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub sequences {
  my $self     = shift;
  my $assembly = $self->assembly();
  return $assembly->sequences();
}
00502 
=head2 publications

  Description: Gets PubMed IDs for publications associated with the genome,
               by delegating to organism()->publications()
  Returntype : Arrayref of PubMed IDs
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub publications {
  my $self     = shift;
  my $organism = $self->organism();
  return $organism->publications();
}
00515 
=head2 variations

  Arg        : (optional) variations to set
  Description: Gets/sets variations associated with genomes as hashref
               (variations,structural variations,genotypes,phenotypes),
               further broken down into counts by type/source. Setting a new
               value resets the cached has_variations flag to undef. The
               stored value is passed through the inherited _load_child()
               helper (key 'variations', fetch method '_fetch_variations')
               before being returned.
  Returntype : Hashref
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub variations {
  my ( $self, $variations ) = @_;
  if ( defined $variations ) {

    # Invalidate the flag derived from the variation counts.
    @{$self}{qw(variations has_variations)} = ( $variations, undef );
  }
  $self->_load_child( 'variations', '_fetch_variations' );
  return $self->{variations};
}
00536 
=head2 features

  Arg        : (optional) features to set
  Description: Gets/sets general genomic features associated with the genome
               as hashref keyed by type (e.g. repeatFeatures,simpleFeatures),
               further broken down into counts by analysis. The stored value
               is passed through the inherited _load_child() helper (key
               'features', fetch method '_fetch_features') before being
               returned.
  Returntype : Hashref
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub features {
  my ( $self, $features ) = @_;
  $self->{features} = $features if defined $features;
  $self->_load_child( 'features', '_fetch_features' );
  return $self->{features};
}
00556 
=head2 annotations

  Arg        : (optional) annotations to set
  Description: Gets/sets summary information about gene annotation as
               hashref, with annotation type as key and count as value. The
               stored value is passed through the inherited _load_child()
               helper (key 'annotations', fetch method '_fetch_annotations')
               before being returned.
  Returntype : hashref
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub annotations {
  my ( $self, $annotation ) = @_;
  $self->{annotations} = $annotation if defined $annotation;
  $self->_load_child( 'annotations', '_fetch_annotations' );
  return $self->{annotations};
}
00575 
=head2 other_alignments

  Arg        : (optional) other alignments to set
  Description: Gets/sets other alignments as hashref, keyed by type
               (dnaAlignFeatures,proteinAlignFeatures) with values as
               logic_name-count pairs. Setting a new value resets the cached
               has_other_alignments flag to undef. The stored value is passed
               through the inherited _load_child() helper (key
               'other_alignments', fetch method '_fetch_other_alignments')
               before being returned.
  Returntype : Hashref, or 0 when no alignments are stored
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub other_alignments {
  my ( $self, $other_alignments ) = @_;
  if ( defined $other_alignments ) {

    # Invalidate the flag derived from the alignment counts.
    @{$self}{qw(other_alignments has_other_alignments)} =
      ( $other_alignments, undef );
  }
  $self->_load_child( 'other_alignments', '_fetch_other_alignments' );

  # Callers get 0 rather than undef when nothing is stored.
  my $stored = $self->{other_alignments};
  return $stored ? $stored : 0;
}
00595 
=head1 utility methods

=head2 has_variations

  Arg        : (optional) 1/0 to set if genome has variation
  Description: Boolean-style method, returns 1 if genome has variation, 0 if
               not. When no value is supplied or cached, the flag is derived
               from count_variation() and cached on the object.
  Returntype : 1 or 0
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub has_variations {
  my ( $self, $arg ) = @_;

  # An explicit value always wins and is stored as given.
  if ( defined $arg ) {
    $self->{has_variations} = $arg;
    return $self->{has_variations};
  }

  unless ( defined $self->{has_variations} ) {
    my $n_variations = $self->count_variation();
    $self->{has_variations} = ( $n_variations > 0 ) ? 1 : 0;
  }
  return $self->{has_variations};
}
00616 
=head2 has_genome_alignments

  Arg        : (optional) 1/0 to set if genome has genome alignments
  Description: Boolean-style method, returns 1 if genome has genome
               alignments, 0 if not. When no value is supplied or cached and
               compara() is defined, the flag is set to 1 if any compara
               entry reports is_dna_compara(), and cached on the object.
  Returntype : 1 or 0
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub has_genome_alignments {
  my ( $self, $arg ) = @_;
  my $flag = 'has_genome_alignments';

  if ( defined $arg ) {
    $self->{$flag} = $arg;
  }
  elsif ( !defined $self->{$flag} && defined $self->compara() ) {
    $self->{$flag} = 0;
  ANALYSIS:
    for my $analysis ( @{ $self->compara() } ) {
      next ANALYSIS unless $analysis->is_dna_compara();
      $self->{$flag} = 1;
      last ANALYSIS;
    }
  }
  return $self->{$flag} || 0;
}
00644 
=head2 has_synteny

  Arg        : (optional) 1/0 to set if genome has synteny
  Description: Boolean-style method, returns 1 if genome has synteny, 0 if
               not. When no value is supplied or cached and compara() is
               defined, the flag is set to 1 if any compara entry reports
               is_synteny(), and cached on the object.
  Returntype : 1 or 0
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub has_synteny {
  my ( $self, $arg ) = @_;
  my $flag = 'has_synteny';

  if ( defined $arg ) {
    $self->{$flag} = $arg;
  }
  elsif ( !defined $self->{$flag} && defined $self->compara() ) {
    $self->{$flag} = 0;
  ANALYSIS:
    for my $analysis ( @{ $self->compara() } ) {
      next ANALYSIS unless $analysis->is_synteny();
      $self->{$flag} = 1;
      last ANALYSIS;
    }
  }
  return $self->{$flag} || 0;
}
00670 
=head2 has_peptide_compara

  Arg        : (optional) 1/0 to set if genome has peptide compara
  Description: Boolean-style method, returns 1 if genome has peptide compara,
               0 if not. When no value is supplied or cached and compara() is
               defined, the flag is set to 1 if any compara entry reports
               is_peptide_compara() without also reporting is_pan_compara(),
               and cached on the object.
  Returntype : 1 or 0
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub has_peptide_compara {
  my ( $self, $arg ) = @_;
  my $flag = 'has_peptide_compara';

  if ( defined $arg ) {
    $self->{$flag} = $arg;
  }
  elsif ( !defined $self->{$flag} && defined $self->compara() ) {
    $self->{$flag} = 0;
  ANALYSIS:
    for my $analysis ( @{ $self->compara() } ) {

      # Pan compara analyses are reported separately by has_pan_compara.
      next ANALYSIS unless $analysis->is_peptide_compara();
      next ANALYSIS if $analysis->is_pan_compara();
      $self->{$flag} = 1;
      last ANALYSIS;
    }
  }
  return $self->{$flag} || 0;
}
00697 
=head2 has_pan_compara

  Arg        : (optional) 1/0 to set if genome is included in pan compara
  Description: Boolean-style method, returns 1 if genome is in pan compara, 0
               if not. When no value is supplied or cached and compara() is
               defined, the flag is set to 1 if any compara entry reports
               is_pan_compara(), and cached on the object.
  Returntype : 1 or 0
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub has_pan_compara {
  my ( $self, $arg ) = @_;
  my $flag = 'has_pan_compara';

  if ( defined $arg ) {
    $self->{$flag} = $arg;
  }
  elsif ( !defined $self->{$flag} && defined $self->compara() ) {
    $self->{$flag} = 0;
  ANALYSIS:
    for my $analysis ( @{ $self->compara() } ) {
      next ANALYSIS unless $analysis->is_pan_compara();
      $self->{$flag} = 1;
      last ANALYSIS;
    }
  }
  return $self->{$flag} || 0;
}
00723 
=head2 has_other_alignments

  Arg        : (optional) 1/0 to set if genome has other alignments
  Description: Boolean-style method, returns 1 if genome has other
               alignments, 0 if not. When no value is supplied or cached, the
               flag is derived from count_alignments() and cached on the
               object.
  Returntype : 1 or 0
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub has_other_alignments {
  my ( $self, $arg ) = @_;

  if ( defined $arg ) {
    $self->{has_other_alignments} = $arg;
  }
  elsif ( !defined $self->{has_other_alignments} ) {
    my $n_alignments = $self->count_alignments();
    $self->{has_other_alignments} = ( $n_alignments > 0 ) ? 1 : 0;
  }
  return $self->{has_other_alignments} || 0;
}
00743 
=head2 count_variation

  Description: Returns total number of variations and structural variations
               mapped to genome. Counts are read through the variations()
               accessor, and the object is left unmodified when no variation
               data is available (the total is then 0).
  Returntype : integer
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub count_variation {
  my ($self) = @_;

  # Go through the accessor rather than $self->{variations}{...}: a nested
  # hash lookup on the raw slot would autovivify $self->{variations} to an
  # empty hash, which then looks like "already loaded" to later callers.
  my $variations = $self->variations() || {};

  return $self->count_hash_values( $variations->{variations} ) +
    $self->count_hash_values( $variations->{structural_variations} );
}
00757 
=head2 count_alignments

  Description: Returns total number of alignments to genome, summed over the
               bam, proteinAlignFeatures and dnaAlignFeatures entries of
               other_alignments(). The object is left unmodified when no
               alignment data is available (the total is then 0).
  Returntype : integer
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub count_alignments {
  my ($self) = @_;

  # Go through the accessor rather than $self->{other_alignments}{...}: a
  # nested hash lookup on the raw slot would autovivify it to an empty hash.
  # other_alignments() returns 0 when nothing is stored, hence the fallback.
  my $alignments = $self->other_alignments() || {};

  my $total = 0;
  for my $type (qw(bam proteinAlignFeatures dnaAlignFeatures)) {
    $total += $self->count_hash_values( $alignments->{$type} );
  }
  return $total;
}
00772 
=head2 get_uniprot_coverage

  Description: Get % of protein coding genes with a UniProt cross-reference,
               computed from the nProteinCodingUniProtKB and nProteinCoding
               entries of annotations(). Returns 0 when there are no
               annotations or no protein coding genes, instead of dying on a
               division by zero.
  Returntype : uniprot coverage as percentage
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub get_uniprot_coverage {
  my ($self) = @_;

  my $annotations = $self->annotations() || {};
  my $n_coding    = $annotations->{nProteinCoding};

  # Without any protein coding genes the ratio is undefined; report 0%.
  return 0 unless $n_coding;

  my $n_with_uniprot = $annotations->{nProteinCodingUniProtKB} || 0;
  return 100.0 * $n_with_uniprot / $n_coding;
}
00787 
=head2 add_database

  Arg        : Name of database
  Arg        : (Optional) species_id (defaults to 1)
  Description: Add a database associated with this genome. A new
               Bio::EnsEMBL::MetaData::DatabaseInfo is created with this
               genome as its subject and appended to the genome's database
               list.
  Returntype : none
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub add_database {
  my ( $self, $dbname, $species_id ) = @_;

  # Apply the documented default to the value handed to DatabaseInfo. The
  # default used to be written to $self->{species_id} instead, so an omitted
  # species_id reached DatabaseInfo as undef.
  $species_id ||= 1;

  # This file never loads DatabaseInfo itself; load it on demand so the
  # constructor call does not depend on some other module having done so.
  require Bio::EnsEMBL::MetaData::DatabaseInfo;

  push @{ $self->{databases} },
    Bio::EnsEMBL::MetaData::DatabaseInfo->new( -SUBJECT    => $self,
                                               -DBNAME     => $dbname,
                                               -SPECIES_ID => $species_id );
  return;
}
00804 
00805 
=head2 to_string

  Description: Render genome as string for display: dbID (or '-' when not
               set), name, dbname and species_id joined by ':'
  Returntype : String
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub to_string {
  my ($self) = @_;
  my @fields = ( ( $self->dbID() || '-' ),
                 $self->name(), $self->dbname(), $self->species_id() );
  return join( ':', @fields );
}
00818 
=head2 to_hash

  Description: Render genome as plain hash suitable for export as JSON/XML.
               Works recursively and can be called as a method or as a plain
               function: arrayrefs are converted element by element, plain
               hashes and GenomeInfo objects key by key (skipping dbID,
               adaptor and logger, and prefixing keys that start with a digit
               with '_'), other MetaData Info objects via their own
               to_hash(), and anything else is returned unchanged.
  Argument   : (optional) if set to 1, force expansion of children
  Returntype : Hashref
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub to_hash {
  my ( $in, $keen ) = @_;
  my $type = ref $in;

  # [A-Za-z] rather than [A-z]: the latter also matches the punctuation
  # characters that sit between 'Z' and 'a' in ASCII ([ \ ] ^ _ `).
  my $is_info = $type =~ m/Bio::EnsEMBL::MetaData::[A-Za-z]+Info/ ? 1 : 0;
  my $expand = ( defined $keen && $keen == 1 && $is_info ) ? 1 : 0;

  $in->_preload() if $expand;

  my $out;
  if ( $type eq 'ARRAY' ) {
    $out = [ map { to_hash( $_, $keen ) } @{$in} ];
  }
  elsif ( $type eq 'HASH' || $type eq 'Bio::EnsEMBL::MetaData::GenomeInfo' ) {
    $out = {};

    # Iterate over a snapshot of the keys: 'each' keeps one iterator per
    # hash, which a re-entrant call on the same hash would disturb.
  KEY:
    for my $key ( keys %{$in} ) {
      next KEY if $key eq 'dbID' || $key eq 'adaptor' || $key eq 'logger';

      # Keys starting with numbers are not valid element names in XML.
      my $out_key = ( $key =~ m/^[0-9]/ ) ? '_' . $key : $key;
      $out->{$out_key} = to_hash( $in->{$key}, $keen );
    }
  }
  elsif ($is_info) {
    $out = $in->to_hash($keen);
  }
  else {
    $out = $in;
  }

  # NOTE(review): the original calls _preload() a second time here; given
  # the purpose documented for _unload(), _unload() may have been intended.
  # Kept as-is because unloading would discard data on objects that have no
  # adaptor to reload from - confirm against callers before changing.
  $in->_preload() if $expand;

  return $out;
} ## end sub to_hash
00872 
=head1 INTERNAL METHODS

=head2 _get_core

  Description: Convenience method to find core database: the first entry of
               databases() whose type() is 'core'. The result is cached on
               the object once found.
  Returntype : DatabaseInfo
  Exceptions : none
  Caller     : internal
  Status     : Stable

=cut

sub _get_core {
  my ($self) = @_;
  return $self->{core} if defined $self->{core};

  my @core_dbs = grep { $_->type() eq 'core' } @{ $self->databases() };
  $self->{core} = $core_dbs[0];
  return $self->{core};
}
00888 
00889 
=head2 count_hash_values

  Description: Sums values found in hash; an undefined hash gives 0
  Arg        : hashref
  Returntype : integer
  Exceptions : none
  Caller     : internal
  Status     : Stable

=cut

sub count_hash_values {
  my ( $self, $hash ) = @_;
  return 0 unless defined $hash;

  my $sum = 0;
  $sum += $_ for values %{$hash};
  return $sum;
}
00908 
=head2 count_hash_lengths

  Description: Sums sizes of arrays found in hash as values; an undefined
               hash gives 0
  Arg        : hashref
  Returntype : integer
  Exceptions : none
  Caller     : internal
  Status     : Stable

=cut

sub count_hash_lengths {
  my ( $self, $hash ) = @_;
  return 0 unless defined $hash;

  my $total = 0;
  for my $list ( values %{$hash} ) {
    $total += scalar @{$list};
  }
  return $total;
}
00928 
=head2 dbID

  Arg        : (optional) dbID to set
  Description: Gets/sets internal genome_id used as database primary key. An
               undefined argument leaves the stored value untouched.
  Returntype : dbID string
  Exceptions : none
  Caller     : Bio::EnsEMBL::MetaData::DBSQL::GenomeInfoAdaptor
  Status     : Stable

=cut

sub dbID {
  my ( $self, $id ) = @_;
  if ( defined $id ) {
    $self->{dbID} = $id;
  }
  return $self->{dbID};
}
00943 
=head2 adaptor

  Arg        : (optional) adaptor to set
  Description: Gets/sets GenomeInfoAdaptor. An undefined argument leaves the
               stored value untouched.
  Returntype : Bio::EnsEMBL::MetaData::DBSQL::GenomeInfoAdaptor
  Exceptions : none
  Caller     : Internal
  Status     : Stable

=cut

sub adaptor {
  my ( $self, $adaptor ) = @_;
  if ( defined $adaptor ) {
    $self->{adaptor} = $adaptor;
  }
  return $self->{adaptor};
}
00958 
=head2 _preload

  Description: Ensure all children are loaded (used for hash transformation)
               by calling the annotations, compara, features,
               other_alignments and variations accessors in turn
  Returntype : none
  Exceptions : none
  Caller     : Internal
  Status     : Stable

=cut

sub _preload {
  my ($self) = @_;
  for my $child (qw(annotations compara features other_alignments variations)) {
    $self->$child();
  }
  return;
}
00976 
=head2 _unload

  Description: Remove all children (used after hash transformation to ensure
               object is minimised) by resetting the annotations, compara,
               features, other_alignments and variations slots to undef
  Returntype : none
  Exceptions : none
  Caller     : dump_metadata.pl
  Status     : Stable

=cut

sub _unload {
  my ($self) = @_;

  # Assigning an empty list to the slice leaves every slot present but undef.
  @{$self}{qw(annotations compara features other_alignments variations)} = ();
  return;
}
00994 
00995 1;