00001
00002 =head1 LICENSE
00003
00004 Copyright [1999-2016] EMBL-European Bioinformatics Institute
00005
00006 Licensed under the Apache License, Version 2.0 (the "License");
00007 you may not use this file except in compliance with the License.
00008 You may obtain a copy of the License at
00009
     http://www.apache.org/licenses/LICENSE-2.0
00011
00012 Unless required by applicable law or agreed to in writing, software
00013 distributed under the License is distributed on an "AS IS" BASIS,
00014 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015 See the License for the specific language governing permissions and
00016 limitations under the License.
00017
00018 =head1 CONTACT
00019
00020 Please email comments or questions to the public Ensembl
00021 developers list at <dev@ensembl.org>.
00022
00023 Questions may also be sent to the Ensembl help desk at
00024 <helpdesk@ensembl.org>.
00025
00026 =head1 NAME
00027
00028 Bio::EnsEMBL::MetaData::GenomeInfo
00029
00030 =head1 SYNOPSIS
00031
00032 my $genome = Bio::EnsEMBL::MetaData::GenomeInfo->new(
00033 -species => $dba->species(),
00034 -species_id => $dba->species_id(),
00035 -division => $meta->get_division() || 'Ensembl',
00036 -dbname => $dbname);
00037
00038 print Dumper($genome->to_hash());
00039
00040 =head1 DESCRIPTION
00041
00042 Object encapsulating meta information about a genome in Ensembl Genomes.
00043
00044 Can be used to render information about a genome e.g.
00045
00046 print $genome->name()." (".$genome->species.")\n";
00047 print "Sequences: ".scalar(@{$genome->sequences()})."\n";
00048 if($genome->has_variations()) {
00049 print "Variations: \n";
00050 # variations is a hash with type as the key
00051 while(my ($type,$value) = each %{$genome->variations()}) {
00052 print "- $type\n";
00053 }
00054 }
00055 print "Compara analyses: ".scalar(@{$genome->compara()})."\n";
00056
00057 =head1 SEE ALSO
00058
00059 Bio::EnsEMBL::MetaData::BaseInfo
00060 Bio::EnsEMBL::MetaData::GenomeAssemblyInfo
00061 Bio::EnsEMBL::MetaData::GenomeOrganismInfo
00062 Bio::EnsEMBL::MetaData::GenomeComparaInfo
00063 Bio::EnsEMBL::MetaData::DataReleaseInfo
00064 Bio::EnsEMBL::MetaData::DatabaseInfo
00065 Bio::EnsEMBL::MetaData::EventInfo
00066 Bio::EnsEMBL::MetaData::DBSQL::GenomeInfoAdaptor
00067
00068 =head1 AUTHOR
00069
00070 Dan Staines
00071
00072 =cut
00073
00074 use strict;
00075 use warnings;
00076
00077 package Bio::EnsEMBL::MetaData::GenomeInfo;
00078 use base qw/Bio::EnsEMBL::MetaData::BaseInfo/;
00079 use Bio::EnsEMBL::Utils::Argument qw(rearrange);
00080 use Bio::EnsEMBL::Utils::Exception qw/throw/;
00081 use Bio::EnsEMBL::MetaData::DataReleaseInfo;
00082 use Bio::EnsEMBL::MetaData::GenomeAssemblyInfo;
00083 use Bio::EnsEMBL::MetaData::GenomeOrganismInfo;
00084
00085 =head1 CONSTRUCTOR
=head2 new
  Arg [-DISPLAY_NAME] :
       string - human readable version of the name of the genome
  Arg [-NAME] :
       string - computable version of the name of the genome (lower case, no spaces)
  Arg [-DBNAME] :
       string - name of the core database in which the genome can be found
  Arg [-SPECIES_ID] :
       int - identifier of the species within the core database for this genome
  Arg [-TAXONOMY_ID] :
       string - NCBI taxonomy identifier
  Arg [-SPECIES_TAXONOMY_ID] :
       string - NCBI taxonomy identifier of species to which this genome belongs
  Arg [-ASSEMBLY_NAME] :
       string - name of the assembly
  Arg [-ASSEMBLY_ID] :
       string - INSDC assembly accession
  Arg [-ASSEMBLY_LEVEL] :
       string - highest assembly level (chromosome, supercontig etc.)
  Arg [-GENEBUILD]:
       string - identifier for genebuild
  Arg [-DIVISION]:
       string - name of Ensembl Genomes division (e.g. EnsemblBacteria, EnsemblPlants)
  Arg [-STRAIN]:
       string - name of strain to which genome belongs
  Arg [-SEROTYPE]:
       string - name of serotype to which genome belongs
  Arg [-IS_REFERENCE]:
       bool - 1 if this genome is the reference for its species
  Arg [-ASSEMBLY]:
       (optional) assembly object to use as-is; when it is absent (and none
       can be loaded) a GenomeAssemblyInfo is built from the assembly and
       organism arguments above
  Arg [-ORGANISM]:
       (optional) organism passed through to the GenomeAssemblyInfo that is
       built when no assembly is available
  Arg [-DATA_RELEASE]:
       (optional) data release stored on the genome

  Example    : $info = Bio::EnsEMBL::MetaData::GenomeInfo->new(...);
  Description: Creates a new info object. If -DBNAME is given, a database
               entry is registered through add_database().
  Returntype : Bio::EnsEMBL::MetaData::GenomeInfo
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub new {
  my ( $class, @args ) = @_;
  my $self = $class->SUPER::new(@args);

  # Argument names, in the order their values are unpacked below.
  my @arg_names = qw(
    NAME           DISPLAY_NAME
    DBNAME         SPECIES_ID
    TAXONOMY_ID    SPECIES_TAXONOMY_ID
    ASSEMBLY_NAME  ASSEMBLY_ID
    ASSEMBLY_LEVEL GENEBUILD
    DIVISION       STRAIN
    SEROTYPE       IS_REFERENCE
    ASSEMBLY       ORGANISM
    DATA_RELEASE
  );

  my ( $name,           $display_name,
       $dbname,         $species_id,
       $taxonomy_id,    $species_taxonomy_id,
       $assembly_name,  $assembly_id,
       $assembly_level, $genebuild,
       $division,       $strain,
       $serotype,       $is_reference,
       $assembly,       $organism,
       $data_release ) = rearrange( \@arg_names, @args );

  # These four are stored directly on the object (even when undefined).
  $self->{genebuild}    = $genebuild;
  $self->{division}     = $division;
  $self->{assembly}     = $assembly;
  $self->{data_release} = $data_release;

  $self->add_database( $dbname, $species_id ) if defined $dbname;

  # Nothing more to do when an assembly is already available.
  return $self if defined $self->assembly();

  my $built =
    Bio::EnsEMBL::MetaData::GenomeAssemblyInfo->new(
                                  -ASSEMBLY_NAME       => $assembly_name,
                                  -ASSEMBLY_ID         => $assembly_id,
                                  -ASSEMBLY_LEVEL      => $assembly_level,
                                  -DISPLAY_NAME        => $display_name,
                                  -NAME                => $name,
                                  -STRAIN              => $strain,
                                  -SEROTYPE            => $serotype,
                                  -TAXONOMY_ID         => $taxonomy_id,
                                  -SPECIES_TAXONOMY_ID => $species_taxonomy_id,
                                  -IS_REFERENCE        => $is_reference,
                                  -ORGANISM            => $organism );
  my $adaptor = $self->adaptor();
  $built->adaptor($adaptor) if defined $adaptor;
  $self->assembly($built);

  return $self;
} ## end sub new
00173
00174 =head1 ATTRIBUTE METHODS
00175
=head2 dbname
  Description: Gets name of core database from which genome comes
               (delegates to the core database found by _get_core)
  Returntype : string
  Exceptions : none
  Caller     : general
  Status     : Stable
=cut

sub dbname {
  my $self = shift;
  my $core = $self->_get_core();
  return $core->dbname();
}
00188
=head2 species_id
  Description: Gets species_id of genome within core database
               (delegates to the core database found by _get_core)
  Returntype : string
  Exceptions : none
  Caller     : general
  Status     : Stable
=cut

sub species_id {
  my $self = shift;
  my $core = $self->_get_core();
  return $core->species_id();
}
00201
=head2 databases
  Arg        : (optional) Arrayref of DatabaseInfo objects to set
  Description: Gets/sets databases associated with the genome. Setting a new
               list also discards the core database cached by _get_core, so
               dbname/species_id are resolved against the new list.
  Returntype : Arrayref of Bio::EnsEMBL::MetaData::DatabaseInfo
  Exceptions : none
  Caller     : general
  Status     : Stable
=cut

sub databases {
  my ( $self, $databases ) = @_;
  if ( defined $databases ) {
    $self->{databases} = $databases;
    # the cached core entry belongs to the previous list
    delete $self->{core};
  }
  $self->_load_child( 'databases', '_fetch_databases' );
  return $self->{databases};
}
00219
=head2 data_release
  Arg        : (optional) data_release object to set
  Description: Gets/sets data_release to which genome belongs
  Returntype : Bio::EnsEMBL::MetaData::DataReleaseInfo
  Exceptions : none
  Caller     : general
  Status     : Stable
=cut

sub data_release {
  my ( $self, $data_release ) = @_;
  if ( defined $data_release ) {
    $self->{data_release} = $data_release;
  }
  $self->_load_child( 'data_release', '_fetch_data_release' );
  return $self->{data_release};
}
00235
=head2 organism
  Arg        : (optional) organism object to set
  Description: Gets/sets organism to which genome belongs
               (held by the genome's assembly)
  Returntype : Bio::EnsEMBL::MetaData::GenomeOrganismInfo
  Exceptions : none
  Caller     : general
  Status     : Stable
=cut

sub organism {
  my ( $self, $organism ) = @_;
  my $assembly = $self->assembly();
  return $assembly->organism($organism);
}
00249
=head2 display_name
  Arg        : (optional) name, passed on to the organism's display_name
  Description: Gets readable name (delegates to the organism)
  Returntype : string
  Exceptions : none
  Caller     : general
  Status     : Stable
=cut

sub display_name {
  my ( $self, $name ) = @_;
  my $organism = $self->organism();
  return $organism->display_name($name);
}
00262
=head2 strain
  Description: Gets strain of genome (delegates to the organism)
  Returntype : string
  Exceptions : none
  Caller     : general
  Status     : Stable
=cut

sub strain {
  my $self     = shift;
  my $organism = $self->organism();
  return $organism->strain();
}
00275
=head2 serotype
  Description: Gets serotype (delegates to the organism)
  Returntype : string
  Exceptions : none
  Caller     : general
  Status     : Stable
=cut

sub serotype {
  my $self     = shift;
  my $organism = $self->organism();
  return $organism->serotype();
}
00288
=head2 name
  Description: Gets unique, compute-safe name for genome
               (delegates to the organism)
  Returntype : string
  Exceptions : none
  Caller     : general
  Status     : Stable
=cut

sub name {
  my $self     = shift;
  my $organism = $self->organism();
  return $organism->name();
}
00301
=head2 taxonomy_id
  Description: Gets NCBI taxonomy ID (delegates to the organism)
  Returntype : string
  Exceptions : none
  Caller     : general
  Status     : Stable
=cut

sub taxonomy_id {
  my $self     = shift;
  my $organism = $self->organism();
  return $organism->taxonomy_id();
}
00314
=head2 species_taxonomy_id
  Description: Gets NCBI taxonomy ID of species to which this belongs
               (delegates to the organism)
  Returntype : string
  Exceptions : none
  Caller     : general
  Status     : Stable
=cut

sub species_taxonomy_id {
  my $self     = shift;
  my $organism = $self->organism();
  return $organism->species_taxonomy_id();
}
00327
=head2 assembly
  Arg        : (optional) assembly to set
  Description: Gets/sets assembly object
  Returntype : Bio::EnsEMBL::MetaData::GenomeAssemblyInfo
  Exceptions : none
  Caller     : general
  Status     : Stable
=cut

sub assembly {
  my ( $self, $assembly ) = @_;
  if ( defined $assembly ) {
    $self->{assembly} = $assembly;
  }
  $self->_load_child( 'assembly', '_fetch_assembly' );
  return $self->{assembly};
}
00343
=head2 assembly_name
  Description: Gets name of assembly (delegates to the assembly object)
  Returntype : string
  Exceptions : none
  Caller     : general
  Status     : Stable
=cut

sub assembly_name {
  my $self     = shift;
  my $assembly = $self->assembly();
  return $assembly->assembly_name();
}
00356
=head2 assembly_accession
  Description: Gets INSDC accession for assembly
               (delegates to the assembly object)
  Returntype : string
  Exceptions : none
  Caller     : general
  Status     : Stable
=cut

sub assembly_accession {
  my $self     = shift;
  my $assembly = $self->assembly();
  return $assembly->assembly_accession();
}
00369
=head2 assembly_level
  Description: Gets highest level of assembly (chromosome, supercontig etc.)
               (delegates to the assembly object)
  Returntype : string
  Exceptions : none
  Caller     : general
  Status     : Stable
=cut

sub assembly_level {
  my $self     = shift;
  my $assembly = $self->assembly();
  return $assembly->assembly_level();
}
00382
=head2 genebuild
  Arg        : (optional) genebuild to set
  Description: Gets/sets identifier for genebuild
  Returntype : string
  Exceptions : none
  Caller     : general
  Status     : Stable
=cut

sub genebuild {
  my ( $self, $genebuild ) = @_;
  if ( defined $genebuild ) {
    $self->{genebuild} = $genebuild;
  }
  return $self->{genebuild};
}
00397
=head2 division
  Arg        : (optional) division to set
  Description: Gets/sets Ensembl Genomes division
  Returntype : string
  Exceptions : none
  Caller     : general
  Status     : Stable
=cut

sub division {
  my ( $self, $division ) = @_;
  if ( defined $division ) {
    $self->{division} = $division;
  }
  return $self->{division};
}
00412
=head2 is_reference
  Arg        : (optional) value of is_reference
  Description: Gets/sets whether this is a reference for the species
               (delegates to the organism)
  Returntype : bool
  Exceptions : none
  Caller     : general
  Status     : Stable
=cut

sub is_reference {
  my ( $self, $is_ref ) = @_;
  my $organism = $self->organism();
  return $organism->is_reference($is_ref);
}
00426
=head2 db_size
  Arg        : (optional) db_size to set
  Description: Gets/sets size of database containing core
  Returntype : string
  Exceptions : none
  Caller     : general
  Status     : Stable
=cut

sub db_size {
  my ( $self, $arg ) = @_;
  if ( defined $arg ) {
    $self->{db_size} = $arg;
  }
  return $self->{db_size};
}
00441
=head2 base_count
  Description: Gets total number of bases in assembled genome
               (delegates to the assembly object)
  Returntype : integer
  Exceptions : none
  Caller     : general
  Status     : Stable
=cut

sub base_count {
  my $self     = shift;
  my $assembly = $self->assembly();
  return $assembly->base_count();
}
00454
=head2 aliases
  Description: Gets aliases by which the genome is also known
               (delegates to the organism)
  Returntype : Arrayref of aliases
  Exceptions : none
  Caller     : general
  Status     : Stable
=cut

sub aliases {
  my $self     = shift;
  my $organism = $self->organism();
  return $organism->aliases();
}
00467
=head2 compara
  Arg        : (optional) arrayref of GenomeComparaInfo objects to set
  Description: Gets/sets GenomeComparaInfo describing comparative analyses applied to the genome.
               Setting a new list clears the cached has_peptide_compara, has_synteny,
               has_genome_alignments and has_pan_compara flags.
  Returntype : Arrayref of Bio::EnsEMBL::MetaData::GenomeComparaInfo
  Exceptions : none
  Caller     : general
  Status     : Stable
=cut

sub compara {
  my ( $self, $compara ) = @_;
  if ( defined $compara ) {
    $self->{compara} = $compara;
    # flags derived from the previous list no longer apply
    for my $flag (
      qw(has_peptide_compara has_synteny has_genome_alignments has_pan_compara) )
    {
      $self->{$flag} = undef;
    }
  }
  $self->_load_child( 'compara', '_fetch_comparas' );
  return $self->{compara};
}
00489
=head2 sequences
  Description: Gets array of hashrefs describing sequences from the assembly. Elements are hashrefs with name and acc as keys
               (delegates to the assembly object)
  Returntype : Arrayref
  Exceptions : none
  Caller     : general
  Status     : Stable
=cut

sub sequences {
  my $self     = shift;
  my $assembly = $self->assembly();
  return $assembly->sequences();
}
00502
=head2 publications
  Description: Gets PubMed IDs for publications associated with the genome
               (delegates to the organism)
  Returntype : Arrayref of PubMed IDs
  Exceptions : none
  Caller     : general
  Status     : Stable
=cut

sub publications {
  my $self     = shift;
  my $organism = $self->organism();
  return $organism->publications();
}
00515
=head2 variations
  Arg        : (optional) variations to set
  Description: Gets/sets variations associated with genomes as hashref
               (variations,structural variations,genotypes,phenotypes),
               further broken down into counts by type/source.
               Setting a new value clears the cached has_variations flag.
  Returntype : Hashref
  Exceptions : none
  Caller     : general
  Status     : Stable
=cut

sub variations {
  my ( $self, $variations ) = @_;
  if ( defined $variations ) {
    # replace the data and drop the flag derived from the old data
    @{$self}{qw(variations has_variations)} = ( $variations, undef );
  }
  $self->_load_child( 'variations', '_fetch_variations' );
  return $self->{variations};
}
00536
=head2 features
  Arg        : (optional) features to set
  Description: Gets/sets general genomic features associated with the genome as hashref
               keyed by type (e.g. repeatFeatures,simpleFeatures), further broken down into
               counts by analysis
  Returntype : Hashref
  Exceptions : none
  Caller     : general
  Status     : Stable
=cut

sub features {
  my ( $self, $features ) = @_;
  $self->{features} = $features if defined $features;
  $self->_load_child( 'features', '_fetch_features' );
  return $self->{features};
}
00556
=head2 annotations
  Arg        : (optional) annotations to set
  Description: Gets/sets summary information about gene annotation as hashref, with
               annotation type as key and count as value
  Returntype : hashref
  Exceptions : none
  Caller     : general
  Status     : Stable
=cut

sub annotations {
  my ( $self, $annotation ) = @_;
  $self->{annotations} = $annotation if defined $annotation;
  $self->_load_child( 'annotations', '_fetch_annotations' );
  return $self->{annotations};
}
00575
=head2 other_alignments
  Arg        : (optional) other alignments to set
  Description: Gets/sets other alignments as hashref, keyed by type (dnaAlignFeatures,proteinAlignFeatures)
               with values as logic_name-count pairs.
               Setting a new value clears the cached has_other_alignments flag.
  Returntype : Hashref, or 0 when no alignments are available
  Exceptions : none
  Caller     : general
  Status     : Stable
=cut

sub other_alignments {
  my ( $self, $other_alignments ) = @_;
  if ( defined $other_alignments ) {
    # replace the data and drop the flag derived from the old data
    @{$self}{qw(other_alignments has_other_alignments)} =
      ( $other_alignments, undef );
  }
  $self->_load_child( 'other_alignments', '_fetch_other_alignments' );
  my $current = $self->{other_alignments};
  return $current || 0;
}
00595
=head1 UTILITY METHODS
=head2 has_variations
  Arg        : (optional) 1/0 to set if genome has variation
  Description: Boolean-style method, returns 1 if genome has variation, 0 if not.
               When no value has been set, it is derived from count_variation()
               and cached.
  Returntype : 1 or 0
  Exceptions : none
  Caller     : general
  Status     : Stable
=cut

sub has_variations {
  my ( $self, $arg ) = @_;
  if ( defined $arg ) {
    $self->{has_variations} = $arg;
    return $self->{has_variations};
  }
  unless ( defined $self->{has_variations} ) {
    my $total = $self->count_variation();
    $self->{has_variations} = ( $total > 0 ) ? 1 : 0;
  }
  return $self->{has_variations};
}
00616
=head2 has_genome_alignments
  Arg        : (optional) 1/0 to set if genome has genome alignments
  Description: Boolean-style method, returns 1 if genome has genome alignments, 0 if not.
               When no value has been set and compara analyses are available,
               the flag is derived from the first analysis for which
               is_dna_compara() is true, and cached.
  Returntype : 1 or 0
  Exceptions : none
  Caller     : general
  Status     : Stable
=cut

sub has_genome_alignments {
  my ( $self, $arg ) = @_;
  my $flag = 'has_genome_alignments';

  if ( defined $arg ) {
    $self->{$flag} = $arg;
    return $self->{$flag} || 0;
  }

  if ( !defined( $self->{$flag} ) && defined( $self->compara() ) ) {
    $self->{$flag} = 0;
  ANALYSIS:
    foreach my $analysis ( @{ $self->compara() } ) {
      next ANALYSIS unless $analysis->is_dna_compara();
      $self->{$flag} = 1;
      last ANALYSIS;
    }
  }
  return $self->{$flag} || 0;
}
00644
=head2 has_synteny
  Arg        : (optional) 1/0 to set if genome has synteny
  Description: Boolean-style method, returns 1 if genome has synteny, 0 if not.
               When no value has been set and compara analyses are available,
               the flag is derived from the first analysis for which
               is_synteny() is true, and cached.
  Returntype : 1 or 0
  Exceptions : none
  Caller     : general
  Status     : Stable
=cut

sub has_synteny {
  my ( $self, $arg ) = @_;
  my $flag = 'has_synteny';

  if ( defined $arg ) {
    $self->{$flag} = $arg;
    return $self->{$flag} || 0;
  }

  if ( !defined( $self->{$flag} ) && defined( $self->compara() ) ) {
    $self->{$flag} = 0;
  ANALYSIS:
    foreach my $analysis ( @{ $self->compara() } ) {
      next ANALYSIS unless $analysis->is_synteny();
      $self->{$flag} = 1;
      last ANALYSIS;
    }
  }
  return $self->{$flag} || 0;
}
00670
=head2 has_peptide_compara
  Arg        : (optional) 1/0 to set if genome has peptide compara
  Description: Boolean-style method, returns 1 if genome has peptide, 0 if not.
               When no value has been set and compara analyses are available,
               the flag is derived from the first analysis that is a peptide
               compara but not a pan compara, and cached.
  Returntype : 1 or 0
  Exceptions : none
  Caller     : general
  Status     : Stable
=cut

sub has_peptide_compara {
  my ( $self, $arg ) = @_;
  my $flag = 'has_peptide_compara';

  if ( defined $arg ) {
    $self->{$flag} = $arg;
    return $self->{$flag} || 0;
  }

  if ( !defined( $self->{$flag} ) && defined( $self->compara() ) ) {
    $self->{$flag} = 0;
  ANALYSIS:
    foreach my $analysis ( @{ $self->compara() } ) {
      next ANALYSIS unless $analysis->is_peptide_compara();
      # pan compara analyses are reported by has_pan_compara instead
      next ANALYSIS if $analysis->is_pan_compara();
      $self->{$flag} = 1;
      last ANALYSIS;
    }
  }
  return $self->{$flag} || 0;
}
00697
=head2 has_pan_compara
  Arg        : (optional) 1/0 to set if genome is included in pan compara
  Description: Boolean-style method, returns 1 if genome is in pan compara, 0 if not.
               When no value has been set and compara analyses are available,
               the flag is derived from the first analysis for which
               is_pan_compara() is true, and cached.
  Returntype : 1 or 0
  Exceptions : none
  Caller     : general
  Status     : Stable
=cut

sub has_pan_compara {
  my ( $self, $arg ) = @_;
  my $flag = 'has_pan_compara';

  if ( defined $arg ) {
    $self->{$flag} = $arg;
    return $self->{$flag} || 0;
  }

  if ( !defined( $self->{$flag} ) && defined( $self->compara() ) ) {
    $self->{$flag} = 0;
  ANALYSIS:
    foreach my $analysis ( @{ $self->compara() } ) {
      next ANALYSIS unless $analysis->is_pan_compara();
      $self->{$flag} = 1;
      last ANALYSIS;
    }
  }
  return $self->{$flag} || 0;
}
00723
=head2 has_other_alignments
  Arg        : (optional) 1/0 to set if genome has other alignments
  Description: Boolean-style method, returns 1 if genome has other alignments, 0 if not.
               When no value has been set, it is derived from count_alignments()
               and cached.
  Returntype : 1 or 0
  Exceptions : none
  Caller     : general
  Status     : Stable
=cut

sub has_other_alignments {
  my ( $self, $arg ) = @_;
  if ( defined $arg ) {
    $self->{has_other_alignments} = $arg;
    return $self->{has_other_alignments} || 0;
  }
  unless ( defined $self->{has_other_alignments} ) {
    my $total = $self->count_alignments();
    $self->{has_other_alignments} = ( $total > 0 ) ? 1 : 0;
  }
  return $self->{has_other_alignments} || 0;
}
00743
=head2 count_variation
  Description: Returns total number of variations and structural variations mapped to genome.
               Only variation data already held on the object is counted;
               the object itself is left unchanged.
  Returntype : integer
  Exceptions : none
  Caller     : general
  Status     : Stable
=cut

sub count_variation {
  my ($self) = @_;

  # Take the hashref out first: indexing $self->{variations}{...} directly
  # would autovivify $self->{variations} as an empty hash on an object whose
  # variations have not been set, making them look present afterwards.
  my $variations = $self->{variations};
  return 0 unless defined $variations;

  return $self->count_hash_values( $variations->{variations} ) +
    $self->count_hash_values( $variations->{structural_variations} );
}
00757
=head2 count_alignments
  Description: Returns total number of alignments to genome (bam,
               proteinAlignFeatures and dnaAlignFeatures).
               Only alignment data already held on the object is counted;
               the object itself is left unchanged.
  Returntype : integer
  Exceptions : none
  Caller     : general
  Status     : Stable
=cut

sub count_alignments {
  my ($self) = @_;

  # Take the hashref out first: indexing $self->{other_alignments}{...}
  # directly would autovivify $self->{other_alignments} as an empty hash on
  # an object whose alignments have not been set.
  my $alignments = $self->{other_alignments};
  return 0 unless defined $alignments;

  my $total = 0;
  for my $type (qw(bam proteinAlignFeatures dnaAlignFeatures)) {
    $total += $self->count_hash_values( $alignments->{$type} );
  }
  return $total;
}
00772
=head2 get_uniprot_coverage
  Description: Get % of protein coding genes with a UniProt cross-reference,
               computed from the nProteinCodingUniProtKB and nProteinCoding
               annotation counts. Returns 0 when there are no annotations or
               no protein coding genes.
  Returntype : uniprot coverage as percentage
  Exceptions : none
  Caller     : general
  Status     : Stable
=cut

sub get_uniprot_coverage {
  my ($self) = @_;
  my $annotations = $self->annotations() || {};

  # Guard the denominator: a genome without protein coding genes would
  # otherwise die with "Illegal division by zero".
  my $n_coding = $annotations->{nProteinCoding};
  return 0 if !defined $n_coding || $n_coding == 0;

  my $n_with_uniprot = $annotations->{nProteinCodingUniProtKB} || 0;
  return 100.0 * $n_with_uniprot / $n_coding;
}
00787
=head2 add_database
  Arg        : Name of database
  Arg        : (Optional) species_id (defaults to 1)
  Description: Add a database associated with this genome. A new
               DatabaseInfo with this genome as its subject is appended to
               the genome's list of databases.
  Returntype : none
  Exceptions : none
  Caller     : general
  Status     : Stable
=cut

sub add_database {
  my ( $self, $dbname, $species_id ) = @_;

  # Apply the documented default to the argument itself, so the new
  # DatabaseInfo receives it and no stray species_id key is stored on the
  # genome.
  $species_id ||= 1;

  push @{ $self->{databases} },
    Bio::EnsEMBL::MetaData::DatabaseInfo->new( -SUBJECT    => $self,
                                               -DBNAME     => $dbname,
                                               -SPECIES_ID => $species_id );
  return;
}
00804
00805
=head2 to_string
  Description: Render genome as string for display, in the form
               dbID:name:dbname:species_id ('-' is used when there is no dbID)
  Returntype : String
  Exceptions : none
  Caller     : general
  Status     : Stable
=cut

sub to_string {
  my ($self) = @_;
  my @fields = ( ( $self->dbID() || '-' ),
                 $self->name(), $self->dbname(), $self->species_id() );
  return join( ':', @fields );
}
00818
=head2 to_hash
  Description: Render genome as plain hash suitable for export as JSON/XML.
               Arrays and hashes are converted recursively; the dbID, adaptor
               and logger keys are left out; keys starting with a digit are
               prefixed with '_'; other Bio::EnsEMBL::MetaData::*Info objects
               are rendered through their own to_hash method; anything else
               is returned unchanged.
  Argument   : (optional) if set to 1, force expansion of children
  Returntype : Hashref
  Exceptions : none
  Caller     : general
  Status     : Stable
=cut

sub to_hash {
  my ( $in, $keen ) = @_;
  my $out;
  my $type = ref $in;

  # [A-Za-z] rather than [A-z]: the latter also matches the punctuation
  # characters that sit between 'Z' and 'a' in ASCII.
  my $is_info = ( $type =~ m/Bio::EnsEMBL::MetaData::[A-Za-z]+Info/ ) ? 1 : 0;
  my $expand = ( defined $keen && $keen == 1 && $is_info ) ? 1 : 0;

  $in->_preload() if $expand;

  if ( $type eq 'ARRAY' ) {
    $out = [];
    for my $item ( @{$in} ) {
      push @{$out}, to_hash( $item, $keen );
    }
  }
  elsif ( $type eq 'HASH' || $type eq 'Bio::EnsEMBL::MetaData::GenomeInfo' ) {
    $out = {};

    # keys() resets the hash iterator and works on a snapshot of the keys,
    # so no entry is skipped when an earlier each() loop over the same hash
    # was left unfinished.
    for my $key ( keys %{$in} ) {
      next if $key eq 'dbID' || $key eq 'adaptor' || $key eq 'logger';

      # deal with keys starting with numbers, which are not valid element
      # names in XML
      my $out_key = ( $key =~ m/^[0-9]/ ) ? '_' . $key : $key;
      $out->{$out_key} = to_hash( $in->{$key}, $keen );
    }
  }
  elsif ($is_info) {
    $out = $in->to_hash($keen);
  }
  else {
    $out = $in;
  }

  # NOTE(review): the _unload documentation describes it as the step used
  # after hash transformation; confirm whether this second call was meant to
  # be _unload(). Kept as _preload() to preserve existing behaviour.
  $in->_preload() if $expand;

  return $out;
} ## end sub to_hash
00872
00873 =head1 INTERNAL METHODS
=head2 _get_core
  Description: Convenience method to find core database: the first of the
               genome's databases whose type is 'core'. The result is cached
               on the object once found.
  Returntype : DatabaseInfo
  Exceptions : none
  Caller     : internal
  Status     : Stable
=cut

sub _get_core {
  my ($self) = @_;
  return $self->{core} if defined $self->{core};

  my @cores = grep { $_->type() eq 'core' } @{ $self->databases() };
  $self->{core} = $cores[0];
  return $self->{core};
}
00888
00889
=head2 count_hash_values
  Description: Sums values found in hash (0 when no hash is given)
  Arg        : hashref
  Returntype : integer
  Exceptions : none
  Caller     : internal
  Status     : Stable
=cut

sub count_hash_values {
  my ( $self, $hash ) = @_;
  my $tot = 0;
  return $tot unless defined $hash;
  $tot += $_ for values %{$hash};
  return $tot;
}
00908
=head2 count_hash_lengths
  Description: Sums sizes of arrays found in hash as values
               (0 when no hash is given)
  Arg        : hashref
  Returntype : integer
  Exceptions : none
  Caller     : internal
  Status     : Stable
=cut

sub count_hash_lengths {
  my ( $self, $hash ) = @_;
  my $tot = 0;
  return $tot unless defined $hash;
  foreach my $list ( values %{$hash} ) {
    $tot += scalar @{$list};
  }
  return $tot;
}
00928
=head2 dbID
  Arg        : (optional) dbID to set
  Description: Gets/sets internal genome_id used as database primary key
  Returntype : dbID string
  Exceptions : none
  Caller     : Bio::EnsEMBL::MetaData::DBSQL::GenomeInfoAdaptor
  Status     : Stable
=cut

sub dbID {
  my ( $self, $id ) = @_;
  if ( defined $id ) {
    $self->{dbID} = $id;
  }
  return $self->{dbID};
}
00943
=head2 adaptor
  Arg        : (optional) adaptor to set
  Description: Gets/sets GenomeInfoAdaptor
  Returntype : Bio::EnsEMBL::MetaData::DBSQL::GenomeInfoAdaptor
  Exceptions : none
  Caller     : Internal
  Status     : Stable
=cut

sub adaptor {
  my ( $self, $adaptor ) = @_;
  if ( defined $adaptor ) {
    $self->{adaptor} = $adaptor;
  }
  return $self->{adaptor};
}
00958
=head2 _preload
  Description: Ensure all children are loaded (used for hash transformation).
               Calls, in order, the annotations, compara, features,
               other_alignments and variations accessors.
  Returntype : none
  Exceptions : none
  Caller     : Internal
  Status     : Stable
=cut

sub _preload {
  my ($self) = @_;
  for my $child (qw(annotations compara features other_alignments variations))
  {
    $self->$child();
  }
  return;
}
00976
=head2 _unload
  Description: Remove all children (used after hash transformation to ensure object is minimised).
               Sets annotations, compara, features, other_alignments and
               variations to undef.
  Returntype : none
  Exceptions : none
  Caller     : dump_metadata.pl
  Status     : Stable
=cut

sub _unload {
  my ($self) = @_;
  for my $child (qw(annotations compara features other_alignments variations))
  {
    $self->{$child} = undef;
  }
  return;
}
00994
00995 1;