MetaDataDumper.pm
Go to the documentation of this file.
00001 
00002 =head1 LICENSE
00003 
00004 Copyright [2009-2016] EMBL-European Bioinformatics Institute
00005 
00006 Licensed under the Apache License, Version 2.0 (the "License");
00007 you may not use this file except in compliance with the License.
00008 You may obtain a copy of the License at
00009 
00010      http://www.apache.org/licenses/LICENSE-2.0
00011 
00012 Unless required by applicable law or agreed to in writing, software
00013 distributed under the License is distributed on an "AS IS" BASIS,
00014 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015 See the License for the specific language governing permissions and
00016 limitations under the License.
00017 
00018 =head1 CONTACT
00019 
00020   Please email comments or questions to the public Ensembl
00021   developers list at <dev@ensembl.org>.
00022 
00023   Questions may also be sent to the Ensembl help desk at
00024   <helpdesk@ensembl.org>.
00025 
00026 =head1 NAME
00027 
00028 Bio::EnsEMBL::MetaData::MetaDataDumper
00029 
00030 =head1 SYNOPSIS
00031 
00032 # usage of concrete implementations
00033 my $dumper = Bio::EnsEMBL::MetaData::MetaDataDumper::MyDumper->new();
00034 
00035 # sequential dumping to multiple files in parallel
00036 my $opts = {
00037   division => ['EnsemblFungi','EnsemblProtists'],
00038   file => 'MyFile.txt'
00039 };
00040 # set to dump to common "all" file as well as division
00041 my $dump_all = 1;
00042 # open files
00043 $dumper->start($opts->{division}, $opts->{file}, $dump_all);
00044 for my $md (@metadata) {
00045   $dumper->write_metadata($md, $_) for ($md->{division}, 'all'); # dump to the division file and the common "all" file
00046 }
00047 # close files
00048 $dumper->end();
00049 
00050 # dumping in one go (can be expensive to render)
00051 $dumper->dump_metadata(\@metadata, "my_file.txt", ['EnsemblMetazoa'], $dump_all);
00052 
00053 =head1 DESCRIPTION
00054 
00055 Base class for rendering details from an instance of Bio::EnsEMBL::MetaData::GenomeInfo. 
00056 Designed for dumping to multiple per-division files in parallel including a common "all" 
00057 file, (using start, write_metadata and end) or for one-off use (using dump_metadata).
00058 
00059 =head1 SEE ALSO
00060 
00061 Bio::EnsEMBL::MetaData::MetaDataDumper::JsonMetaDataDumper
00062 Bio::EnsEMBL::MetaData::MetaDataDumper::TextMetaDataDumper
00063 Bio::EnsEMBL::MetaData::MetaDataDumper::TT2MetaDataDumper
00064 Bio::EnsEMBL::MetaData::MetaDataDumper::XMLMetaDataDumper
00065 Bio::EnsEMBL::MetaData::MetaDataDumper::UniProtReportDumper
00066 
00067 =head1 AUTHOR
00068 
00069 Dan Staines
00070 
00071 =cut
00072 
00073 package Bio::EnsEMBL::MetaData::MetaDataDumper;
00074 use Bio::EnsEMBL::Utils::Exception qw/throw/;
00075 use Bio::EnsEMBL::Utils::Argument qw(rearrange);
00076 use Log::Log4perl qw(get_logger);
00077 use Data::Dumper;
00078 use Carp qw(croak cluck);
00079 use strict;
00080 use warnings;
00081 
00082 =head1 SUBROUTINES/METHODS
00083 
00084 =head2 new
00085   Description: Creates a new dumper object
00086   Returntype : Bio::EnsEMBL::MetaData::MetaDataDumper
00087   Exceptions : none
00088   Caller     : internal
00089   Status     : Stable
00090 
00091 =cut
00092 
# Constructor. Returns a new dumper blessed into the invocant's class (an
# existing object may be used as the invocant). The object starts out with a
# logger obtained from get_logger() and the key 'all', which the other methods
# use to address the common cross-division output file.
# Extra arguments are accepted but not used.
sub new {
  my ( $proto, @args ) = @_;
  my %state = (
    logger => get_logger(),
    all    => 'all',
  );
  return bless( \%state, ref($proto) || $proto );
}
00101 
00102 =head2 dump_metadata
00103   Description: Write supplied metadata to file
00104   Arg        : Arrayref of Bio::EnsEMBL::MetaData::GenomeInfo
00105   Arg        : File to write to
00106   Arg        : Arrayref of divisions
00107   Arg        : Whether to dump to "all" file
00108   Returntype : none
00109   Exceptions : none
00110   Caller     : general
00111   Status     : Stable
00112 =cut
00113 
# One-shot dump: open the output files, write every record, close the files.
#   $metadata  - arrayref of metadata records; each record is handed to
#                write_metadata() and its {division} entry selects the file
#   $file      - output file name, passed on to start()
#   $divisions - arrayref of division names, passed on to start()
#   $dump_all  - when 1, every record is also written under $self->{all}
# Returns nothing.
sub dump_metadata {
  my ( $self, $metadata, $file, $divisions, $dump_all ) = @_;
  $divisions ||= [];

  # Same test start() applies when deciding whether to open the "all" file,
  # so the "all" file is filled exactly when it has been opened.
  my $write_all = ( defined $dump_all && $dump_all == 1 );

  # start() takes (divisions, file, dump_all) - in that order.
  $self->start( $divisions, $file, $dump_all );

  for my $md ( @{ $metadata || [] } ) {
    $self->write_metadata( $md, $self->{all} ) if $write_all;
    $self->write_metadata( $md, $md->{division} );
  }

  $self->end();
  return;
}
00130 
00131 =head2 write_metadata
00132   Arg        : Instance of Bio::EnsEMBL::MetaData::GenomeInfo
00133   Arg        : Division to write to
00134   Description: Write metadata to division files
00135   Returntype : none
00136   Exceptions : none
00137   Caller     : general
00138   Status     : Stable
00139 =cut
00140 
# Writes one metadata record to the file registered for $division.
#   $metadata - the record, passed through to _write_metadata_to_file()
#   $division - key into $self->{files} / $self->{count}
# When no handle is registered for the division nothing is written. Otherwise
# the record, the handle and the division's current counter value are handed
# to _write_metadata_to_file(), after which the counter is incremented.
# Returns nothing.
sub write_metadata {
  my ( $self, $metadata, $division ) = @_;

  my $handle = $self->{files}{$division};
  return unless defined $handle;

  $self->_write_metadata_to_file( $metadata, $handle,
                                  $self->{count}->{$division} );
  $self->{count}->{$division} += 1;
  return;
}
00151 
00152 =head2 start
00153   Description: Start writing to output file(s)
00154   Arg        : Arrayref of strings representing divisions to dump
00155   Arg        : Basename of file to write to
00156   Arg        : Whether to dump to "all" file
00157   Returntype : none
00158   Exceptions : none
00159   Caller     : internal
00160   Status     : Stable
00161 =cut
00162 
# Opens the output files and resets the per-file bookkeeping.
#   $divisions - arrayref of division names; one file is opened per division
#   $file      - base output file name. A division file is named by inserting
#                "_<division>" before the extension of $file, or by appending
#                it when the last path component has no extension.
#   $dump_all  - when 1, $file itself is opened too and registered under
#                the key held in $self->{all}
# Fills $self->{files} (handles), $self->{filenames} and $self->{count}, all
# keyed by division. Croaks if a file cannot be opened. Returns nothing.
sub start {
  my ( $self, $divisions, $file, $dump_all ) = @_;
  $self->{files}     = {};
  $self->{filenames} = {};
  $self->logger()->debug("Opening output files");

  for my $division ( @{ $divisions || [] } ) {
    my $out_file = $file;

    # The extension may not contain '/', so a dot in a directory name
    # (e.g. "rel.1/genomes") is never mistaken for an extension.
    my $has_extension =
      ( $out_file =~ s/(.+)(\.[^.\/]+)$/${1}_${division}${2}/ );

    # Without this fallback every division would open (and truncate) $file.
    $out_file .= "_$division" unless $has_extension;

    $self->logger()
      ->debug("Opening output file $out_file for division $division");
    open( my $fh, '>', $out_file )
      or croak("Could not open $out_file for writing: $!");
    $self->{files}->{$division}     = $fh;
    $self->{filenames}->{$division} = $out_file;
    $self->{count}{$division}       = 0;
  }

  if ( defined $dump_all && $dump_all == 1 ) {
    $self->logger()->debug("Opening output file $file");
    open( my $fh, '>', $file )
      or croak("Could not open $file for writing: $!");
    $self->{files}->{ $self->{all} }     = $fh;
    $self->{filenames}->{ $self->{all} } = $file;
    $self->{count}{ $self->{all} }       = 0;
  }

  # Not read anywhere in this file; kept as-is in case other code relies on it.
  $self->{files_handles} = {};

  $self->logger()
    ->debug(
         "Opened " . scalar( keys %{ $self->{files} } ) . " output files" );
  return;
} ## end sub start
00192 
00193 =head2 end
00194   Description: Stop writing to output file(s) and close handles
00195   Returntype : none
00196   Exceptions : none
00197   Caller     : internal
00198   Status     : Stable
00199 =cut
00200 
# Closes every output handle held in $self->{files}.
# A handle that fails to close is reported with cluck (a warning with stack
# trace) naming the file and the system error; processing continues with the
# remaining handles. Each handle is removed from $self->{files} once it has
# been processed, so a later write_metadata() finds no handle and calling
# end() again is harmless. $self->{filenames} is left untouched.
# Safe to call before start(). Returns nothing.
sub end {
  my ($self) = @_;
  my $files = $self->{files}     || {};
  my $names = $self->{filenames} || {};
  $self->logger()->debug("Closing all file handles");

  my $closed = 0;
  for my $division ( sort keys %{$files} ) {
    my $label =
      defined $names->{$division} ? $names->{$division} : $division;
    $self->logger()->debug("Closing file handle");
    if ( close( $files->{$division} ) ) {
      $closed++;
    }
    else {
      # Buffered write errors surface here, so include the system error.
      cluck("Could not close file handle for $label: $!");
    }
    delete $files->{$division};
  }

  $self->logger()->debug( "Closed " . $closed . " file handles" );
  return;
}
00213 
00214 =head2 _write_metadata_to_file
00215   Arg        : Instance of Bio::EnsEMBL::MetaData::GenomeInfo
00216   Arg        : File handle to write to
00217   Description: Stub for writing to a file - implement in subclasses
00218   Returntype : none
00219   Exceptions : none
00220   Caller     : internal
00221   Status     : Stable
00222 =cut
00223 
# Hook that renders one metadata record to an open file handle. This base
# implementation only raises an error via throw(); subclasses are expected to
# override it.
#   $metadata - the record handed to write_metadata()
#   $fh       - file handle to write to
#   $count    - value of the per-division counter at the time of the call,
#               as passed by write_metadata() (0 on the first call after
#               start())
sub _write_metadata_to_file {
  my ( $self, $metadata, $fh, $count ) = @_;
  throw( "Unimplemented subroutine _write_metadata_to_file() in "
         . ref($self)
         . ". Please implement" );
  return;
}
00230 
00231 =head1 INTERNAL METHODS
00232 =head2 logger
00233   Description: Get logger
00234   Returntype : Log4perl::Logger
00235   Exceptions : none
00236   Caller     : internal
00237   Status     : Stable
00238 =cut
00239 
# Read-only accessor: returns whatever is stored under the object's
# 'logger' key (set by new()).
sub logger {
  my $self = shift;
  return $self->{logger};
}
00244 
00245 =head2 yesno
00246   Description: Turn defined/integer into Y/N
00247   Arg        : Integer 1/0
00248   Returntype : String
00249   Exceptions : none
00250   Caller     : internal
00251   Status     : Stable
00252 =cut
00253 
# Maps a value to the flag 'Y' or 'N'.
#   $num - value to test; it is compared numerically
# Returns 'Y' when $num is defined and numerically greater than zero,
# 'N' otherwise (undef, zero and negative numbers).
sub yesno {
  my ( $self, $num ) = @_;
  return 'N' unless defined $num;
  return 'Y' if $num + 0 > 0;
  return 'N';
}
00258 
00259 =head2 metadata_to_hash
00260   Description: Turn metadata into hash
00261   Arg        : Arrayref of Bio::EnsEMBL::MetaData::GenomeInfo
00262   Returntype : Hashref
00263   Exceptions : none
00264   Caller     : internal
00265   Status     : Stable
00266 =cut
00267 
# Converts a list of metadata objects into a single hashref.
#   $metadata - arrayref of objects that provide a to_hash() method
# to_hash() is called on each object in order, with the argument 1 (its
# meaning is defined by the object's class, not here), and everything it
# returns is collected.
# Returns { genome => [ ...collected values... ] }.
sub metadata_to_hash {
  my ( $self, $metadata ) = @_;
  my @collected;
  for my $record ( @{$metadata} ) {
    push @collected, $record->to_hash(1);
  }
  return { genome => \@collected };
}
00276 
00277 1;