
=head1 LICENSE

Copyright [2009-2016] EMBL-European Bioinformatics Institute

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

=head1 CONTACT

  Please email comments or questions to the public Ensembl
  developers list at <dev@ensembl.org>.

  Questions may also be sent to the Ensembl help desk at
  <helpdesk@ensembl.org>.

=head1 NAME

Bio::EnsEMBL::MetaData::MetaDataDumper

=head1 SYNOPSIS

  # usage of concrete implementations
  my $dumper = Bio::EnsEMBL::MetaData::MetaDataDumper::MyDumper->new();

  # sequential dumping to multiple files in parallel
  my $opts = {
    division => ['EnsemblFungi','EnsemblProtists'],
    file     => 'MyFile.txt'
  };
  # set to dump to common "all" file as well as division
  my $dump_all = 1;
  # open files
  $dumper->start($opts->{division}, $opts->{file}, $dump_all);
  for my $md (@metadata) {
    # dump to the per-division file and to the common "all" file
    $dumper->write_metadata($md, $md->{division});
    $dumper->write_metadata($md, 'all');
  }
  # close files
  $dumper->end();

  # dumping in one go (can be expensive to render)
  $dumper->dump_metadata(\@metadata, "my_file.txt", ['EnsemblMetazoa'], $dump_all);

=head1 DESCRIPTION

Base class for rendering details from an instance of Bio::EnsEMBL::MetaData::GenomeInfo.
Designed for dumping to multiple per-division files in parallel including a common "all"
file, (using start, write_metadata and end) or for one-off use (using dump_metadata).

=head1 SEE ALSO

Bio::EnsEMBL::MetaData::MetaDataDumper::JsonMetaDataDumper
Bio::EnsEMBL::MetaData::MetaDataDumper::TextMetaDataDumper
Bio::EnsEMBL::MetaData::MetaDataDumper::TT2MetaDataDumper
Bio::EnsEMBL::MetaData::MetaDataDumper::XMLMetaDataDumper
Bio::EnsEMBL::MetaData::MetaDataDumper::UniProtReportDumper

=head1 AUTHOR

Dan Staines

=cut

package Bio::EnsEMBL::MetaData::MetaDataDumper;
use strict;
use warnings;
use Bio::EnsEMBL::Utils::Exception qw/throw/;
use Bio::EnsEMBL::Utils::Argument qw(rearrange);
use Log::Log4perl qw(get_logger);
use Data::Dumper;
use Carp qw(croak cluck);

=head1 SUBROUTINES/METHODS

=head2 new

  Description: Creates a new dumper object. The object carries a logger
               and the key ('all') under which the common output file
               is registered.
  Returntype : Bio::EnsEMBL::MetaData::MetaDataDumper
  Exceptions : none
  Caller     : internal
  Status     : Stable

=cut

sub new {
  my ($proto) = @_;
  # allow construction from either a class name or an existing instance
  my $class = ref($proto) || $proto;
  my $self = bless( {}, $class );
  $self->{logger} = get_logger();
  $self->{all}    = 'all';
  return $self;
}

=head2 dump_metadata

  Description: Write supplied metadata to file in one go: opens the
               output files, writes every element, then closes the files.
  Arg        : Arrayref of Bio::EnsEMBL::MetaData::GenomeInfo
  Arg        : File to write to
  Arg        : Arrayref of divisions
  Arg        : Whether to dump to "all" file
  Returntype : none
  Exceptions : croaks (via start) if an output file cannot be opened
  Caller     : general
  Status     : Stable

=cut

sub dump_metadata {
  my ( $self, $metadata, $file, $divisions, $dump_all ) = @_;
  # tolerate a missing division list instead of dying on the dereference
  $divisions ||= [];
  # start() expects the divisions first and the file name second
  $self->start( $divisions, $file, $dump_all );
  for my $md ( @{$metadata} ) {
    # the common file is only attempted when more than one division was
    # requested; write_metadata() skips it if no "all" handle was opened
    if ( scalar( @{$divisions} ) > 1 ) {
      $self->write_metadata( $md, $self->{all} );
    }
    $self->write_metadata( $md, $md->{division} );
  }
  $self->end();
  return;
}

=head2 write_metadata

  Arg        : Metadata to write (dump_metadata passes one element of its
               metadata arrayref per call)
  Arg        : Division to write to
  Description: Write metadata to the file opened for the given division.
               Does nothing if no file handle is registered for that
               division. The number of records already written to the
               division is passed to _write_metadata_to_file and is
               incremented afterwards.
  Returntype : none
  Exceptions : none
  Caller     : general
  Status     : Stable

=cut

sub write_metadata {
  my ( $self, $metadata, $division ) = @_;
  my $fh = $self->{files}->{$division};
  if ( defined $fh ) {
    $self->_write_metadata_to_file( $metadata, $fh,
                                    $self->{count}->{$division} );
    $self->{count}->{$division} += 1;
  }
  return;
}

=head2 start

  Description: Start writing to output file(s). One file is opened per
               division, named by inserting "_<division>" before the
               extension of the supplied file name (or appending it when
               the name has no extension). If requested, the supplied
               file name itself is opened as the common "all" file.
  Arg        : Arrayref of strings representing divisions to dump
  Arg        : Basename of file to write to
  Arg        : Whether to dump to "all" file
  Returntype : none
  Exceptions : croaks if a file cannot be opened for writing
  Caller     : internal
  Status     : Stable

=cut

sub start {
  my ( $self, $divisions, $file, $dump_all ) = @_;
  $self->{files}     = {};
  $self->{filenames} = {};
  $self->logger()->debug("Opening output files");
  for my $division ( @{$divisions} ) {
    my $out_file = $file;
    # insert the division before the extension; the extension may not
    # contain a "/" so that a dot in a directory name is never rewritten
    unless ( $out_file =~ s{(.+)(\.[^./]+)$}{${1}_${division}${2}} ) {
      # no extension: still give every division its own file rather than
      # letting all divisions truncate the same one
      $out_file .= "_$division";
    }
    $self->logger()
      ->debug("Opening output file $out_file for division $division");
    open( my $fh, '>', $out_file ) ||
      croak "Could not open $out_file for writing: $!";
    $self->{files}->{$division}     = $fh;
    $self->{filenames}->{$division} = $out_file;
    $self->{count}->{$division}     = 0;
  }
  if ( defined $dump_all && $dump_all == 1 ) {
    $self->logger()->debug("Opening output file $file");
    open( my $fh, '>', $file ) ||
      croak "Could not open $file for writing: $!";
    $self->{files}->{ $self->{all} }     = $fh;
    $self->{filenames}->{ $self->{all} } = $file;
    $self->{count}->{ $self->{all} }     = 0;
  }
  $self->{files_handles} = {};
  $self->logger()
    ->debug(
       "Opened " . scalar( keys %{ $self->{files} } ) . " output files" );
  return;
} ## end sub start

=head2 end

  Description: Stop writing to output file(s) and close handles. A handle
               that fails to close is reported with a stack trace but
               does not stop the remaining handles from being closed.
  Returntype : none
  Exceptions : none
  Caller     : internal
  Status     : Stable

=cut

sub end {
  my ($self) = @_;
  $self->logger()->debug("Closing all file handles");
  for my $fh ( values %{ $self->{files} } ) {
    $self->logger()->debug("Closing file handle");
    close($fh) || cluck "Could not close file handle for writing: $!";
  }
  $self->logger()
    ->debug(
      "Closed " . scalar( keys %{ $self->{files} } ) . " file handles" );
  return;
}

=head2 _write_metadata_to_file

  Arg        : Metadata to write
  Arg        : File handle to write to
  Arg        : Number of records already written to this handle
               (as passed by write_metadata)
  Description: Stub for writing to a file - implement in subclasses
  Returntype : none
  Exceptions : always throws in this base class
  Caller     : internal
  Status     : Stable

=cut

sub _write_metadata_to_file {
  my ( $self, $metadata, $fh ) = @_;
  throw( "Unimplemented subroutine _write_metadata_to_file() in " .
         ref($self) . ". Please implement" );
}

=head1 INTERNAL METHODS

=head2 logger

  Description: Get logger
  Returntype : Log4perl::Logger
  Exceptions : none
  Caller     : internal
  Status     : Stable

=cut

sub logger {
  my ($self) = @_;
  return $self->{logger};
}

=head2 yesno

  Description: Turn defined/integer into Y/N. Returns 'Y' only for a
               defined value that is numerically greater than zero.
  Arg        : Integer 1/0
  Returntype : String
  Exceptions : none
  Caller     : internal
  Status     : Stable

=cut

sub yesno {
  my ( $self, $num ) = @_;
  return ( defined $num && $num + 0 > 0 ) ? 'Y' : 'N';
}

=head2 metadata_to_hash

  Description: Turn metadata into hash by calling to_hash(1) on each
               element and collecting the results under the key 'genome'.
  Arg        : Arrayref of Bio::EnsEMBL::MetaData::GenomeInfo
  Returntype : Hashref
  Exceptions : none
  Caller     : internal
  Status     : Stable

=cut

sub metadata_to_hash {
  my ( $self, $metadata ) = @_;
  my @genomes = map { $_->to_hash(1) } @{$metadata};
  return { genome => \@genomes };
}

1;