This is the input file which is of excel format:
A | B | C | D |
---|---|---|---|
APP | 1 | 210101 | 8.1 |
APP | 2 | 210102 | 8.2 |
APP | 3 | 210103 | 8.3 |
Data | 4 | 210104 | 8.4 |
Data | 5 | 210105 | 8.5 |
Data | 6 | 210106 | 8.6 |
How to generate output files of excel in below way using Perl?
output file 1 (contains only data from rows of app):
A | B | C |
---|---|---|
1 | 210101 | 8.1 |
2 | 210102 | 8.2 |
3 | 210103 | 8.3 |
Output file 2 (contains only data from rows of data):
A | B | C |
---|---|---|
4 | 210104 | 8.4 |
5 | 210105 | 8.5 |
6 | 210106 | 8.6 |
CodePudding user response:
Here is an example of how you can split the input file into multiple output files based on the value in column 0 of the input file:
package Main;
use v5.22.0; # experimental signatures requires perl >= 5.22
use feature qw(say);
use strict;
use warnings;
use experimental qw(signatures);
use Spreadsheet::ParseXLSX;
use Excel::CloneXLSX::Format qw(translate_xlsx_format);
use Excel::Writer::XLSX;
{
my $self = Main->new(
input_file => 'input.xlsx',
output_prefix => 'out',
);
my $worksheet = $self->scan_input_file();
$self->open_output_files();
my ( $row_min, $row_max ) = $worksheet->row_range();
my $col0 = 0; # column number that contains the save type
my @cols_to_save = (1..3);
for my $row ( $row_min .. $row_max ) {
my $cell = $worksheet->get_cell( $row, $col0 );
my $save_type = $cell->unformatted();
my $row = $self->get_row($worksheet, $row, \@cols_to_save);
$self->save_row( $save_type, $row);
}
$self->close_output_files();
say "Done.";
}
sub close_output_files( $self ) {
for my $file (keys %{$self->{files}}) {
my $workbook = $self->{files}{$file}{workbook};
$workbook->close();
}
}
sub save_row( $self, $save_type, $cells ) {
my $file = $self->{save_types}{$save_type};
my $workbook = $self->{files}{$file}{workbook};
my $worksheet = $self->{files}{$file}{worksheet};
my $row = $self->{files}{$file}{row};
my $col = 0;
for my $cell (@$cells) {
my $fmt = $cell->get_format();
my $fmt_props = translate_xlsx_format( $fmt );
my $new_format = $workbook->add_format(%$fmt_props);
my $value = $cell->unformatted() || '';
$worksheet->write($row, $col, $value, $new_format);
$col ;
}
$self->{files}{$file}{row} ;
}
sub get_row( $self, $worksheet, $row, $cols_to_save ) {
my @row;
for my $col (@$cols_to_save) {
my $cell = $worksheet->get_cell( $row, $col );
push @row, $cell;
}
return \@row;
}
sub new( $class, %args ) { bless \%args, $class }
sub scan_input_file( $self ) {
my $parser = Spreadsheet::ParseXLSX->new;
my $workbook = $parser->parse($self->{input_file});
my $worksheet = $workbook->worksheet(0);
my ( $row_min, $row_max ) = $worksheet->row_range();
my %save_types;
for my $row ( $row_min .. $row_max ) {
my $col0 = 0;
my $cell0 = $worksheet->get_cell( $row, $col0 );
my $save_type = $cell0->unformatted();
$save_types{$save_type} = 1;
}
$self->{save_types} = \%save_types;
return $worksheet;
}
sub open_output_files( $self ) {
my $save_types = $self->{save_types};
my $prefix = $self->{output_prefix};
my %file_info;
for my $type (keys %$save_types) {
my $fn = $prefix . "_" . $type . '.xlsx';
$save_types->{$type} = $fn;
my $workbook = Excel::Writer::XLSX->new( $fn );
my $worksheet = $workbook->add_worksheet();
$file_info{$fn} = {
workbook => $workbook,
worksheet => $worksheet,
row => 0, # current row number
};
}
$self->{files} = \%file_info;
}