Permalink
Browse files

initial commit

  • Loading branch information...
0 parents commit 8eb0b708cc1d5f99415f7ec9c5e34d2cf01b645d @typester committed Jul 25, 2012
6 .gitignore
@@ -0,0 +1,6 @@
+Makefile
+README
+blib/
+inc/
+test.pl
+private-*
4 .shipit
@@ -0,0 +1,4 @@
+steps = FindVersion, ChangeVersion, CheckChangeLog, DistTest, Commit, Tag, MakeDist, UploadCPAN
+
+git.tagpattern = %v
+git.push_to = origin
3 .travis.yml
@@ -0,0 +1,3 @@
+language: "perl"
+before_install:
+ - perl Makefile.PL | cpanm
19 MANIFEST.SKIP
@@ -0,0 +1,19 @@
+\bRCS\b
+\bCVS\b
+^MANIFEST\.
+^Makefile$
+~$
+\.old$
+^blib/
+^pm_to_blib
+^MakeMaker-\d
+\.gz$
+\.cvsignore
+^9\d_.*\.t
+^\.git/
+^\.shipit$
+\.gitignore$
+/\.git/
+^\.gitmodules$
+^\.travis.yml$
+^README\.md$
38 Makefile.PL
@@ -0,0 +1,38 @@
+use strict;
+use warnings;
+
+BEGIN {
+ my @devmods = qw(
+ Module::Install::AuthorTests
+ Module::Install::ReadmeFromPod
+ Module::Install::Repository
+ );
+ my @not_available;
+
+ eval qq{use inc::Module::Install; 1;} or push @not_available, 'inc::Module::Install';
+ for my $mod (@devmods) {
+ eval qq{require $mod} or push @not_available, $mod;
+ }
+ if (@not_available) {
+ print qq{# The following modules are not available.\n};
+ print qq{# `$^X $0 | cpanm` will install them:\n};
+ print $_, "\n" for @not_available;
+ print "\n";
+ exit -1;
+ }
+}
+
+use inc::Module::Install;
+
+name 'Data-XLSX-Parser';
+all_from 'lib/Data/XLSX/Parser.pm';
+
+readme_from 'lib/Data/XLSX/Parser.pm';
+author_tests 'xt';
+auto_set_repository;
+
+requires 'Archive::Zip';
+requires 'XML::Parser::Expat';
+requires 'File::Temp';
+
+WriteAll;
61 lib/Data/XLSX/Parser.pm
@@ -0,0 +1,61 @@
+package Data::XLSX::Parser;
+use strict;
+use warnings;
+
+use Data::XLSX::Parser::DocumentArchive;
+use Data::XLSX::Parser::Workbook;
+use Data::XLSX::Parser::SharedStrings;
+use Data::XLSX::Parser::Styles;
+use Data::XLSX::Parser::Sheet;
+
+sub new {
+ my ($class) = @_;
+
+ bless {
+ _row_event_handler => [],
+ _archive => undef,
+ _workbook => undef,
+ _shared_strings => undef,
+ }, $class;
+}
+
+sub add_row_event_handler {
+ my ($self, $handler) = @_;
+ push @{ $self->{_row_event_handler} }, $handler;
+}
+
+sub open {
+ my ($self, $file) = @_;
+ $self->{_archive} = Data::XLSX::Parser::DocumentArchive->new($file);
+}
+
+sub workbook {
+ my ($self) = @_;
+ $self->{_workbook} ||= Data::XLSX::Parser::Workbook->new($self->{_archive});
+}
+
+sub shared_strings {
+ my ($self) = @_;
+ $self->{_shared_strings} ||= Data::XLSX::Parser::SharedStrings->new($self->{_archive});
+}
+
+sub styles {
+ my ($self) = @_;
+ $self->{_styles} ||= Data::XLSX::Parser::Styles->new($self->{_archive});
+}
+
+sub sheet {
+ my ($self, $sheet_id) = @_;
+ $self->{_sheet} ||= Data::XLSX::Parser::Sheet->new($self, $self->{_archive}, $sheet_id);
+}
+
+sub _row_event {
+ my ($self, $row) = @_;
+
+ my $row_vals = [map { $_->{v} } @$row];
+ for my $handler (@{ $self->{_row_event_handler} }) {
+ $handler->($row_vals);
+ }
+}
+
+1;
40 lib/Data/XLSX/Parser/DocumentArchive.pm
@@ -0,0 +1,40 @@
+package Data::XLSX::Parser::DocumentArchive;
+use strict;
+use warnings;
+
+use Archive::Zip;
+
+sub new {
+ my ($class, $filename) = @_;
+
+ my $zip = Archive::Zip->new;
+ if ($zip->read($filename) != Archive::Zip::AZ_OK) {
+ die "Cannot open file: $filename";
+ }
+
+ bless {
+ _zip => $zip,
+ }, $class;
+}
+
+sub workbook {
+ my ($self) = @_;
+ $self->{_zip}->memberNamed('xl/workbook.xml');
+}
+
+sub sheet {
+ my ($self, $id) = @_;
+ $self->{_zip}->memberNamed(sprintf 'xl/worksheets/sheet%s.xml', $id);
+}
+
+sub shared_strings {
+ my ($self) = @_;
+ $self->{_zip}->memberNamed('xl/sharedStrings.xml');
+}
+
+sub styles {
+ my ($self) = @_;
+ $self->{_zip}->memberNamed('xl/styles.xml');
+}
+
+1;
66 lib/Data/XLSX/Parser/SharedStrings.pm
@@ -0,0 +1,66 @@
+package Data::XLSX::Parser::SharedStrings;
+use strict;
+use warnings;
+
+use XML::Parser::Expat;
+use Archive::Zip ();
+use File::Temp;
+
+sub new {
+ my ($class, $archive) = @_;
+
+ my $self = bless {
+ _data => [],
+
+ _is_string => 0,
+ _buf => '',
+ }, $class;
+
+ my $fh = File::Temp->new( SUFFIX => '.xml' );
+
+ my $handle = $archive->shared_strings or return $self;
+ die 'Failed to write temporally file: ', $fh->filename
+ unless $handle->extractToFileNamed($fh->filename) == Archive::Zip::AZ_OK;
+
+ my $parser = XML::Parser::Expat->new;
+ $parser->setHandlers(
+ Start => sub { $self->_start(@_) },
+ End => sub { $self->_end(@_) },
+ Char => sub { $self->_char(@_) },
+ );
+ $parser->parse($fh);
+
+ $self;
+}
+
+sub count {
+ my ($self) = @_;
+ scalar @{ $self->{_data} };
+}
+
+sub get {
+ my ($self, $index) = @_;
+ $self->{_data}->[$index];
+}
+
+sub _start {
+ my ($self, $parser, $name, %attrs) = @_;
+ $self->{_is_string} = 1 if $name eq 'si';
+}
+
+sub _end {
+ my ($self, $parser, $name) = @_;
+ $self->{_is_string} = 0;
+
+ if ($name eq 'si') {
+ push @{ $self->{_data} }, $self->{_buf};
+ $self->{_buf} = '';
+ }
+}
+
+sub _char {
+ my ($self, $parser, $data) = @_;
+ $self->{_buf} .= $data if $self->{_is_string};
+}
+
+1;
158 lib/Data/XLSX/Parser/Sheet.pm
@@ -0,0 +1,158 @@
+package Data::XLSX::Parser::Sheet;
+use strict;
+use warnings;
+
+use File::Temp;
+use XML::Parser::Expat;
+use Archive::Zip ();
+
+use constant {
+ STYLE_IDX => 'i',
+ STYLE => 's',
+ FMT => 'f',
+ REF => 'r',
+ COLUMN => 'c',
+ VALUE => 'v',
+ TYPE => 't',
+ TYPE_SHARED_STRING => 's',
+ GENERATED_CELL => 'g',
+};
+
+sub new {
+ my ($class, $doc, $archive, $sheet_id) = @_;
+
+ my $self = bless {
+ _document => $doc,
+
+ _data => '',
+ _is_sheetdata => 0,
+ _row_count => 0,
+ _current_row => [],
+ _cell => undef,
+ _is_value => 0,
+
+ _shared_strings => $doc->shared_strings,
+ _styles => $doc->styles,
+
+ }, $class;
+
+ my $fh = File::Temp->new( SUFFIX => '.xml' );
+
+ my $handle = $archive->sheet($sheet_id);
+ die 'Failed to write temporally file: ', $fh->filename
+ unless $handle->extractToFileNamed($fh->filename) == Archive::Zip::AZ_OK;
+
+ my $parser = XML::Parser::Expat->new;
+ $parser->setHandlers(
+ Start => sub { $self->_start(@_) },
+ End => sub { $self->_end(@_) },
+ Char => sub { $self->_char(@_) },
+ );
+ $parser->parse($fh);
+
+ $self;
+}
+
+sub _start {
+ my ($self, $parser, $name, %attrs) = @_;
+
+ if ($name eq 'sheetData') {
+ $self->{_is_sheetdata} = 1;
+ }
+ elsif ($self->{_is_sheetdata} and $name eq 'row') {
+ $self->{_current_row} = [];
+ }
+ elsif ($name eq 'c') {
+ $self->{_cell} = {
+ STYLE_IDX() => $attrs{ STYLE_IDX() },
+ TYPE() => $attrs{ TYPE() },
+ REF() => $attrs{ REF() },
+ COLUMN() => scalar(@{ $self->{_current_row} }) + 1,
+ };
+ }
+ elsif ($name eq 'v') {
+ $self->{_is_value} = 1;
+ }
+}
+
+sub _end {
+ my ($self, $parser, $name) = @_;
+
+ if ($name eq 'sheetData') {
+ $self->{_is_sheetdata} = 0;
+ }
+ elsif ($self->{_is_sheetdata} and $name eq 'row') {
+ $self->{_row_count}++;
+ $self->{_document}->_row_event( delete $self->{_current_row} );
+ }
+ elsif ($name eq 'c') {
+ my $c = $self->{_cell};
+ $self->_parse_rel($c);
+
+ if (($c->{ TYPE() } || '') eq TYPE_SHARED_STRING()) {
+ my $idx = int($self->{_data});
+ $c->{ VALUE() } = $self->{_shared_strings}->get($idx);
+ }
+ else {
+ $c->{ VALUE() } = $self->{_data};
+ }
+
+ $c->{ STYLE() } = $self->{_styles}->cell_style( $c->{ STYLE_IDX() } );
+ $c->{ FMT() } = my $cell_type =
+ $self->{_styles}->cell_type_from_style($c->{ STYLE() });
+
+ my $v = $c->{ VALUE() };
+ if (defined $v and $c->{ FMT() } =~ /^datetime\.(date)?(time)?$/) {
+ # datetime
+ warn 'datetime';
+ }
+ else {
+ if (!defined $v) {
+ $c->{ VALUE() } = '';
+ }
+ elsif ($cell_type ne 'unicode') {
+ warn 'not unicode';
+ $c->{ VALUE() } = $v;
+ }
+ }
+
+ push @{ $self->{_current_row} }, $c;
+
+ $self->{_data} = '';
+ $self->{_cell} = undef;
+ }
+ elsif ($name eq 'v') {
+ $self->{_is_value} = 0;
+ }
+}
+
+sub _char {
+ my ($self, $parser, $data) = @_;
+
+ if ($self->{_is_value}) {
+ $self->{_data} .= $data;
+ }
+}
+
+sub _parse_rel {
+ my ($self, $cell) = @_;
+
+ my ($column, $row) = $cell->{ REF() } =~ /([A-Z]+)(\d+)/;
+
+ my $v = 0;
+ my $i = 0;
+ for my $ch (split '', $column) {
+ my $s = length($column) - $i++ - 1;
+ $v += (ord($ch) - ord('A') + 1) * (26**$s);
+ }
+
+ $cell->{ REF() } = [$v, $row];
+
+ if ($cell->{ COLUMN() } > $v) {
+ die sprintf 'Detected smaller index than current cell, something is wrong! (row %s): %s <> %s', $row, $v, $cell->{ COLUMN() };
+ }
+}
+
+1;
+
+
139 lib/Data/XLSX/Parser/Styles.pm
@@ -0,0 +1,139 @@
+package Data::XLSX::Parser::Styles;
+use strict;
+use warnings;
+
+use XML::Parser::Expat;
+use Archive::Zip ();
+use File::Temp;
+
+use constant BUILTIN_FMT => 0;
+use constant BUILTIN_TYPE => 1;
+
+use constant BUILTIN_NUM_FMTS => [
+ ['@', 'unicode'], # 0x00
+ ['0', 'int'], # 0x01
+ ['0.00', 'float'], # 0x02
+ ['#,##0', 'float'], # 0x03
+ ['#,##0.00', 'float'], # 0x04
+ ['($#,##0_);($#,##0)', 'float'], # 0x05
+ ['($#,##0_);[RED]($#,##0)', 'float'], # 0x06
+ ['($#,##0.00_);($#,##0.00_)', 'float'], # 0x07
+ ['($#,##0.00_);[RED]($#,##0.00_)', 'float'], # 0x08
+ ['0%', 'int'], # 0x09
+ ['0.00%', 'float'], # 0x0a
+ ['0.00E+00', 'float'], # 0x0b
+ ['# ?/?', 'float'], # 0x0c
+ ['# ??/??', 'float'], # 0x0d
+ ['m-d-yy', 'datetime.date'], # 0x0e
+ ['d-mmm-yy', 'datetime.date'], # 0x0f
+ ['d-mmm', 'datetime.date'], # 0x10
+ ['mmm-yy', 'datetime.date'], # 0x11
+ ['h:mm AM/PM', 'datetime.time'], # 0x12
+ ['h:mm:ss AM/PM', 'datetime.time'], # 0x13
+ ['h:mm', 'datetime.time'], # 0x14
+ ['h:mm:ss', 'datetime.time'], # 0x15
+ ['m-d-yy h:mm', 'datetime.datetime'], # 0x16
+ #0x17-0x24 -- Differs in Natinal
+ undef, # 0x17
+ undef, # 0x18
+ undef, # 0x19
+ undef, # 0x1a
+ undef, # 0x1b
+ undef, # 0x1c
+ undef, # 0x1d
+ undef, # 0x1e
+ undef, # 0x1f
+ undef, # 0x20
+ undef, # 0x21
+ undef, # 0x22
+ undef, # 0x23
+ undef, # 0x24
+ ['(#,##0_);(#,##0)', 'int'], # 0x25
+ ['(#,##0_);[RED](#,##0)', 'int'], # 0x26
+ ['(#,##0.00);(#,##0.00)', 'float'], # 0x27
+ ['(#,##0.00);[RED](#,##0.00)', 'float'], # 0x28
+ ['_(*#,##0_);_(*(#,##0);_(*"-"_);_(@_)', 'float'], # 0x29
+ ['_($*#,##0_);_($*(#,##0);_(*"-"_);_(@_)', 'float'], # 0x2a
+ ['_(*#,##0.00_);_(*(#,##0.00);_(*"-"??_);_(@_)', 'float'], # 0x2b
+ ['_($*#,##0.00_);_($*(#,##0.00);_(*"-"??_);_(@_)', 'float'], # 0x2c
+ ['mm:ss', 'datetime.timedelta'], # 0x2d
+ ['[h]:mm:ss', 'datetime.timedelta'], # 0x2e
+ ['mm:ss.0', 'datetime.timedelta'], # 0x2f
+ ['##0.0E+0', 'float'], # 0x30
+ ['@', 'unicode'], # 0x31
+];
+
+sub new {
+ my ($class, $archive) = @_;
+
+ my $self = bless {
+ _number_formats => [],
+
+ _is_cell_xfs => 0,
+ _current_style => undef,
+ }, $class;
+
+ my $fh = File::Temp->new( SUFFIX => '.xml' );
+
+ my $handle = $archive->styles;
+ die 'Failed to write temporally file: ', $fh->filename
+ unless $handle->extractToFileNamed($fh->filename) == Archive::Zip::AZ_OK;
+
+ my $parser = XML::Parser::Expat->new;
+ $parser->setHandlers(
+ Start => sub { $self->_start(@_) },
+ End => sub { $self->_end(@_) },
+ Char => sub { },
+ );
+ $parser->parse($fh);
+
+ $self;
+}
+
+sub cell_style {
+ my ($self, $style_id) = @_;
+ $style_id ||= 0;
+ $self->{_number_formats}[int $style_id];
+}
+
+sub cell_type_from_style {
+ my ($self, $style) = @_;
+ BUILTIN_NUM_FMTS->[ $style->{numFmt} ][BUILTIN_TYPE];
+}
+
+sub cell_format_from_style {
+ my ($self, $style) = @_;
+ BUILTIN_NUM_FMTS->[ $style->{numFmt} ][BUILTIN_FMT];
+}
+
+sub _start {
+ my ($self, $parser, $name, %attrs) = @_;
+
+ if ($name eq 'cellXfs') {
+ $self->{_is_cell_xfs} = 1;
+ }
+ elsif ($self->{_is_cell_xfs} and $name eq 'xf') {
+ $self->{_current_style} = {
+ numFmt => int($attrs{numFmtId}) || 0,
+ exists $attrs{fontId} ? ( font => $attrs{fontId} ) : (),
+ exists $attrs{fillId} ? ( fill => $attrs{fillId} ) : (),
+ exists $attrs{borderId} ? ( border => $attrs{borderId} ) : (),
+ exists $attrs{xfId} ? ( xf => $attrs{xfId} ) : (),
+ exists $attrs{applyFont} ? ( applyFont => $attrs{applyFont} ) : (),
+ exists $attrs{applyNumberFormat} ? ( applyNumFmt => $attrs{applyNumberFormat} ) : (),
+ };
+ }
+}
+
+sub _end {
+ my ($self, $parser, $name) = @_;
+
+ if ($name eq 'cellXfs') {
+ $self->{_is_cell_xfs} = 0;
+ }
+ elsif ($self->{_current_style} and $name eq 'xf') {
+ push @{ $self->{_number_formats } }, delete $self->{_current_style};
+ }
+}
+
+1;
56 lib/Data/XLSX/Parser/Workbook.pm
@@ -0,0 +1,56 @@
+package Data::XLSX::Parser::Workbook;
+use strict;
+use warnings;
+
+use XML::Parser::Expat;
+use Archive::Zip ();
+use File::Temp;
+
+sub new {
+ my ($class, $archive) = @_;
+
+ my $self = bless [], $class;
+
+ my $fh = File::Temp->new( SUFFIX => '.xml' );
+
+ my $handle = $archive->workbook;
+ die 'Failed to write temporally file: ', $fh->filename
+ unless $handle->extractToFileNamed($fh->filename) == Archive::Zip::AZ_OK;
+
+ my $parser = XML::Parser::Expat->new;
+ $parser->setHandlers(
+ Start => sub { $self->_start(@_) },
+ End => sub {},
+ Char => sub {},
+ );
+ $parser->parse($fh);
+
+ $self;
+}
+
+sub names {
+ my ($self) = @_;
+ map { $_->{name} } @$self;
+}
+
+sub sheet_id {
+ my ($self, $name) = @_;
+
+ my ($meta) = grep { $_->{name} eq $name } @$self
+ or return;
+
+ if ($meta->{'r:id'}) {
+ (my $r = $meta->{'r:id'}) =~ s/^rId//;
+ return $r;
+ }
+ else {
+ return $meta->{sheetId};
+ }
+}
+
+sub _start {
+ my ($self, $parser, $el, %attr) = @_;
+ push @$self, \%attr if $el eq 'sheet';
+}
+
+1;
33 t/1_____loreyna126.t
@@ -0,0 +1,33 @@
+use strict;
+use warnings;
+use utf8;
+
+use FindBin;
+use Test::More;
+
+use Data::XLSX::Parser;
+
+my $parser = Data::XLSX::Parser->new;
+
+my $fn = __FILE__;
+$fn =~ s{t$}{xlsx};
+
+$parser->open($fn);
+
+my @sheets = $parser->workbook->names;
+is scalar @sheets, 3, '3 worksheets ok';
+
+is $sheets[0], 'POST_DSENDS', 'sheet1 name ok';
+
+my $cells = [];
+$parser->add_row_event_handler(sub {
+ my ($row) = @_;
+ push @$cells, $row;
+});
+
+$parser->sheet(1);
+
+is $cells->[112][0], 'RCS Thrust Vector Uncertainties ', 'val ok';
+
+done_testing;
+
BIN t/1_____loreyna126.xlsx
Binary file not shown.
36 t/2_____with_chart.t
@@ -0,0 +1,36 @@
+use strict;
+use warnings;
+use utf8;
+
+use FindBin;
+use Test::More;
+
+use Data::XLSX::Parser;
+
+my $parser = Data::XLSX::Parser->new;
+
+my $fn = __FILE__;
+$fn =~ s{t$}{xlsx};
+
+$parser->open($fn);
+
+my @sheets = $parser->workbook->names;
+is scalar @sheets, 4, '4 worksheets ok';
+
+is $sheets[0], 'Tabelle1', 'sheet1 name ok';
+
+my $cells = [];
+$parser->add_row_event_handler(sub {
+ my ($row) = @_;
+ push @$cells, $row;
+});
+
+$parser->sheet($parser->workbook->sheet_id($sheets[0]));
+
+is $cells->[0][0], 1, 'val 0,0 ok';
+is $cells->[0][1], 10, 'val 0,1 ok';
+is $cells->[1][0], 2, 'val 1,0 ok';
+is $cells->[1][1], 20, 'val 1,1 ok';
+
+done_testing;
+
BIN t/2_____with_chart.xlsx
Binary file not shown.
23 xt/sharedstrings.t
@@ -0,0 +1,23 @@
+use strict;
+use warnings;
+use utf8;
+use FindBin;
+
+use Test::More;
+
+use_ok 'Data::XLSX::Parser';
+
+my $parser = Data::XLSX::Parser->new;
+isa_ok $parser, 'Data::XLSX::Parser';
+
+$parser->open("$FindBin::Bin/../private-data-20120717.xlsx");
+
+my $shared_strings = $parser->shared_strings;
+
+is $shared_strings->count, 8161, 'count ok';
+
+is $shared_strings->get(1), '問題文', 'get 1 ok';
+is $shared_strings->get(1000), 'リオデジャネイロ', 'get 1000 ok';
+
+
+done_testing;
24 xt/sheet.t
@@ -0,0 +1,24 @@
+use strict;
+use warnings;
+use utf8;
+use FindBin;
+
+use Test::More;
+
+use_ok 'Data::XLSX::Parser';
+
+my $parser = Data::XLSX::Parser->new;
+isa_ok $parser, 'Data::XLSX::Parser';
+
+$parser->open("$FindBin::Bin/../private-data-20120717.xlsx");
+
+my $workbook = $parser->workbook;
+isa_ok $workbook, 'Data::XLSX::Parser::Workbook';
+
+my @names = $workbook->names;
+is scalar @names, 2, '2 workbook ok';
+
+my $sheet_id = $workbook->sheet_id($names[0]);
+my $sheet = $parser->sheet($sheet_id);
+
+done_testing;
19 xt/styles.t
@@ -0,0 +1,19 @@
+use strict;
+use warnings;
+use utf8;
+use FindBin;
+
+use Test::More;
+
+use_ok 'Data::XLSX::Parser';
+
+my $parser = Data::XLSX::Parser->new;
+isa_ok $parser, 'Data::XLSX::Parser';
+
+$parser->open("$FindBin::Bin/../private-data-20120717.xlsx");
+
+my $styles = $parser->styles;
+
+
+
+done_testing;
22 xt/workbook.t
@@ -0,0 +1,22 @@
+use strict;
+use warnings;
+use Test::More;
+use FindBin;
+
+use_ok 'Data::XLSX::Parser';
+
+my $parser = Data::XLSX::Parser->new;
+isa_ok $parser, 'Data::XLSX::Parser';
+
+$parser->open("$FindBin::Bin/../private-data-20120717.xlsx");
+
+my $workbook = $parser->workbook;
+isa_ok $workbook, 'Data::XLSX::Parser::Workbook';
+
+my @names = $workbook->names;
+is scalar @names, 2, '2 workbook ok';
+
+is $workbook->sheet_id($names[0]), 1, 'sheet_id 1 ok';
+is $workbook->sheet_id($names[1]), 2, 'sheet_id 2 ok';
+
+done_testing;

0 comments on commit 8eb0b70

Please sign in to comment.