The example comes from here.
The source file is example.cc
.
I was able to follow these instructions to build Arrow C++ on Ubuntu 22.04.1 LTS: out-of-source build with cmake ..
and make ..
.
And despite getting errors from running build_arrow.sh
and build_example.sh
, I managed to compile and run example.cc
without using the bash scripts:
g++ example.cc -o example -larrow
However, I got stuck trying to modify the options for the csv_reader on lines 40-42 of the source file example.cc
.
I would like to know how the options could be set, in particular, how to use a char delimiter other than the default char delimiter = ','
for arrow::csv::ParseOptions
.
A complete, tested minimalist solution that could be run with the same shell commands above or similar would be much appreciated, as would be references for further study.
For completeness, the source code is copied below in full (and I will delete it if license infringement has been inadvertently committed):
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include <arrow/csv/api.h>
#include <arrow/io/api.h>
#include <arrow/ipc/api.h>
#include <arrow/pretty_print.h>
#include <arrow/result.h>
#include <arrow/status.h>
#include <arrow/table.h>
#include <iostream>
using arrow::Status;
namespace {

// Converts the CSV file "test.csv" into the Arrow IPC file "test.arrow".
//
// The CSV is read into a table using default read, parse and convert
// options, the table is pretty-printed to stderr, and then written out.
// Returns Status::OK() on success; any failing step is propagated to the
// caller through the ARROW_* macros. `argc` and `argv` are not used.
Status RunMain(int argc, char** argv) {
  const char* csv_filename = "test.csv";
  const char* arrow_filename = "test.arrow";

  // Step 1: open the CSV file and read it into a table.
  std::cerr << "* Reading CSV file '" << csv_filename << "' into table" << std::endl;
  ARROW_ASSIGN_OR_RAISE(auto input_file, arrow::io::ReadableFile::Open(csv_filename));

  const auto read_options = arrow::csv::ReadOptions::Defaults();
  const auto parse_options = arrow::csv::ParseOptions::Defaults();
  const auto convert_options = arrow::csv::ConvertOptions::Defaults();
  ARROW_ASSIGN_OR_RAISE(
      auto csv_reader,
      arrow::csv::TableReader::Make(arrow::io::default_io_context(), input_file,
                                    read_options, parse_options, convert_options));
  ARROW_ASSIGN_OR_RAISE(auto table, csv_reader->Read());

  // Step 2: show what was read.
  std::cerr << "* Read table:" << std::endl;
  ARROW_RETURN_NOT_OK(arrow::PrettyPrint(*table, {}, &std::cerr));

  // Step 3: write the table to an Arrow IPC file.
  std::cerr << "* Writing table into Arrow IPC file '" << arrow_filename << "'"
            << std::endl;
  ARROW_ASSIGN_OR_RAISE(auto output_file,
                        arrow::io::FileOutputStream::Open(arrow_filename));
  ARROW_ASSIGN_OR_RAISE(auto batch_writer,
                        arrow::ipc::MakeFileWriter(output_file, table->schema()));
  ARROW_RETURN_NOT_OK(batch_writer->WriteTable(*table));
  ARROW_RETURN_NOT_OK(batch_writer->Close());

  return Status::OK();
}

}  // namespace
int main(int argc, char** argv) {
Status st = RunMain(argc, argv);
if (!st.ok()) {
std::cerr << st << std::endl;
return 1;
}
return 0;
}
Minimal Reproducible Example (as suggested by relent95)
Step 1: Building Arrow C++
Cf. the instructions.
$ git clone https://github.com/apache/arrow.git
$ cd arrow/cpp
$ mkdir build # from inside the `cpp` subdirectory
$ cd build
$ cmake ..
$ make ..
Step 2: Compile and run arrow/cpp/examples/minimal_build/example.cc
Make folder build
inside arrow/cpp/examples/minimal_build/
$ # from inside the `arrow/cpp/examples/minimal_build/build` subdirectory
$ g++ ../example.cc -o example -larrow
Stay inside the build
folder, copy test.csv
, and run the executable example
$ cp ../test.csv .
$ ./example
The executable example
would run to produce the expected output test.arrow
.
Step 3: Get error from modifying ParseOptions
Make a copy of
example.cc
in the arrow/cpp/examples/minimal_build/build
subdirectory, rename it myexample.cc
As suggested, add, on line 29 (above
namespace
), the two statements below:
auto parse_opts = arrow::csv::ParseOptions::Defaults(); parse_opts.delimiter = '\t';
Inside the
.../minimal_build/build
subdirectory, compiling myexample.cc
with g++ myexample.cc -o myexample -larrow
would throw an error: 'parse_opts' does not name a type
Comments: One can comment out the second statement on line 29, include the header file
<typeinfo>
, and print typeid(parse_opts).name()
to see that parse_opts
is of type N5arrow3csv12ParseOptionsE
. For completeness and clarity, the code in
myexample.cc
is copied below in full:
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include <arrow/csv/api.h>
#include <arrow/io/api.h>
#include <arrow/ipc/api.h>
#include <arrow/pretty_print.h>
#include <arrow/result.h>
#include <arrow/status.h>
#include <arrow/table.h>
#include <iostream>
using arrow::Status;
// NOTE: this line reproduces the compile error discussed in the text.
// The first statement is a valid namespace-scope variable definition, but the
// second (`parse_opts.delimiter = '\t';`) is an assignment statement, and C++
// only allows declarations outside of a function body, hence the
// "'parse_opts' does not name a type" diagnostic. Both statements need to
// live inside a function such as RunMain.
auto parse_opts = arrow::csv::ParseOptions::Defaults(); parse_opts.delimiter = '\t';
namespace {

// Reads "test.csv" into an Arrow table, dumps the table to stderr, and
// saves it as the Arrow IPC file "test.arrow".
//
// Note that the reader is still built from ParseOptions::Defaults(), i.e.
// no customised parse options are handed to TableReader::Make here.
// Returns Status::OK() when every step succeeds; otherwise the first
// failing Status is returned via the ARROW_* macros. The command-line
// arguments are accepted but not used.
Status RunMain(int argc, char** argv) {
  const char* csv_filename = "test.csv";
  const char* arrow_filename = "test.arrow";

  // --- Read ---------------------------------------------------------------
  std::cerr << "* Reading CSV file '" << csv_filename << "' into table" << std::endl;
  ARROW_ASSIGN_OR_RAISE(auto input_file, arrow::io::ReadableFile::Open(csv_filename));

  const auto io_context = arrow::io::default_io_context();
  const auto read_defaults = arrow::csv::ReadOptions::Defaults();
  const auto parse_defaults = arrow::csv::ParseOptions::Defaults();
  const auto convert_defaults = arrow::csv::ConvertOptions::Defaults();
  ARROW_ASSIGN_OR_RAISE(
      auto csv_reader,
      arrow::csv::TableReader::Make(io_context, input_file, read_defaults,
                                    parse_defaults, convert_defaults));
  ARROW_ASSIGN_OR_RAISE(auto table, csv_reader->Read());

  // --- Print --------------------------------------------------------------
  std::cerr << "* Read table:" << std::endl;
  ARROW_RETURN_NOT_OK(arrow::PrettyPrint(*table, {}, &std::cerr));

  // --- Write --------------------------------------------------------------
  std::cerr << "* Writing table into Arrow IPC file '" << arrow_filename << "'"
            << std::endl;
  ARROW_ASSIGN_OR_RAISE(auto output_file,
                        arrow::io::FileOutputStream::Open(arrow_filename));
  ARROW_ASSIGN_OR_RAISE(auto batch_writer,
                        arrow::ipc::MakeFileWriter(output_file, table->schema()));
  ARROW_RETURN_NOT_OK(batch_writer->WriteTable(*table));
  ARROW_RETURN_NOT_OK(batch_writer->Close());

  return Status::OK();
}

}  // namespace
int main(int argc, char** argv) {
Status st = RunMain(argc, argv);
if (!st.ok()) {
std::cerr << st << std::endl;
return 1;
}
return 0;
}
Here is the error message: the type error
CodePudding user response:
The ParseOptions
is a struct, so use the plain C++ syntax, like this.
// Place these statements inside RunMain, in place of the original
// TableReader::Make call and after `input_file` has been opened. They must
// not sit at namespace scope, because an assignment is a statement and is
// only valid inside a function body.
auto parse_opts = arrow::csv::ParseOptions::Defaults();
parse_opts.delimiter = '\t';  // use TAB instead of the default ','
ARROW_ASSIGN_OR_RAISE(auto csv_reader, arrow::csv::TableReader::Make(
    arrow::io::default_io_context(), input_file,
    arrow::csv::ReadOptions::Defaults(),
    parse_opts,  // customised options replace ParseOptions::Defaults()
    arrow::csv::ConvertOptions::Defaults()));