[glom] Import: Made import working again, changed import logic
- From: Michael Hasselmann <mikhas src gnome org>
- To: svn-commits-list gnome org
- Cc:
- Subject: [glom] Import: Made import working again, changed import logic
- Date: Fri, 2 Oct 2009 09:03:53 +0000 (UTC)
commit 88e168968b960b6eca94f10b003076730e7c62a4
Author: Michael Hasselmann <michaelh openismus com>
Date: Thu Oct 1 20:40:08 2009 +0200
Import: Made import working again, changed import logic
* glom/import_csv/csv_parser.[h|cc] (fetch_next_row): This method allows
iterating over the parser's cache without the need for a max row/column
count. Potential issue with valid empty rows during import needs to be fixed.
* glom/import_csv/dialog_import_csv.[h|cc]: Removed logic based on row counts.
It turned out that most places didn't need access to get_row_count() anyway.
* glom/import_csv/dialog_import_csv_progess.cc: Switched import to use the
parser's new fetch_next_row() API. Not tested for large files yet.
ChangeLog | 14 +++++
glom/import_csv/csv_parser.cc | 54 ++++++++++++++------
glom/import_csv/csv_parser.h | 12 ++++-
glom/import_csv/dialog_import_csv.cc | 65 ++++++++++++-------------
glom/import_csv/dialog_import_csv.h | 15 ++++--
glom/import_csv/dialog_import_csv_progress.cc | 27 ++++++-----
tests/import/test_parsing.cc | 2 +-
7 files changed, 122 insertions(+), 67 deletions(-)
---
diff --git a/ChangeLog b/ChangeLog
index 9b9c9ba..de1c401 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,17 @@
+2009-10-01 Michael Hasselmann <michaelh openismus com>
+
+ Import: Made import working again, changed import logic
+
+ * glom/import_csv/csv_parser.[h|cc] (fetch_next_row): This method allows
+ iterating over the parser's cache without the need for a max row/column
+ count. Potential issue with valid empty rows during import needs to be fixed.
+
+ * glom/import_csv/dialog_import_csv.[h|cc]: Removed logic based on row counts.
+ It turned out that most places didn't need access to get_row_count() anyway.
+
+ * glom/import_csv/dialog_import_csv_progess.cc: Switched import to use the
+ parser's new fetch_next_row() API. Not tested for large files yet.
+
2009-09-29 Murray Cumming <murrayc murrayc com>
Doubly-related Records: Correct the SQL.
diff --git a/glom/import_csv/csv_parser.cc b/glom/import_csv/csv_parser.cc
index 938e2a1..363cddd 100644
--- a/glom/import_csv/csv_parser.cc
+++ b/glom/import_csv/csv_parser.cc
@@ -59,7 +59,8 @@ CsvParser::CsvParser(const std::string& encoding_charset)
m_line_number(0),
m_state(STATE_NONE),
m_stream(),
- m_rows()
+ m_rows(),
+ m_row_index(0)
{
}
@@ -87,30 +88,20 @@ CsvParser::State CsvParser::get_state() const
return m_state;
}
-guint CsvParser::get_rows_count() const
-{
- return m_rows.size();
-}
-
bool CsvParser::get_rows_empty() const
{
return m_rows.empty();
}
-
-/// Get the number of columns of data in this row.
-guint CsvParser::get_cols_count(guint row_number) const
-{
- if(row_number >= m_rows.size())
- return 0;
-
- return m_rows[row_number].size();
-}
-
const Glib::ustring& CsvParser::get_data(guint row, guint col)
{
static Glib::ustring empty_result;
+ // TODO: Why do we complain here? We cannot (and don't need to) restrict
+ // our clients to only call this method with valid rows & cols, it falls in
+ // our responsibility to check the access, which we do. Handing out an
+ // empty result is perfectly fine!
+
if(row >= m_rows.size())
{
std::cerr << "CsvParser::get_data(): row out of range." << std::endl;
@@ -127,6 +118,37 @@ const Glib::ustring& CsvParser::get_data(guint row, guint col)
return row_data[col];
}
+const CsvParser::type_row_strings CsvParser::fetch_next_row()
+{
+ const type_row_strings empty;
+
+ g_return_val_if_fail(m_state == (CsvParser::STATE_PARSING | CsvParser::STATE_PARSED), empty);
+
+ // We cannot fetch the next row, but since we are still parsing we might just have to parse a bit more!
+ if(m_state == CsvParser::STATE_PARSING && m_row_index >= m_rows.size())
+ {
+ on_idle_parse();
+ // The final recursion guard is m_state == CsvParser::STATE_PARSING
+ return fetch_next_row();
+ }
+
+ if(m_state == CsvParser::STATE_PARSED && m_row_index >= m_rows.size())
+ {
+ return empty;
+ }
+
+ if(m_row_index < m_rows.size())
+ {
+ return m_rows[m_row_index++];
+ }
+
+ g_return_val_if_reached(empty);
+}
+
+void CsvParser::reset_row_index()
+{
+ m_row_index = 0;
+}
CsvParser::type_signal_file_read_error CsvParser::signal_file_read_error() const
{
diff --git a/glom/import_csv/csv_parser.h b/glom/import_csv/csv_parser.h
index 93554aa..186fb5a 100644
--- a/glom/import_csv/csv_parser.h
+++ b/glom/import_csv/csv_parser.h
@@ -66,17 +66,25 @@ public:
/// Get the current state of the parser.
State get_state() const;
+/*
/// Get the number of rows parsed so far.
guint get_rows_count() const;
+*/
bool get_rows_empty() const;
+/*
/// Get the number of columns of data in this row.
guint get_cols_count(guint row_number) const;
+*/
- //The nasty reference return is for performance.
+ // The nasty reference return is for performance.
const Glib::ustring& get_data(guint row, guint col);
+ const type_row_strings fetch_next_row();
+
+ void reset_row_index();
+
// Signals:
typedef sigc::signal<void, const Glib::ustring&> type_signal_file_read_error;
@@ -178,6 +186,8 @@ private:
// Parsed data:
type_rows m_rows;
+ guint m_row_index;
+
type_signal_file_read_error m_signal_file_read_error;
type_signal_have_display_name m_signal_have_display_name;
type_signal_encoding_error m_signal_encoding_error;
diff --git a/glom/import_csv/dialog_import_csv.cc b/glom/import_csv/dialog_import_csv.cc
index a623539..cd2dedd 100644
--- a/glom/import_csv/dialog_import_csv.cc
+++ b/glom/import_csv/dialog_import_csv.cc
@@ -62,7 +62,8 @@ namespace Glom
Dialog_Import_CSV::Dialog_Import_CSV(BaseObjectType* cobject, const Glib::RefPtr<Gtk::Builder>& builder)
: Gtk::Dialog(cobject),
- m_auto_detect_encoding()
+ m_auto_detect_encoding(),
+ m_cols_count(-1)
{
builder->get_widget("import_csv_fields", m_sample_view);
builder->get_widget("import_csv_target_table", m_target_table);
@@ -207,23 +208,20 @@ void Dialog_Import_CSV::import(const Glib::ustring& uri, const Glib::ustring& in
m_field_model_sorted = Gtk::TreeModelSort::create(m_field_model);
m_field_model_sorted->set_sort_column(m_field_columns.m_col_field_name, Gtk::SORT_ASCENDING);
+ m_filename = uri;
m_parser->set_file_and_start_parsing(uri);
}
}
+// TODO:remove me.
guint Dialog_Import_CSV::get_row_count() const
{
- const guint parser_count = m_parser->get_rows_count();
-
- if(m_first_line_as_title->get_active() && parser_count > 1)
- return parser_count - 1;
- else
- return parser_count;
+ return -1;
}
guint Dialog_Import_CSV::get_column_count() const
{
- return m_fields.size();
+ return m_cols_count;
}
sharedptr<const Field> Dialog_Import_CSV::get_field_for_column(guint col) const
@@ -239,6 +237,11 @@ const Glib::ustring& Dialog_Import_CSV::get_data(guint row, guint col)
return m_parser->get_data(row, col);
}
+CsvParser& Dialog_Import_CSV::get_parser()
+{
+ return *(m_parser.get());
+}
+
void Dialog_Import_CSV::clear()
{
// TODO: Do we explicitely need to cancel async operations?
@@ -324,11 +327,8 @@ void Dialog_Import_CSV::on_first_line_as_title_toggled()
// Add another row to the end, if one is loaded.
const guint last_index = m_sample_model->children().size();
- if(last_index < m_parser->get_rows_count())
- {
- iter = m_sample_model->append();
- (*iter)[m_sample_columns.m_col_row] = last_index;
- }
+ iter = m_sample_model->append();
+ (*iter)[m_sample_columns.m_col_row] = last_index;
}
}
else
@@ -339,7 +339,9 @@ void Dialog_Import_CSV::on_first_line_as_title_toggled()
Gtk::TreeModel::Path path("1");
Gtk::TreeModel::iterator iter = m_sample_model->get_iter(path);
- if((!iter || (*iter)[m_sample_columns.m_col_row] != 0) && !m_parser->get_rows_empty() && m_sample_rows->get_value_as_int() > 0)
+ //if((!iter || (*iter)[m_sample_columns.m_col_row] != 0) && !m_parser->get_rows_empty() && m_sample_rows->get_value_as_int() > 0)
+ if((!iter || (*iter)[m_sample_columns.m_col_row] != 0) &&
+ m_sample_rows->get_value_as_int() > 0)
{
// Add first row to model
if(!iter)
@@ -389,8 +391,7 @@ void Dialog_Import_CSV::on_sample_rows_changed()
if(m_first_line_as_title->get_active())
++row_index;
- const guint rows_count = m_parser->get_rows_count();
- for(guint i = current_sample_rows; i < new_sample_rows && row_index < rows_count; ++i, ++row_index)
+ for(guint i = current_sample_rows; i < new_sample_rows; ++i, ++row_index)
{
Gtk::TreeModel::iterator iter = m_sample_model->append();
(*iter)[m_sample_columns.m_col_row] = row_index;
@@ -469,12 +470,12 @@ void Dialog_Import_CSV::on_parser_encoding_error()
/*
* No, this is wrong. Creating the tree model and handling a line from the CSV file are two separate steps. Proposal: Construct tree model *after* parsing, using row[0].
*/
-void Dialog_Import_CSV::on_parser_line_scanned(CsvParser::type_row_strings /*row*/, unsigned int row_number)
+void Dialog_Import_CSV::on_parser_line_scanned(CsvParser::type_row_strings row, unsigned int row_number)
{
// This is the first line read if there is no model yet:
if(!m_sample_model)
{
- setup_sample_model(row_number);
+ setup_sample_model(row);
Gtk::TreeModel::iterator iter = m_sample_model->append();
// -1 means the row to select target fields (see special handling in cell data funcs)
(*iter)[m_sample_columns.m_col_row] = -1;
@@ -486,26 +487,24 @@ void Dialog_Import_CSV::on_parser_line_scanned(CsvParser::type_row_strings /*row
const guint sample_rows = m_sample_model->children().size() - 1;
// Don't add if this is the first line and m_first_line_as_title is active:
- const guint parser_rows_count = m_parser->get_rows_count();
if(row_number > 1 || !m_first_line_as_title->get_active())
{
if(sample_rows < static_cast<guint>(m_sample_rows->get_value_as_int()))
{
Gtk::TreeModel::iterator tree_iter = m_sample_model->append();
- (*tree_iter)[m_sample_columns.m_col_row] = parser_rows_count - 1;
+ (*tree_iter)[m_sample_columns.m_col_row] = row_number;
}
}
}
-void Dialog_Import_CSV::setup_sample_model(guint data_row_number)
+void Dialog_Import_CSV::setup_sample_model(CsvParser::type_row_strings& row)
{
m_sample_model = Gtk::ListStore::create(m_sample_columns);
m_sample_view->set_model(m_sample_model);
// Create field vector that contains the fields into which to import
// the data.
- //m_fields.resize(row.size());
- m_fields.resize(m_parser->get_rows_count());
+ m_fields.resize(row.size());
// Start with a column showing the line number.
Gtk::CellRendererText* text = Gtk::manage(new Gtk::CellRendererText);
@@ -514,25 +513,26 @@ void Dialog_Import_CSV::setup_sample_model(guint data_row_number)
col->set_cell_data_func(*text, sigc::mem_fun(*this, &Dialog_Import_CSV::line_data_func));
m_sample_view->append_column(*col);
- const guint cols_count = m_parser->get_cols_count(data_row_number);
- for(guint i = 0; i < cols_count; ++ i)
+ m_cols_count = row.size();
+
+ for(guint i = 0; i < m_cols_count; ++ i)
{
- const Glib::ustring& data = m_parser->get_data(data_row_number, i);
- m_sample_view->append_column(*Gtk::manage(column_factory(data, i)));
+ const Glib::ustring& data = row[i];
+ m_sample_view->append_column(*Gtk::manage(create_sample_column(data, i)));
}
}
-Gtk::TreeViewColumn* Dialog_Import_CSV::column_factory(const Glib::ustring& title, guint index)
+Gtk::TreeViewColumn* Dialog_Import_CSV::create_sample_column(const Glib::ustring& title, guint index)
{
Gtk::TreeViewColumn* col = new Gtk::TreeViewColumn(title);
- Gtk::CellRendererCombo* cell = cell_factory(index);
+ Gtk::CellRendererCombo* cell = create_sample_cell(index);
col->pack_start(*Gtk::manage(cell), true);
col->set_cell_data_func(*cell, sigc::bind(sigc::mem_fun(*this, &Dialog_Import_CSV::field_data_func), index));
col->set_sizing(Gtk::TREE_VIEW_COLUMN_AUTOSIZE);
return col;
}
-Gtk::CellRendererCombo* Dialog_Import_CSV::cell_factory(guint index)
+Gtk::CellRendererCombo* Dialog_Import_CSV::create_sample_cell(guint index)
{
Gtk::CellRendererCombo* cell = new Gtk::CellRendererCombo;
#ifdef GLIBMM_PROPERTIES_ENABLED
@@ -594,7 +594,7 @@ void Dialog_Import_CSV::field_data_func(Gtk::CellRenderer* renderer, const Gtk::
{
sharedptr<Field> field = m_fields[column_number];
- if(row != -1 && (unsigned int)row < m_parser->get_rows_count())
+ if(row != -1) // && static_cast<unsigned int>(row) < m_parser->get_rows_count())
{
const Glib::ustring& orig_text = m_parser->get_data(row, column_number);
@@ -605,7 +605,6 @@ void Dialog_Import_CSV::field_data_func(Gtk::CellRenderer* renderer, const Gtk::
/* Exported data is always stored in postgres format */
bool success = false;
const Gnome::Gda::Value value = field->from_file_format(orig_text, success);
-
if(!success)
text = _("<Import Failure>");
else
@@ -663,7 +662,7 @@ void Dialog_Import_CSV::on_field_edited(const Glib::ustring& path, const Glib::u
if(vec_field_iter != m_fields.end()) *vec_field_iter = sharedptr<Field>();
m_fields[column_number] = field;
-
+
// Update the rows, so they are redrawn, doing a conversion to the new type.
const Gtk::TreeNodeChildren& sample_children = m_sample_model->children();
// Create a TreeModel::Path with initial index 0. We need a TreeModel::Path for the row_changed() call
diff --git a/glom/import_csv/dialog_import_csv.h b/glom/import_csv/dialog_import_csv.h
index a2e8bc8..da724fb 100644
--- a/glom/import_csv/dialog_import_csv.h
+++ b/glom/import_csv/dialog_import_csv.h
@@ -57,6 +57,8 @@ public:
sharedptr<const Field> get_field_for_column(unsigned int col) const;
const Glib::ustring& get_data(unsigned int row, unsigned int col);
+ CsvParser& get_parser();
+
typedef sigc::signal<void> type_signal_state_changed;
/** This signal will be emitted when the parser's state changes.
@@ -71,9 +73,9 @@ private:
Glib::ustring get_current_encoding() const;
void begin_parse();
- void setup_sample_model(guint data_row_number);
- Gtk::TreeViewColumn* column_factory(const Glib::ustring& title, guint index);
- Gtk::CellRendererCombo* cell_factory(guint index);
+ void setup_sample_model(CsvParser::type_row_strings& row);
+ Gtk::TreeViewColumn* create_sample_column(const Glib::ustring& title, guint index);
+ Gtk::CellRendererCombo* create_sample_cell(guint index);
//CellRenderer cell_data_func callbacks:
void line_data_func(Gtk::CellRenderer* renderer, const Gtk::TreeModel::iterator& iter);
@@ -103,7 +105,7 @@ private:
Gtk::TreeModelColumn<Glib::ustring> m_col_name;
Gtk::TreeModelColumn<Glib::ustring> m_col_charset;
};
-
+
class FieldColumns: public Gtk::TreeModelColumnRecord
{
public:
@@ -148,6 +150,11 @@ private:
// auto-detection is disabled.
int m_auto_detect_encoding;
+ // The first row decides the amount of columns in our model, during model
+ // setup. We implicitly fill up every row that is shorter, and cut longer
+ // rows.
+ guint m_cols_count;
+
// The fields into which to import the data:
typedef std::vector< sharedptr<Field> > type_vec_fields;
type_vec_fields m_fields;
diff --git a/glom/import_csv/dialog_import_csv_progress.cc b/glom/import_csv/dialog_import_csv_progress.cc
index 511bc70..bcc818f 100644
--- a/glom/import_csv/dialog_import_csv_progress.cc
+++ b/glom/import_csv/dialog_import_csv_progress.cc
@@ -97,8 +97,7 @@ void Dialog_Import_CSV_Progress::add_text(const Glib::ustring& text)
void Dialog_Import_CSV_Progress::begin_import()
{
- //Note to translators: This is a progress indication, showing how many rows have been imported, of the total number of rows.
- m_progress_bar->set_text(Glib::ustring::compose(_("%1 / %2"), m_current_row, m_data_source->get_row_count()));
+ m_progress_bar->set_text("0");
m_progress_bar->set_fraction(0.0);
m_progress_connection = Glib::signal_idle().connect(sigc::mem_fun(*this, &Dialog_Import_CSV_Progress::on_idle_import));
@@ -128,11 +127,13 @@ void Dialog_Import_CSV_Progress::on_state_changed()
bool Dialog_Import_CSV_Progress::on_idle_import()
{
- //Note to translators: This is a progress indication, showing how many rows have been imported, of the total number of rows.
- m_progress_bar->set_text(Glib::ustring::compose(_("%1 / %2"), m_current_row, m_data_source->get_row_count()));
- m_progress_bar->set_fraction(static_cast<double>(m_current_row) / static_cast<double>(m_data_source->get_row_count()));
+ //Note to translators: This is a progress indication, showing how many rows have been imported.
+ //TODO: replace with pulsing progress bar, or have a heuristic based on file size.
+ m_progress_bar->set_text(Glib::ustring::format(m_current_row));
- if(m_current_row == m_data_source->get_row_count())
+ const CsvParser::type_row_strings row = m_data_source->get_parser().fetch_next_row();
+
+ if(row.empty())
{
// Don't do the response immediately, so the user has a chance to read the
// warnings and errors, if any.
@@ -143,16 +144,18 @@ bool Dialog_Import_CSV_Progress::on_idle_import()
}
// Update the current row values map:
- for(unsigned int i = 0; i < m_data_source->get_column_count(); ++ i)
+ guint col_index = 0;
+ for(CsvParser::type_row_strings::const_iterator iter = row.begin();
+ iter != row.end();
+ ++iter)
{
- sharedptr<const Field> field = m_data_source->get_field_for_column(i);
+ sharedptr<const Field> field = m_data_source->get_field_for_column(col_index++);
if(field)
{
// We always assume exported data is in standard CSV format, since
// we export it this way.
- const Glib::ustring str = m_data_source->get_data(m_current_row, i);
bool success = false;
- Gnome::Gda::Value value = field->from_file_format(str, success);
+ Gnome::Gda::Value value = field->from_file_format(*iter, success);
if(success)
{
@@ -174,7 +177,7 @@ bool Dialog_Import_CSV_Progress::on_idle_import()
}
else
{
- const Glib::ustring message(Glib::ustring::compose(_("Warning: Importing row %1: The value for field %2, \"%3\" could not be converted to the field's type. The value will not be imported.\n"), m_current_row + 1, field->get_name(), m_data_source->get_data(m_current_row, i)));
+ const Glib::ustring message(Glib::ustring::compose(_("Warning: Importing row %1: The value for field %2, \"%3\" could not be converted to the field's type. The value will not be imported.\n"), m_current_row + 1, field->get_name(), *iter));
add_text(message);
}
}
@@ -196,7 +199,7 @@ bool Dialog_Import_CSV_Progress::on_idle_import()
if(!get_field_value_is_unique(m_table_name, layout_item, primary_key_value))
primary_key_value = Gnome::Gda::Value();
}
-
+
if(Glom::Conversions::value_is_empty(primary_key_value))
{
Glib::ustring message(Glib::ustring::compose(_("Error importing row %1: Cannot import the row because the primary key is empty.\n"), m_current_row + 1));
diff --git a/tests/import/test_parsing.cc b/tests/import/test_parsing.cc
index adf40c1..565d23b 100644
--- a/tests/import/test_parsing.cc
+++ b/tests/import/test_parsing.cc
@@ -20,7 +20,7 @@ type_tokens& get_tokens_instance()
void on_line_scanned(const std::vector<Glib::ustring>& row, guint /*line_number*/)
{
//std::cout << "debug: on_line_scanned(): row.size()=" << row.size() << std::endl;
-
+
for(std::vector<Glib::ustring>::const_iterator iter = row.begin();
iter != row.end();
++iter)
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]