Skip to content

Commit

Permalink
Add support for changeset discussions in the dump.
Browse files Browse the repository at this point in the history
This adds support for an *additional* dump of changesets which
includes discussions. This will be the new format and deprecates
the previous format, but we'll allow a few months after the
announcement before the previous format is retired.

The format is the same as getting changesets with discussions from
the XML API.
  • Loading branch information
zerebubuth committed Feb 21, 2015
1 parent 365449c commit b1f6638
Show file tree
Hide file tree
Showing 17 changed files with 168 additions and 41 deletions.
8 changes: 6 additions & 2 deletions include/changeset_filter.hpp
Expand Up @@ -14,10 +14,14 @@
*/
template <typename T>
struct changeset_filter : public output_writer {
changeset_filter(const std::string &, const boost::program_options::variables_map &, const user_map_t &, const boost::posix_time::ptime &);
changeset_filter(const std::string &, const boost::program_options::variables_map &,
const user_map_t &, const boost::posix_time::ptime &,
bool include_discussions);
virtual ~changeset_filter();

void changesets(const std::vector<changeset> &, const std::vector<current_tag> &);
void changesets(const std::vector<changeset> &,
const std::vector<current_tag> &,
const std::vector<changeset_comment> &);
void nodes(const std::vector<node> &, const std::vector<old_tag> &);
void ways(const std::vector<way> &, const std::vector<way_node> &, const std::vector<old_tag> &);
void relations(const std::vector<relation> &, const std::vector<relation_member> &, const std::vector<old_tag> &);
Expand Down
4 changes: 3 additions & 1 deletion include/history_filter.hpp
Expand Up @@ -16,7 +16,9 @@ struct history_filter : public output_writer {
history_filter(const std::string &, const boost::program_options::variables_map &, const user_map_t &, const boost::posix_time::ptime &);
virtual ~history_filter();

void changesets(const std::vector<changeset> &, const std::vector<current_tag> &);
void changesets(const std::vector<changeset> &,
const std::vector<current_tag> &,
const std::vector<changeset_comment> &);
void nodes(const std::vector<node> &, const std::vector<old_tag> &);
void ways(const std::vector<way> &, const std::vector<way_node> &, const std::vector<old_tag> &);
void relations(const std::vector<relation> &, const std::vector<relation_member> &, const std::vector<old_tag> &);
Expand Down
4 changes: 3 additions & 1 deletion include/output_writer.hpp
Expand Up @@ -20,7 +20,9 @@ struct output_writer : private boost::noncopyable {
// dump a chunk of elements. included are the associated tags and other
// inner types for that element. the chunk will be already ordered and
// the inner types ordered by the (id, version) of their element.
virtual void changesets(const std::vector<changeset> &, const std::vector<current_tag> &) = 0;
virtual void changesets(const std::vector<changeset> &,
const std::vector<current_tag> &,
const std::vector<changeset_comment> &) = 0;
virtual void nodes(const std::vector<node> &, const std::vector<old_tag> &) = 0;
virtual void ways(const std::vector<way> &, const std::vector<way_node> &, const std::vector<old_tag> &) = 0;
virtual void relations(const std::vector<relation> &, const std::vector<relation_member> &, const std::vector<old_tag> &) = 0;
Expand Down
4 changes: 3 additions & 1 deletion include/pbf_writer.hpp
Expand Up @@ -14,7 +14,9 @@ class pbf_writer : public output_writer {
pbf_writer(const std::string &, const boost::program_options::variables_map &, const user_map_t &, const boost::posix_time::ptime &, bool = false);
virtual ~pbf_writer();

void changesets(const std::vector<changeset> &, const std::vector<current_tag> &);
void changesets(const std::vector<changeset> &,
const std::vector<current_tag> &,
const std::vector<changeset_comment> &);
void nodes(const std::vector<node> &, const std::vector<old_tag> &);
void ways(const std::vector<way> &, const std::vector<way_node> &, const std::vector<old_tag> &);
void relations(const std::vector<relation> &, const std::vector<relation_member> &, const std::vector<old_tag> &);
Expand Down
1 change: 1 addition & 0 deletions include/table_extractor.hpp
Expand Up @@ -16,6 +16,7 @@ template <> boost::posix_time::ptime timestamp_of<changeset>(const changeset &cs
// Explicit specialisations of timestamp_of: each returns the timestamp field
// of its record type (`timestamp` for nodes, ways and relations, `created_at`
// for changeset comments).
//
// They are marked `inline` because a fully specialised function template is
// an ordinary function definition, not a template; defining it in a header
// without `inline` produces multiple definitions as soon as the header is
// included from more than one translation unit.
template <> inline boost::posix_time::ptime timestamp_of<node>(const node &n) { return n.timestamp; }
template <> inline boost::posix_time::ptime timestamp_of<way>(const way &w) { return w.timestamp; }
template <> inline boost::posix_time::ptime timestamp_of<relation>(const relation &r) { return r.timestamp; }
template <> inline boost::posix_time::ptime timestamp_of<changeset_comment>(const changeset_comment &cc) { return cc.created_at; }

template <typename R>
struct table_extractor_with_timestamp {
Expand Down
21 changes: 20 additions & 1 deletion include/types.hpp
Expand Up @@ -57,6 +57,25 @@ BOOST_FUSION_ADAPT_STRUCT(
(std::string, value)
)

// A single comment in a changeset discussion, i.e. one row read from the
// "changeset_comments" table.
struct changeset_comment {
  // NOTE(review): presumably the number of leading columns that make up the
  // row key (changeset_id, created_at) -- confirm against the code that
  // consumes num_keys.
  static const int num_keys = 2;

  // Names of the table columns for this record type (defined in types.cpp).
  static const std::vector<std::string> &column_names();

  int64_t changeset_id;                 // changeset the comment belongs to
  int64_t author_id;                    // id of the comment's author
  std::string body;                     // comment text
  boost::posix_time::ptime created_at;  // creation time; used as the record's timestamp
  bool visible;                         // value of the "visible" column
};

// Adapt changeset_comment as a Boost.Fusion sequence.
//
// The members are listed in the same order as the column names in
// changeset_comment_column_names_ (src/types.cpp): changeset_id, created_at,
// author_id, body, visible. This differs from the declaration order inside
// the struct itself.
// NOTE(review): presumably this order has to follow the column order of the
// dump -- confirm before reordering either list.
BOOST_FUSION_ADAPT_STRUCT(
changeset_comment,
(int64_t, changeset_id)
(boost::posix_time::ptime, created_at)
(int64_t, author_id)
(std::string, body)
(bool, visible)
)

struct changeset {
static const int num_keys = 1;
static const std::vector<std::string> &column_names();
Expand All @@ -65,7 +84,7 @@ struct changeset {
static const std::string inner_table_name();

typedef current_tag tag_type;
typedef int inner_type;
typedef changeset_comment inner_type;

int64_t id;
int32_t uid;
Expand Down
8 changes: 6 additions & 2 deletions include/xml_writer.hpp
Expand Up @@ -14,10 +14,14 @@ class xml_writer : public output_writer {
public:
typedef changeset_map changeset_map_t;

xml_writer(const std::string &, const boost::program_options::variables_map &, const user_map_t &, const boost::posix_time::ptime &, bool = false);
xml_writer(const std::string &, const boost::program_options::variables_map &, const user_map_t &,
const boost::posix_time::ptime &max_time,
bool has_history = false, bool has_changeset_discussions = false);
virtual ~xml_writer();

void changesets(const std::vector<changeset> &, const std::vector<current_tag> &);
void changesets(const std::vector<changeset> &,
const std::vector<current_tag> &,
const std::vector<changeset_comment> &);
void nodes(const std::vector<node> &, const std::vector<old_tag> &);
void ways(const std::vector<way> &, const std::vector<way_node> &, const std::vector<old_tag> &);
void relations(const std::vector<relation> &, const std::vector<relation_member> &, const std::vector<old_tag> &);
Expand Down
12 changes: 8 additions & 4 deletions src/changeset_filter.cpp
@@ -1,22 +1,26 @@

#include "changeset_filter.hpp"
#include <boost/date_time/posix_time/posix_time.hpp>

#include "xml_writer.hpp"

template <typename T>
changeset_filter<T>::changeset_filter(const std::string &option_name, const boost::program_options::variables_map &options,
const user_map_t &user_map, const boost::posix_time::ptime &max_time)
: m_writer(new T(option_name, options, user_map, max_time, false)) {
const user_map_t &user_map, const boost::posix_time::ptime &max_time,
bool include_discussions)
: m_writer(new T(option_name, options, user_map, max_time, false, include_discussions)) {
}

// Destructor: performs no explicit clean-up.
// NOTE(review): the constructor creates the wrapped writer with `new T(...)`;
// confirm that m_writer is an owning smart pointer, since nothing is released
// here.
template <typename T>
changeset_filter<T>::~changeset_filter() {}

template <typename T>
void changeset_filter<T>::changesets(const std::vector<changeset> &cs, const std::vector<current_tag> &ts) {
void changeset_filter<T>::changesets(const std::vector<changeset> &cs,
const std::vector<current_tag> &ts,
const std::vector<changeset_comment> &ccs) {
// no filtering for changesets - we want them.
m_writer->changesets(cs, ts);
m_writer->changesets(cs, ts, ccs);
}

template <typename T>
Expand Down
10 changes: 7 additions & 3 deletions src/copy_elements.cpp
Expand Up @@ -57,6 +57,7 @@ struct control_block {
std::vector<T> elements;
std::vector<tag_type> tags;
std::vector<inner_type> inners;
std::vector<changeset_comment> comments;
};

template <typename T>
Expand Down Expand Up @@ -185,18 +186,21 @@ template <> inline void zero_init<current_tag>(current_tag &t) { t.element_id =
// zero_init specialisations: each one resets the field holding the id of the
// parent element to zero; all other fields are left untouched.

template <>
inline void zero_init<old_tag>(old_tag &tag) {
  tag.element_id = 0;
}

template <>
inline void zero_init<way_node>(way_node &nd) {
  nd.way_id = 0;
}

template <>
inline void zero_init<relation_member>(relation_member &member) {
  member.relation_id = 0;
}

template <>
inline void zero_init<changeset_comment>(changeset_comment &comment) {
  comment.changeset_id = 0;
}

// Plain int has nothing to reset, so this is a no-op.
// NOTE(review): presumably provided for element types whose inner_type is
// int -- confirm whether it is still required.
template <>
inline void zero_init<int>(int &) {}

// id_of specialisations: return the id of the element that a tag or inner
// record is attached to.

template <>
inline int64_t id_of<current_tag>(const current_tag &tag) {
  return tag.element_id;
}

template <>
inline int64_t id_of<old_tag>(const old_tag &tag) {
  return tag.element_id;
}

template <>
inline int64_t id_of<way_node>(const way_node &nd) {
  return nd.way_id;
}

template <>
inline int64_t id_of<relation_member>(const relation_member &member) {
  return member.relation_id;
}

template <>
inline int64_t id_of<changeset_comment>(const changeset_comment &comment) {
  return comment.changeset_id;
}

// Returns the version of a record. The generic form reads the record's
// `version` member.
template <typename T>
inline int64_t version_of(const T &t) { return t.version; }

// These record types always report version 0 instead of reading a `version`
// member. The parameters are intentionally unnamed: they are never read, and
// leaving them unnamed avoids unused-parameter warnings.
template <> inline int64_t version_of<changeset>(const changeset &) { return 0; }
template <> inline int64_t version_of<current_tag>(const current_tag &) { return 0; }
template <> inline int64_t version_of<changeset_comment>(const changeset_comment &) { return 0; }

template <typename T>
inline void fetch_associated(T &t, int64_t id, int64_t version, db_reader<T> &reader, std::vector<T> &vec) {
Expand Down Expand Up @@ -233,7 +237,7 @@ void extract_element(thread_writer<T> &writer) {
std::vector<T> elements;
std::vector<tag_type> tags;
std::vector<inner_type> inners;

elements.resize(block_size);
size_t i = 0;

Expand Down Expand Up @@ -267,8 +271,8 @@ void extract_element(thread_writer<T> &writer) {

template <typename T> void write_elements(output_writer &writer, control_block<T> &blk);

template <> inline void write_elements<changeset>(output_writer &writer, control_block<changeset> &blk) {
writer.changesets(blk.elements, blk.tags);
template <> inline void write_elements<changeset>(output_writer &writer, control_block<changeset> &blk) {
writer.changesets(blk.elements, blk.tags, blk.inners);
}
template <> inline void write_elements<node>(output_writer &writer, control_block<node> &blk) {
writer.nodes(blk.elements, blk.tags);
Expand Down
1 change: 1 addition & 0 deletions src/dump_archive.cpp
Expand Up @@ -123,3 +123,4 @@ template struct run_thread<way>;
template struct run_thread<way_node>;
template struct run_thread<relation>;
template struct run_thread<relation_member>;
template struct run_thread<changeset_comment>;
1 change: 1 addition & 0 deletions src/extract_kv.cpp
Expand Up @@ -173,3 +173,4 @@ template struct extract_kv<way>;
template struct extract_kv<way_node>;
template struct extract_kv<relation>;
template struct extract_kv<relation_member>;
template struct extract_kv<changeset_comment>;
6 changes: 4 additions & 2 deletions src/history_filter.cpp
Expand Up @@ -18,10 +18,12 @@ history_filter<T>::~history_filter() {
}

template <typename T>
void history_filter<T>::changesets(const std::vector<changeset> &cs, const std::vector<current_tag> &ts) {
void history_filter<T>::changesets(const std::vector<changeset> &cs,
const std::vector<current_tag> &ts,
const std::vector<changeset_comment> &ccs) {
// no filtering for changesets - they are all "current", and all get passed
// through to the backend.
m_writer->changesets(cs, ts);
m_writer->changesets(cs, ts, ccs);
}

template <typename T>
Expand Down
1 change: 1 addition & 0 deletions src/insert_kv.cpp
Expand Up @@ -176,3 +176,4 @@ template void insert_kv<way>(way &, const slice_t &, const slice_t &);
template void insert_kv<way_node>(way_node &, const slice_t &, const slice_t &);
template void insert_kv<relation>(relation &, const slice_t &, const slice_t &);
template void insert_kv<relation_member>(relation_member &, const slice_t &, const slice_t &);
template void insert_kv<changeset_comment>(changeset_comment &, const slice_t &, const slice_t &);
4 changes: 3 additions & 1 deletion src/pbf_writer.cpp
Expand Up @@ -526,7 +526,9 @@ pbf_writer::pbf_writer(const std::string &file_name, const boost::program_option
// Destructor: performs no explicit clean-up.
pbf_writer::~pbf_writer() {}

void pbf_writer::changesets(const std::vector<changeset> &cs, const std::vector<current_tag> &) {
void pbf_writer::changesets(const std::vector<changeset> &cs,
const std::vector<current_tag> &,
const std::vector<changeset_comment> &) {
std::map<int64_t, int64_t> &changeset_user_map = m_impl->m_changeset_user_map;
BOOST_FOREACH(const changeset &c, cs) {
changeset_user_map.insert(std::make_pair(c.id, c.uid));
Expand Down
20 changes: 15 additions & 5 deletions src/planet-dump.cpp
Expand Up @@ -35,6 +35,8 @@ static void get_options(int argc, char **argv, po::variables_map &vm) {
("pbf,p", po::value<std::string>(), "planet PBF output file (without history)")
("history-pbf,P", po::value<std::string>(), "history PBF output file")
("changesets,C", po::value<std::string>(), "changeset XML output file")
("changeset-discussions,D", po::value<std::string>(),
"changeset discussions XML output file")
("dense-nodes,d", po::value<bool>()->default_value("true"), "use dense nodes for PBF output")
("dump-file,f", po::value<std::string>(), "PostgreSQL table dump to read")
;
Expand All @@ -53,10 +55,13 @@ static void get_options(int argc, char **argv, po::variables_map &vm) {

if ((vm.count("xml") + vm.count("history-xml") +
vm.count("pbf") + vm.count("history-pbf") +
vm.count("changesets")) == 0) {
BOOST_THROW_EXCEPTION(std::runtime_error("No output file provided! You must provide one or more of "
"--xml, --history-xml, --pbf, --history-pbf or --changesets "
"to get output."));
vm.count("changesets") + vm.count("changeset-discussions")) == 0) {
std::cerr <<
"No output file provided! You must provide one or more of "
"--xml, --history-xml, --pbf, --history-pbf, --changesets or "
"--changeset-discussions to get output.\n\n";
std::cerr << desc << std::endl;
exit(1);
}
}

Expand All @@ -82,6 +87,7 @@ bt::ptime setup_leveldb_databases(const std::string &dump_file) {
threads.push_back(boost::make_shared<run_thread<relation_member> >("relation_members", dump_file));

threads.push_back(boost::make_shared<run_thread<user> >("users", dump_file));
threads.push_back(boost::make_shared<run_thread<changeset_comment> >("changeset_comments", dump_file));

bt::ptime max_time(bt::neg_infin);
BOOST_FOREACH(boost::shared_ptr<base_thread> &thr, threads) {
Expand Down Expand Up @@ -134,7 +140,11 @@ int main(int argc, char *argv[]) {
}
if (options.count("changesets")) {
std::string output_file = options["changesets"].as<std::string>();
writers.push_back(boost::shared_ptr<output_writer>(new changeset_filter<xml_writer>(output_file, options, display_name_map, max_time)));
writers.push_back(boost::shared_ptr<output_writer>(new changeset_filter<xml_writer>(output_file, options, display_name_map, max_time, false)));
}
if (options.count("changeset-discussions")) {
std::string output_file = options["changeset-discussions"].as<std::string>();
writers.push_back(boost::shared_ptr<output_writer>(new changeset_filter<xml_writer>(output_file, options, display_name_map, max_time, true)));
}

std::cerr << "Writing changesets..." << std::endl;
Expand Down
23 changes: 13 additions & 10 deletions src/types.cpp
Expand Up @@ -11,6 +11,7 @@ const char *way_column_names_[] = { "way_id", "version", "changeset_id", "visibl
const char *way_node_column_names_[] = { "way_id", "version", "sequence_id", "node_id" };
const char *relation_column_names_[] = { "relation_id", "version", "changeset_id", "visible", "timestamp", "redaction_id" };
const char *relation_member_column_names_[] = { "relation_id", "version", "sequence_id", "member_type", "member_id", "member_role" };
const char *changeset_comment_column_names_[] = { "changeset_id", "created_at", "author_id", "body", "visible" };

const std::vector<std::string> user_column_names = std::vector<std::string>(user_column_names_, user_column_names_ + sizeof(user_column_names_) / sizeof(*user_column_names_));
const std::vector<std::string> changeset_column_names = std::vector<std::string>(changeset_column_names_, changeset_column_names_ + sizeof(changeset_column_names_) / sizeof(*changeset_column_names_));
Expand All @@ -21,22 +22,24 @@ const std::vector<std::string> way_column_names = std::vector<std::string>(way_c
const std::vector<std::string> way_node_column_names = std::vector<std::string>(way_node_column_names_, way_node_column_names_ + sizeof(way_node_column_names_) / sizeof(*way_node_column_names_));
const std::vector<std::string> relation_column_names = std::vector<std::string>(relation_column_names_, relation_column_names_ + sizeof(relation_column_names_) / sizeof(*relation_column_names_));
const std::vector<std::string> relation_member_column_names = std::vector<std::string>(relation_member_column_names_, relation_member_column_names_ + sizeof(relation_member_column_names_) / sizeof(*relation_member_column_names_));
const std::vector<std::string> changeset_comment_column_names = std::vector<std::string>(changeset_comment_column_names_, changeset_comment_column_names_ + sizeof(changeset_comment_column_names_) / sizeof(*changeset_comment_column_names_));

} // anonymous namespace

const std::vector<std::string> &user::column_names() { return user_column_names; }
const std::vector<std::string> &changeset::column_names() { return changeset_column_names; }
const std::vector<std::string> &current_tag::column_names() { return current_tag_column_names; }
const std::vector<std::string> &old_tag::column_names() { return old_tag_column_names; }
const std::vector<std::string> &node::column_names() { return node_column_names; }
const std::vector<std::string> &way::column_names() { return way_column_names; }
const std::vector<std::string> &way_node::column_names() { return way_node_column_names; }
const std::vector<std::string> &relation::column_names() { return relation_column_names; }
const std::vector<std::string> &relation_member::column_names() { return relation_member_column_names; }
const std::vector<std::string> &user::column_names() { return user_column_names; }
const std::vector<std::string> &changeset::column_names() { return changeset_column_names; }
const std::vector<std::string> &current_tag::column_names() { return current_tag_column_names; }
const std::vector<std::string> &old_tag::column_names() { return old_tag_column_names; }
const std::vector<std::string> &node::column_names() { return node_column_names; }
const std::vector<std::string> &way::column_names() { return way_column_names; }
const std::vector<std::string> &way_node::column_names() { return way_node_column_names; }
const std::vector<std::string> &relation::column_names() { return relation_column_names; }
const std::vector<std::string> &relation_member::column_names() { return relation_member_column_names; }
const std::vector<std::string> &changeset_comment::column_names() { return changeset_comment_column_names; }

const std::string changeset::table_name() { return "changesets"; }
const std::string changeset::tag_table_name() { return "changeset_tags"; }
const std::string changeset::inner_table_name() { return ""; }
const std::string changeset::inner_table_name() { return "changeset_comments"; }

const std::string node::table_name() { return "nodes"; }
const std::string node::tag_table_name() { return "node_tags"; }
Expand Down

0 comments on commit b1f6638

Please sign in to comment.