/
heliotrope-import
141 lines (115 loc) · 5.01 KB
/
heliotrope-import
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
#!/usr/bin/env ruby
# encoding: UTF-8
require 'rubygems'
require 'fileutils'
require 'json'
require 'trollop'
require "heliotrope"
# Translation table applied to label names read from a sup label dump:
# a label found as a key here is replaced by its value; any other label
# is kept unchanged. Frozen so the shared constant cannot be mutated.
SUP_LABEL_MAP = { "killed" => "muted" }.freeze
# Command-line option specification. The resulting `opts` object is read
# below (opts.dir, opts.add_labels, ...) and is also handed as a whole to
# Heliotrope::MessageAdder, which consumes the source-specific options.
opts = Trollop::options do
  banner <<EOS
Imports emails to a Heliotrope server directory, from an mbox file, a
maildir, an IMAP folder, or a GMail account.
Importing mail is much faster than adding mail with heliotrope-add, as
it writes directly to the server's store directory. However, it cannot
be run at the same time as the server.
Note that Heliotrope only returns threads in the order they were added
to the index. If you are using this tool to import mail from multiple
pre-existing stores, you may have to use heliotrope-reindex as the final
step to sort all messages by date.
Usage: #{$0} [options]
Where options include:
EOS

  # General options, independent of the mail source.
  opt :dir, "Don't use the server. Instead, write directly to the store directory.", :type => String, :required => true
  opt :num_messages, "Index at most this many messages and then quit", :type => Integer, :short => "n"
  opt :num_skip, "Skip this many messages and then start indexing", :type => Integer, :short => "k"
  opt :state_file, "Store state to, and read state from, this file. This enables successive runs against the same source to pick up only what's changed.", :type => String
  opt :sup_label_file, "Load labels from a sup label dump from this file", :type => String
  opt :add_labels, "Add these labels to every message (should be a comma-separated list)", :type => String
  opt :remove_labels, "Do not add any of these labels to any message (should be a comma-separated list)", :type => String
  opt :verbose, "Enable verbose output"
  opt :no_skip_spam, "If a message is marked as spam, add it anyways (default: ignore it)"
  opt :no_skip_deleted, "If a message is marked as deleted, add it anyways (default: ignore it)"

  banner <<EOS
Options for reading from an mbox file:
EOS
  opt :mbox_fn, "mbox filename", :type => String, :short => "m"
  opt :mbox_start_offset, "Start file offset for scanning", :default => 0, :short => "s"

  banner <<EOS
Options for reading from a maildir directory:
EOS
  opt :maildir_dirs, "maildir directory", :type => :strings

  banner <<EOS
Options for reading from an IMAP account:
EOS
  opt :imap_host, "IMAP server hostname", :type => String
  opt :imap_port, "IMAP server port (default: 993 with ssl, 143 without)", :type => Integer
  opt :dont_use_ssl, "Don't use SSL"
  opt :imap_username, "IMAP username (default: prompt)", :type => String
  opt :imap_password, "IMAP password (default: prompt)", :type => String
  opt :imap_folder, "IMAP folder", :default => "INBOX"

  banner <<EOS
Options for reading from a GMail account:
EOS
  opt :gmail_username, "GMail username (default: prompt)", :type => String
  opt :gmail_password, "GMail password (default: prompt)", :type => String

  # Only one kind of mail source may be selected per run.
  conflicts :mbox_fn, :maildir_dirs, :imap_host, :gmail_username
end
# Anything left in ARGV after option parsing is an argument we do not understand.
Trollop::die "unknown argument: #{ARGV.first}" unless ARGV.empty?

# Turns a comma-separated option value (or nil when the option was not
# given) into an array of label names. Whitespace around the list and
# around each comma is discarded, and empty entries (e.g. from "a,,b" or
# a leading comma) are dropped so they never become blank labels.
split_labels = lambda do |csv|
  (csv || "").strip.split(/\s*,\s*/).reject(&:empty?)
end

add_labels = split_labels.call(opts.add_labels)
remove_labels = split_labels.call(opts.remove_labels)
# Message-id => array of label names, loaded from the sup label dump when
# --sup-label-file is given; an empty hash otherwise.
#
# Each dump line must look like "<msgid> (<label> <label> ...)". A line
# that does not match aborts the whole run. Label names are translated
# through SUP_LABEL_MAP; names without an entry there are kept as-is.
imported_labels = if opts.sup_label_file
  puts "Loading sup label dump..."

  # File.foreach rather than IO.foreach: the latter treats a path that
  # starts with "|" as a command to execute.
  loaded = File.foreach(opts.sup_label_file).each_with_object({}) do |line, by_msgid|
    match = begin
      /^(\S+?) \((.*)\)$/.match(line)
    rescue ArgumentError # sigh -- raised when the line is not valid in its encoding
      nil
    end
    abort "Can't parse labels line: #{line[0..250].inspect}" unless match

    msgid, sup_labels = match[1], match[2]
    by_msgid[msgid] = sup_labels.split(/\s+/).map { |label| SUP_LABEL_MAP[label] || label }
  end

  puts "Loaded #{loaded.size} labels."
  loaded
else
  {}
end
# The adder receives every parsed option and drives the iteration over
# the selected mail source (see the each_message call below).
adder = Heliotrope::MessageAdder.new opts

# Lay out the store directory. mkdir_p creates missing parents, so
# creating "<dir>/index" also creates "<dir>" itself. FileUtils comes
# from the 'fileutils' standard library, required at the top of the file.
index_dir = File.join(opts.dir, "index")
FileUtils.mkdir_p index_dir

# Open the on-disk components directly, all rooted at opts.dir.
store = LevelDB::DB.new File.join(opts.dir, "store")
index = Whistlepig::Index.new File.join(index_dir, "whistlepig")
hooks = Heliotrope::Hooks.new File.join(opts.dir, "hooks")
metaindex = Heliotrope::MetaIndex.new store, index, hooks
zmbox = Heliotrope::ZMBox.new File.join(opts.dir, "messages")
# Main import loop. For every raw message the adder yields, parse it and,
# unless it is already indexed or filtered out as spam/deleted, append the
# raw text to the message store and add the message to the meta-index.
#
# The block normally evaluates to [seen, indexed, bad]:
#   seen    - the message id was already present in the meta-index
#   indexed - the message was stored and indexed by this run
#   bad     - the message could not be parsed (InvalidMessageError)
# Messages skipped as spam/deleted leave the block via a bare `next`, so
# the adder receives nil for them instead of the triple.
# NOTE(review): how the adder consumes this value is not visible here;
# confirm before changing the nil case.
adder.each_message do |rawbody, source_state, source_labels|
  seen = indexed = bad = false

  begin
    message = Heliotrope::Message.new(rawbody).parse!

    if metaindex.contains_safe_msgid? message.safe_msgid
      seen = true
    else
      ## if we have imported labels, use those for both labels and state
      state, labels = if imported_labels.member? message.msgid
        sup_labels = imported_labels[message.msgid]
        [sup_labels, sup_labels]
      else
        [source_state, source_labels]
      end

      # `+=` / `-=` build new arrays, so `state` is untouched even when it
      # started out as the very same array object as `labels`.
      labels += add_labels
      labels -= remove_labels

      next if (state.include?("spam") || labels.include?("spam")) && !opts.no_skip_spam
      next if (state.include?("deleted") || labels.include?("deleted")) && !opts.no_skip_deleted

      loc = zmbox.add rawbody
      metaindex.add_message message, state, labels, :loc => loc
      indexed = true
    end
  rescue Heliotrope::InvalidMessageError
    bad = true
  rescue Exception # sigh -- deliberately broad; the error is always re-raised below
    # Keep the offending message for debugging. Binary mode so the raw
    # bytes are written untouched (no newline or encoding translation).
    File.open("bad-message.txt", "wb") { |f| f.write rawbody }
    $stderr.puts "* wrote broken message to bad-message.txt"
    raise
  end

  [seen, indexed, bad]
end