-
Notifications
You must be signed in to change notification settings - Fork 4k
/
Copy pathcreate_def.cc
465 lines (410 loc) · 16.6 KB
/
create_def.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
/*****************************************************************************
Copyright (c) 2020, 2024, Oracle and/or its affiliates.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License, version 2.0,
as published by the Free Software Foundation.
This program is designed to work with certain software (including
but not limited to OpenSSL) that is licensed under separate terms,
as designated in a particular file or component or in included license
documentation. The authors of MySQL hereby grant you an additional
permission to link the program and your derivative works with the
separately licensed software that they have either included with
the program or referenced in the documentation.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License, version 2.0, for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*****************************************************************************/
// This program creates a def file with all exports from specified libraries.
// This work is based on a JS script that was doing this before this program was
// created, and tries to closely copy the way the script was working. In essence
// we create a input file for link /dump, call link on this input with stdout
// redirected to big pipe that we read and process into set of unique symbols
// that matches our criteria. After link completes, we write out the gathered
// unique symbols. On Debug the link /dump generates ~280MB of output which is
// transformed into 8MB of the result def file, so it's quite important to have
// the processing fast. Currently the create_def consumes the data provided
// by the link two times faster than it takes to generate them, so the total
// execution time is still bound exactly by the link execution time.
// The original script removed the VS_UNICODE_OUTPUT environment variable used
// by the VC++ tools to communicate a need to use special pipes inside VS. This
// was used only sometime around VS2005-2008 and was removed.
#ifndef __clang__
// Disable runtime checks, iterator checks and debugs, and turn on
// optimizations. This is done to make debug version as fast as release one.
// Without this the create_def works even slower than original JS script. We
// can't enable this on CMake level (at least no way was found) because adding
// /O2 requires absence of /RTC1, which in turn can't be removed with CMake, and
// can't be overridden to be disabled by adding any compile options. Thus we
// have to disable /RTC1 and enable /O2 (or any level of optimization) using
// pragmas.
#ifdef _ITERATOR_DEBUG_LEVEL
#undef _ITERATOR_DEBUG_LEVEL
#endif
#define _ITERATOR_DEBUG_LEVEL 0
#pragma runtime_checks("", off)
#pragma optimize("g", on)
#endif
#include <windows.h>
#include <algorithm>
#include <atomic>
#include <chrono>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <system_error>
#include <thread>
#include <unordered_set>
#include <vector>
/** Prints an error message supplied and attaches GetLastError with formatted
* message. */
void error(std::string message) {
const DWORD last_error = GetLastError();
std::cerr << "Error during generating .def file: " << message << "\n"
<< "Last OS error code: " << last_error
<< ", msg: " << std::system_category().message((int)last_error)
<< std::endl;
exit(1);
}
/** Gathers and prints out the unique symbols. */
class Unique_symbol_map {
public:
Unique_symbol_map() { std::cout << "EXPORTS\n"; }
/** Processes a new symbol candidate in form of a single line of link /dump
* output. */
void insert(const std::string &symbol_line);
private:
/** List of symbols seen and printed out so far. */
std::unordered_set<std::string> m_symbols_seen;
};
/** Buffers input and runs a specified callback on single complete lines found.
*/
template <class TCallback>
class Line_buffer {
public:
Line_buffer(TCallback &&line_handler) : m_line_handler(line_handler) {}
/** Runs callback for last incomplete line if present. */
~Line_buffer();
/** Adds a raw buffer bytes to our buffer and finds any new completed lines to
* call callback on them. */
void insert(const char *buffer, size_t size);
private:
TCallback m_line_handler;
/** Current incomplete line. */
std::string m_curr_buffer;
};
template <class TCallback>
Line_buffer<TCallback> create_line_buffer(TCallback &&callback) {
return Line_buffer<TCallback>(std::forward<TCallback>(callback));
}
/** Runs a specified command line and calls a callback for all data that is
* written by the child program to its standard output. The standard error is
* redirected to parent standard error stream. There is no input stream
* redirected. */
template <class TCallback>
class Process {
public:
Process(std::string cmd_line, TCallback &&input_handler, DWORD pipe_size);
~Process();
double get_cpu_time() const;
private:
/** Creates a big pipe that will receive and buffer data coming from the
* child process. */
void create_pipe(DWORD pipe_size);
/** Runs the actual child process */
void create_process(std::string cmd_line);
/** Reads the child results until the pipe is not closed. Runs callback for
* all received data. */
void read_output(TCallback &&input_handler);
HANDLE m_stdout_read_pipe = INVALID_HANDLE_VALUE;
HANDLE m_stdout_write_pipe = INVALID_HANDLE_VALUE;
HANDLE m_process_handle = INVALID_HANDLE_VALUE;
};
template <class TCallback>
Process<TCallback> create_process(std::string cmd_line,
TCallback &&input_handler, DWORD pipe_size) {
return Process<TCallback>(cmd_line, std::forward<TCallback>(input_handler),
pipe_size);
}
template <class TCallback>
Process<TCallback>::Process(std::string cmd_line, TCallback &&input_handler,
DWORD pipe_size) {
create_pipe(pipe_size);
create_process(cmd_line);
read_output(std::forward<TCallback>(input_handler));
}
template <class TCallback>
Process<TCallback>::~Process() {
if (m_stdout_read_pipe != INVALID_HANDLE_VALUE) {
CloseHandle(m_stdout_read_pipe);
}
if (m_stdout_write_pipe != INVALID_HANDLE_VALUE) {
CloseHandle(m_stdout_write_pipe);
}
if (m_process_handle != INVALID_HANDLE_VALUE) {
CloseHandle(m_process_handle);
}
}
template <class TCallback>
void Process<TCallback>::create_pipe(DWORD pipe_size) {
SECURITY_ATTRIBUTES sec_attributes;
// The write side of the pipe needs to be inheritable.
sec_attributes.nLength = sizeof(SECURITY_ATTRIBUTES);
sec_attributes.bInheritHandle = TRUE;
sec_attributes.lpSecurityDescriptor = nullptr;
// We create buffer big enough to not make child process stall while we are
// processing its older output.
if (!CreatePipe(&m_stdout_read_pipe, &m_stdout_write_pipe, &sec_attributes,
pipe_size)) {
error("CreatePipe failed");
}
// The read side does not have to be inheritable.
if (!SetHandleInformation(m_stdout_read_pipe, HANDLE_FLAG_INHERIT, 0)) {
error("SetHandleInformation failed on read pipe");
}
}
template <class TCallback>
void Process<TCallback>::create_process(std::string cmd_line) {
PROCESS_INFORMATION proc_info;
STARTUPINFO start_info;
ZeroMemory(&proc_info, sizeof(PROCESS_INFORMATION));
ZeroMemory(&start_info, sizeof(STARTUPINFO));
start_info.cb = sizeof(STARTUPINFO);
start_info.hStdError = GetStdHandle(STD_ERROR_HANDLE);
start_info.hStdOutput = m_stdout_write_pipe;
start_info.hStdInput = INVALID_HANDLE_VALUE;
start_info.dwFlags |= STARTF_USESTDHANDLES;
// We cast away the constness as CreateProcess expects the non-const argument.
// However, only the UNICODE CreateProcessW variant actually modifies the
// memory supplied. Since this program assumes it is compiled with ANSI
// support only (no use of TCHAR) we can just cast the const away.
if (!CreateProcess(nullptr, const_cast<char *>(cmd_line.c_str()), nullptr,
nullptr, TRUE, 0, nullptr, nullptr, &start_info,
&proc_info)) {
error("CreateProcess failed");
} else {
m_process_handle = proc_info.hProcess;
CloseHandle(proc_info.hThread);
CloseHandle(m_stdout_write_pipe);
m_stdout_write_pipe = INVALID_HANDLE_VALUE;
}
}
// A quite small buffer for reading incoming data. It doesn't have to be big as
// the pipe itself does the buffering for more incoming data.
constexpr DWORD buf_size = 16 * 1024;
char raw_buffer[buf_size];
template <class TCallback>
void Process<TCallback>::read_output(TCallback &&input_handler) {
DWORD bytes_read;
DWORD bytes_available;
for (;;) {
// Check if there is any data to be read.
if (!PeekNamedPipe(m_stdout_read_pipe, nullptr, 0, nullptr,
&bytes_available, nullptr)) {
// This error is reported when the pipe is closed.
if (GetLastError() != 109) {
error("PeekNamedPipe failed");
}
return;
}
if (bytes_available) {
// Read actual data from pipe, but no more than our small buffer.
if (!ReadFile(m_stdout_read_pipe, raw_buffer,
std::min(bytes_available, buf_size), &bytes_read,
nullptr)) {
error("ReadFile on child process output pipe failed");
}
input_handler(raw_buffer, bytes_read);
} else {
Sleep(1);
}
}
}
double filetime_to_sec(const FILETIME &f) {
return (f.dwLowDateTime + (((uint64_t)f.dwHighDateTime) << 32)) / 10.0 /
1000 / 1000;
}
double get_cpu_time(HANDLE process) {
FILETIME kernel_time;
FILETIME user_time;
FILETIME creation_time;
FILETIME exit_time;
GetProcessTimes(process, &creation_time, &exit_time, &kernel_time,
&user_time);
return filetime_to_sec(kernel_time) + filetime_to_sec(user_time);
}
template <class TCallback>
double Process<TCallback>::get_cpu_time() const {
return ::get_cpu_time(m_process_handle);
}
double our_cpu_time() { return get_cpu_time(GetCurrentProcess()); }
template <class TCallback>
Line_buffer<TCallback>::~Line_buffer() {
if (!m_curr_buffer.empty()) {
m_line_handler(m_curr_buffer);
}
}
template <class TCallback>
void Line_buffer<TCallback>::insert(const char *buffer, size_t size) {
size_t last_index_added = 0;
for (size_t i = 0; i < size; ++i) {
if (buffer[i] == '\r' || buffer[i] == '\n') {
m_curr_buffer.append(buffer + last_index_added, i - last_index_added);
if (!m_curr_buffer.empty()) {
m_line_handler(m_curr_buffer);
m_curr_buffer.clear();
}
last_index_added = i + 1;
}
}
m_curr_buffer.append(buffer + last_index_added, size - last_index_added);
}
void Unique_symbol_map::insert(const std::string &symbol_line) {
// Some magic list of symbols we don't want to be exported.
static const char *compiler_symbols[] = {
"__real@", //
"__xmm@", // SSE instruction set constants
"_CTA2?", // std::bad_alloc
"_CTA3?", // std::length_error
"_CTA4?", // std::ios_base::failure
"_CTA5?", // std::ios_base::failure
"_CTA6?", // boost::exception_detail::clone_impl<boost::exception_detail::error_info_injector<boost::bad_get>>
"_CTA7?", // boost::exception_detail::clone_impl<boost::exception_detail::error_info_injector<boost::bad_lexical_cast>>
"_CTA8?AV?", // bad_rational
"_TI2?", // std::bad_alloc
"_TI3?", // std::length_error
"_TI4?", // std::ios_base::failure
"_TI5?", // std::ios_base::failure
"_TI6?", // boost::exception_detail::clone_impl<boost::exception_detail::error_info_injector<boost::bad_get>>
"_TI7?", // boost::exception_detail::clone_impl<boost::exception_detail::error_info_injector<boost::bad_lexical_cast>>
"_TI8?AV?", // bad_rational
"_RTC_", //
"??_C@_", //
"??_R", //
"??_7", //
"?_G", // scalar deleting destructor
"_VInfreq_?", // special label (exception handler?) for Intel compiler
"?_E", // vector deleting destructor
"<lambda_", // anything that is lambda-related
"__isa_available_default", // strange symbol that breaks PGO
};
if (symbol_line.find("External") == std::string::npos) {
return;
}
// Parse the line into tokens separated by a space.
std::vector<std::string> columns;
size_t current_pos = 0;
for (;;) {
auto pos = symbol_line.find(' ', current_pos);
if (pos == std::string::npos) break;
if (pos != 0) {
columns.push_back(symbol_line.substr(current_pos, pos - current_pos));
}
current_pos = pos + 1;
}
if (current_pos != symbol_line.size()) {
columns.push_back(symbol_line.substr(current_pos));
}
if (columns.size() < 3) {
return;
}
// Magic copied from JS script:
// If the third column of link /dump /symbols output contains SECTx, the
// symbol is defined in that section of the object file. If UNDEF appears, it
// is not defined in that object and must be resolved elsewhere. BSS symbols
// (like uninitialized arrays) appear to have non-zero second column.
if (columns[2].substr(0, 4) != "SECT") {
if (columns[2] == "UNDEF" && atol(columns[1].c_str()) == 0) {
return;
}
}
// Extract undecorated symbol names between "|" and next whitespace after it.
size_t index = 0;
while (index < columns.size() && columns[index] != "|") {
index++;
}
if (index + 1 >= columns.size()) {
error("Unexpected symbol line format: " + symbol_line);
}
// Extract the actual symbol name we care about and check it's not on list of
// compiler's symbols.
auto &symbol = columns[index + 1];
for (auto &compiler_symbol : compiler_symbols) {
if (symbol.find(compiler_symbol) != std::string::npos) {
return;
}
}
// Check if we have function or data.
if (symbol_line.find("notype () ") == std::string::npos) {
symbol.append(" DATA");
}
// Check if this is a function inside the std namespace
if (symbol_line.find(" __cdecl std::") != std::string::npos) {
return;
}
// Check if this symbol was seen before.
auto res = m_symbols_seen.emplace(symbol);
if (res.second) {
std::cout << symbol << "\n";
}
}
template <class TCallback>
double measure_execution_time(TCallback &&callback) {
auto start_time = std::chrono::high_resolution_clock::now();
callback();
// Print info about the time used.
auto end_time = std::chrono::high_resolution_clock::now();
return std::chrono::duration_cast<std::chrono::milliseconds>(end_time -
start_time)
.count() /
1000.0;
}
class Resp_file {
public:
Resp_file(int arguments_count, const char **arguments) {
std::ofstream rspFile(get_name().c_str());
rspFile << "/symbols \n";
for (int i = 0; i < arguments_count; ++i) {
const std::string input(arguments[i]);
if (input.size() > 4 && (input.substr(input.size() - 4) == ".lib" ||
input.substr(input.size() - 4) == ".obj")) {
rspFile << "\"" << input << "\"\n";
}
}
}
~Resp_file() {
// Cleanup.
_unlink(get_name().c_str());
}
std::string get_name() { return "dumpsymbols.rsp"; }
};
int main(int argc, const char *argv[]) {
double link_cpu_time;
auto time_in_sec = measure_execution_time([argc, argv, &link_cpu_time] {
// Prepare the input file for the link /dump.
std::cerr << "Creating def file..." << std::endl;
Resp_file resp_file(argc - 1, argv + 1);
// This should speed-up printing the result a little.
std::iostream::sync_with_stdio(false);
// Call the actual link /dump and process the data.
Unique_symbol_map symbol_map;
auto buffer = create_line_buffer(
[&symbol_map](std::string &line) { symbol_map.insert(line); });
auto process = create_process(
"link /dump @" + resp_file.get_name(),
[&buffer](const char *buf, size_t bytes_count) {
buffer.insert(buf, bytes_count);
},
// use bigger pipe buffer to let link /dump to buffer data in case we
// have some lag. The data comes from linker in rates of 10s MB/s.
16 * 1024 * 1024);
link_cpu_time = process.get_cpu_time();
});
std::cerr << std::setprecision(3) << "Creating def file finished in "
<< time_in_sec << "s. (We used " << our_cpu_time()
<< "s, link used " << link_cpu_time << "s CPU time)" << std::endl;
return 0;
}