-
Notifications
You must be signed in to change notification settings - Fork 488
/
Copy pathbinary_apple.c
244 lines (210 loc) · 8.47 KB
/
binary_apple.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
#include "binary.h"
#include "common/io/io.h"
#include "util/stringUtils.h"
#include "util/mallocHelper.h"
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <mach-o/loader.h>
#include <mach-o/swap.h>
#include <mach-o/fat.h>
#pragma GCC diagnostic ignored "-Wdeprecated-declarations" // swap_fat_arch
// Ref: https://github.com/AlexDenisov/segment_dumper/blob/master/main.c
/**
 * Helper function to read data from a file at a specific offset
 *
 * @param objFile File handle to read from
 * @param buf Destination buffer, at least `size` bytes large
 * @param size Number of bytes to read
 * @param offset Absolute position in the file to read from
 *
 * @return true only if the seek succeeded and exactly `size` bytes were read
 */
static inline bool readData(FILE *objFile, void *buf, size_t size, off_t offset)
{
    // A failed seek leaves the stream position unchanged; reading anyway
    // would silently return bytes from the wrong location.
    if (fseek(objFile, (long) offset, SEEK_SET) != 0)
        return false;
    return fread(buf, 1, size, objFile) == size;
}
/**
 * Handles a Mach-O section by extracting strings from the __cstring section
 *
 * Sections with any other name are ignored. The whole section is loaded into
 * memory and split at NUL bytes; every string that is at least `minLength`
 * bytes long and whose first byte is printable ASCII is passed to `cb`.
 *
 * @param objFile File handle to the Mach-O object file
 * @param name Section name to check
 * @param offset Offset of the section in the file
 * @param size Size of the section
 * @param cb Callback function to process strings
 * @param userdata User data for the callback
 * @param minLength Minimum string length to extract
 *
 * @return true to continue processing, false to stop (only when `cb` returned false)
 */
static bool handleMachSection(FILE *objFile, const char *name, off_t offset, size_t size, bool (*cb)(const char *str, uint32_t len, void *userdata), void *userdata, uint32_t minLength)
{
    if (!ffStrEquals(name, "__cstring")) return true;
    if (size == 0) return true; // Nothing to scan

    FF_AUTO_FREE char* data = (char*) malloc(size);
    if (data == NULL) return true; // Out of memory (or absurd size): skip this section

    if (!readData(objFile, data, size, offset))
        return true;

    // The section content comes from the file and is not trusted to end with
    // a terminator. Forcing one keeps strlen() below inside the buffer; for a
    // section that already ends in NUL this changes nothing.
    data[size - 1] = '\0';

    for (size_t off = 0; off < size; ++off)
    {
        const char* p = data + off;
        if (*p == '\0') continue;

        size_t len = strlen(p);
        // Only the first byte is checked: strings starting with a control
        // (or non-ASCII) character are ignored.
        if (len >= minLength && *p >= ' ' && *p <= '~')
        {
            if (!cb(p, (uint32_t) len, userdata)) return false;
        }

        // Skip the whole string, reported or not; the loop increment then
        // steps over its terminator.
        off += len;
    }
    return true;
}
/**
 * Processes a Mach-O header (32-bit or 64-bit)
 *
 * This function walks the load commands that follow a Mach-O header, looking
 * for the LC_SEGMENT or LC_SEGMENT_64 command that describes the __TEXT
 * segment. Other load commands are skipped. Once __TEXT is found, each of its
 * sections is handed to handleMachSection() and the scan ends.
 *
 * @param objFile File handle to the Mach-O object file
 * @param offset Offset of the Mach header in the file
 * @param is_64 Whether this is a 64-bit Mach-O header
 * @param cb Callback function to process strings
 * @param userdata User data for the callback
 * @param minLength Minimum string length to extract
 *
 * @return NULL on success (including when no __TEXT segment exists or the
 *         callback asked to stop), error message on failure
 */
static const char* dumpMachHeader(FILE *objFile, off_t offset, bool is_64, bool (*cb)(const char *str, uint32_t len, void *userdata), void *userdata, uint32_t minLength)
{
    uint32_t ncmds;
    off_t commandOffset = offset;

    if (is_64)
    {
        struct mach_header_64 header;
        if (!readData(objFile, &header, sizeof(header), offset))
            return "read mach header failed";
        ncmds = header.ncmds;
        commandOffset += (off_t) sizeof(header);
    }
    else
    {
        struct mach_header header;
        if (!readData(objFile, &header, sizeof(header), offset))
            return "read mach header failed";
        ncmds = header.ncmds;
        commandOffset += (off_t) sizeof(header);
    }

    for (uint32_t i = 0U; i < ncmds; i++)
    {
        struct load_command cmd;
        if (!readData(objFile, &cmd, sizeof(cmd), commandOffset))
            return "read load command failed";

        // cmdsize is the stride to the next command; a value smaller than the
        // command header itself would stall or misalign the walk.
        if (cmd.cmdsize < sizeof(cmd))
            return "invalid load command size";

        if (cmd.cmd == LC_SEGMENT_64)
        {
            struct segment_command_64 segment;
            if (readData(objFile, &segment, sizeof(segment), commandOffset) && ffStrEquals(segment.segname, "__TEXT"))
            {
                for (uint32_t j = 0U; j < segment.nsects; j++)
                {
                    // Section headers are stored right after the segment command
                    struct section_64 section;
                    off_t sectionHeaderOffset = commandOffset + (off_t) (sizeof(segment) + j * sizeof(section));
                    if (!readData(objFile, &section, sizeof(section), sectionHeaderOffset))
                        continue;

                    // section.offset is relative to the start of this Mach-O
                    // image, which is not the start of the file for a slice
                    // inside a universal binary.
                    if (!handleMachSection(objFile, section.sectname, offset + (off_t) section.offset, (size_t) section.size, cb, userdata, minLength))
                        return NULL;
                }
                return NULL; // __TEXT handled, nothing else to look at
            }
        }
        else if (cmd.cmd == LC_SEGMENT)
        {
            struct segment_command segment;
            if (readData(objFile, &segment, sizeof(segment), commandOffset) && ffStrEquals(segment.segname, "__TEXT"))
            {
                for (uint32_t j = 0U; j < segment.nsects; j++)
                {
                    struct section section;
                    off_t sectionHeaderOffset = commandOffset + (off_t) (sizeof(segment) + j * sizeof(section));
                    if (!readData(objFile, &section, sizeof(section), sectionHeaderOffset))
                        continue;

                    // Same image-relative offset rule as the 64-bit branch
                    if (!handleMachSection(objFile, section.sectname, offset + (off_t) section.offset, (size_t) section.size, cb, userdata, minLength))
                        return NULL;
                }
                return NULL; // __TEXT handled, nothing else to look at
            }
        }

        commandOffset += (off_t) cmd.cmdsize;
    }

    return NULL;
}
/**
 * Processes a Fat binary header (Universal binary)
 *
 * This function reads the fat header at the start of a universal binary and
 * walks its architecture table. The first embedded image whose magic is
 * MH_MAGIC or MH_MAGIC_64 is passed to dumpMachHeader(); remaining
 * architectures are not processed.
 *
 * @param objFile File handle to the universal binary
 * @param cb Callback function to process strings
 * @param userdata User data for the callback
 * @param minLength Minimum string length to extract
 *
 * @return NULL on success, error message on failure (including the error
 *         reported by dumpMachHeader() for the selected image)
 */
static const char* dumpFatHeader(FILE *objFile, bool (*cb)(const char *str, uint32_t len, void *userdata), void *userdata, uint32_t minLength)
{
    struct fat_header header;
    if (!readData(objFile, &header, sizeof(header), 0))
        return "read fat header failed";

    // After swapping, header.magic is compared against FAT_MAGIC below to
    // select the 32-bit or 64-bit arch table layout.
    bool needSwap = header.magic == FAT_CIGAM || header.magic == FAT_CIGAM_64;
    if (needSwap) swap_fat_header(&header, NX_UnknownByteOrder);

    for (uint32_t i = 0U; i < header.nfat_arch; i++)
    {
        off_t machHeaderOffset = 0;
        if (header.magic == FAT_MAGIC)
        {
            struct fat_arch arch;
            // Entries sit at increasing offsets, so once one cannot be read
            // the following ones cannot either; stop instead of retrying up
            // to nfat_arch times.
            if (!readData(objFile, &arch, sizeof(arch), (off_t) (sizeof(header) + i * sizeof(arch))))
                break;
            if (needSwap)
                swap_fat_arch(&arch, 1, NX_UnknownByteOrder);
            machHeaderOffset = (off_t) arch.offset;
        }
        else
        {
            struct fat_arch_64 arch;
            if (!readData(objFile, &arch, sizeof(arch), (off_t) (sizeof(header) + i * sizeof(arch))))
                break;
            if (needSwap)
                swap_fat_arch_64(&arch, 1, NX_UnknownByteOrder);
            machHeaderOffset = (off_t) arch.offset;
        }

        uint32_t magic;
        if (!readData(objFile, &magic, sizeof(magic), machHeaderOffset))
            continue;

        // Only images whose magic matches without swapping are handled;
        // report whatever dumpMachHeader() says rather than hiding its error.
        if (magic == MH_MAGIC_64 || magic == MH_MAGIC)
            return dumpMachHeader(objFile, machHeaderOffset, magic == MH_MAGIC_64, cb, userdata, minLength);
    }

    return "Unsupported fat header";
}
/**
 * Extracts string literals from a Mach-O (Apple) binary file
 *
 * Opens the file, inspects its leading magic number and dispatches to the
 * universal (fat) binary handler or directly to the Mach-O header handler.
 *
 * @param machoFile Path of the file to open
 * @param cb Callback function to process strings
 * @param userdata User data for the callback
 * @param minLength Minimum string length to extract
 *
 * @return NULL on success, error message on failure
 */
const char *ffBinaryExtractStrings(const char *machoFile, bool (*cb)(const char *str, uint32_t len, void *userdata), void *userdata, uint32_t minLength)
{
    FF_AUTO_CLOSE_FILE FILE *objFile = fopen(machoFile, "rb");
    if (!objFile)
        return "File could not be opened";

    // The first four bytes identify the container type
    uint32_t magic;
    if (!readData(objFile, &magic, sizeof(magic), 0))
        return "read magic number failed";

    switch (magic)
    {
        // Universal binaries, in either byte order.
        // FAT_CIGAM and FAT_CIGAM_64 are still used (/usr/bin/vim).
        case FAT_MAGIC:
        case FAT_MAGIC_64:
        case FAT_CIGAM:
        case FAT_CIGAM_64:
            return dumpFatHeader(objFile, cb, userdata, minLength);

        // Single-architecture images
        case MH_MAGIC:
            return dumpMachHeader(objFile, 0, false, cb, userdata, minLength);
        case MH_MAGIC_64:
            return dumpMachHeader(objFile, 0, true, cb, userdata, minLength);

        // MH_CIGAM and MH_CIGAM_64 seem to be no longer used, as
        // `swap_mach_header` is marked as deprecated.
        default:
            return "Unsupported format or big endian mach-o file";
    }
}