/
filesystem_summarise.ml
297 lines (265 loc) · 9.75 KB
/
filesystem_summarise.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
(*
* Copyright (C) 2011 Citrix Systems Inc.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation; version 2.1 only. with the special
* exception on linking described in file LICENSE.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*)
open Stringext
(* C function with returns the number of filesystem blocks allocated, as
opposed to the apparent file size. The latter can be misleading in the
presence of sparse files. However we might not really care about this. *)
external file_actual_size: string -> int64 = "stub_file_actual_size"
module File = struct
type kind = File | Dir
let string_of_kind = function
| File -> "File"
| Dir -> "Dir"
let kind_of_string = function
| "File" -> File
| "Dir" -> Dir
| x -> failwith (Printf.sprintf "Unknown kind: '%s'" x)
(** A file which we are tracking *)
type t = {
path: string; (** full path *)
mtime: int64; (** last modification time *)
size: int64; (** apparent size of the file *)
kind: kind; (** whether this is a file or directory *)
}
(** [of_stat path s] returns a value representing the file at [path] with
Unix.LargeFile.stat results [s] *)
let of_stat path s =
{
path = path;
mtime = Int64.of_float s.Unix.LargeFile.st_mtime;
size = s.Unix.LargeFile.st_size;
kind = match s.Unix.LargeFile.st_kind with
| Unix.S_REG -> File
| Unix.S_DIR -> Dir
| _ -> failwith "Illegal st_kind"
}
(** [of_string line] returns a file value unmarshalled from [line] *)
let of_string line =
match List.rev (String.split ',' line) with
| mtime :: size :: kind :: rest ->
let path = String.concat "," (List.rev rest) in
{
mtime = Int64.of_string mtime;
size = Int64.of_string size;
path = path;
kind = kind_of_string kind;
}
| _ ->
raise Not_found
(** [to_string f] marshals the file value [f] to a string *)
let to_string x =
Printf.sprintf "%s,%s,%Ld,%Ld\n" x.path (string_of_kind x.kind) x.size x.mtime
end
module StringMap = Map.Make(struct type t = string let compare = compare end)
module IntSet = Set.Make(struct type t = int let compare = compare end)
(** A map of filesystem path -> File.t *)
type index = File.t StringMap.t
(** [of_dir root] returns an index describing all the files rooted at [root]
including only:
1. files in the same filesystem (like find -xdev)
2. regular files and directories only (no special files)
We take care to only visit an inode once, to cope with loops caused by
hardlinks *)
let of_dir (root: string) =
let rec f (root: string) (st_dev, inodes, files) =
let s = Unix.LargeFile.lstat root in
let seen_before = IntSet.mem s.Unix.LargeFile.st_ino inodes in
let file_or_dir =
false
|| s.Unix.LargeFile.st_kind = Unix.S_REG
|| s.Unix.LargeFile.st_kind = Unix.S_DIR in
let on_this_device = s.Unix.LargeFile.st_dev = st_dev in
if file_or_dir && on_this_device && not(seen_before)
then
let children =
if Sys.is_directory root then Sys.readdir root else [| |] in
let file = File.of_stat root s in
(*
(* Work around the sparse file problem *)
let file = { file with File.size = min s.Unix.LargeFile.st_size (file_actual_size root) } in
*)
let files = StringMap.add root file files in
let inodes = IntSet.add s.Unix.LargeFile.st_ino inodes in
Array.fold_left
(fun acc file ->
try
f (Filename.concat root file) acc
with
| Unix.Unix_error(Unix.ENOENT,_,_) ->
Printf.fprintf stderr "Ignoring ENOENT from root=%s file=%s\n" root file;
acc
| e ->
Printf.fprintf stderr "Got exception on root=%s file=%s\n" root file;
raise e (* Exception gets printed on stdout *)
)
(st_dev, inodes, files) children
else (st_dev, inodes, files) in
let s = Unix.LargeFile.lstat root in
let _, _, files =
f root (s.Unix.LargeFile.st_dev, IntSet.empty, StringMap.empty) in
files
(** [to_file index filename] marshals the [index] to the file [filename] *)
let to_file index filename =
Unixext.with_file filename [ Unix.O_WRONLY; Unix.O_CREAT ] 0o644
(fun fd ->
StringMap.iter
(fun path file ->
let s = File.to_string file in
ignore(Unix.write fd s 0 (String.length s))
) index
)
(** [of_file filename] unmarshals an index from [filename] *)
let of_file =
Unixext.file_lines_fold
(fun index line ->
try
let file = File.of_string line in
StringMap.add file.File.path file index
with Not_found ->
Printf.fprintf stderr "Skipping line: %s\n" line;
index
) StringMap.empty
(** [diff a b] returns (c, d) where c contains elements in both [a] and [b]
and d contains elements in [a] but different or missing in [b]. *)
let diff a b =
StringMap.partition
(fun path file ->
StringMap.mem path b && (StringMap.find path b = file)
) a
(** [stats index] returns (total number of files,
total number of bytes in files) *)
let stats a =
StringMap.fold
(fun path file (nfiles, nbytes) ->
if file.File.kind = File.File
then Int64.(add nfiles 1L, add nbytes file.File.size)
else (nfiles, nbytes)
) a (0L, 0L)
let string_of_stats (nfiles, nbytes) = Printf.sprintf "%Ld files (%Ld KiB)" nfiles (Int64.div nbytes 1024L)
let show_files prefix =
StringMap.iter
(fun path file ->
if file.File.kind = File.File
then Printf.printf "%s%s (%Ld)\n" prefix path file.File.size)
type whitelist_kind =
| Ignore (** harmless mutation: ignore these *)
| Trash (** rubbish which should be deleted on boot *)
| Persist (** configuration data which must be preserved *)
let whitelist_kind_of_string = function
| "I" -> Ignore
| "T" -> Trash
| "P" -> Persist
| x -> failwith (Printf.sprintf "Unknown whitelist_kind: %s" x)
open Listext
(** [whitelist_of_file filename] returns a list of categorised whitelists, as
regexps *)
let whitelist_of_file filename =
let lines = Unixext.file_lines_fold
(fun acc line ->
let line = String.strip String.isspace line in
if line = "" || (String.startswith "#" line) then acc
else begin
match String.split ':' ~limit:2 line with
| (("I" | "T" | "P" ) as key) :: r :: [] ->
(* Check r is itself a valid regexp *)
begin
try let (_:Str.regexp) = Str.regexp r in ()
with e ->
Printf.fprintf stderr "Failed to parse whitelist file line: %s\n" line;
raise e
end;
(whitelist_kind_of_string key, r) :: acc
| _ ->
failwith (Printf.sprintf "Failed to parse whitelist file line: %s\n" line)
end
) [] filename in
let regexp_of_lines = function
| [] -> None
| lines ->
let r = "\\(" ^ (String.concat "\\|" lines) ^ "\\)" in
Some (Str.regexp r) in
let one kind =
kind, regexp_of_lines (List.filter_map (fun (key, line) -> if key = kind then Some line else None) lines) in
[ one Ignore; one Trash; one Persist ]
(** [diff_whitelist a r] returns (c, d) where everything in c matches [r]
and everything in d does not. *)
let diff_whitelist a r =
StringMap.partition
(fun path file ->
match r with
| None -> false
| Some r -> Str.string_match r path 0
) a
type mode =
| Summarise
| Delete
| List
let _ =
let path = ref "/" in
let db = ref "/var/xapi/files.db" in
let whitelist = ref "/etc/xensource/whitelist" in
let mode = ref Summarise in
let verbose = ref false in
Arg.parse
[
"-path", Arg.Set_string path, Printf.sprintf "Path to scan (default %s)" !path;
"-db", Arg.Set_string db, Printf.sprintf "Path to database (default %s)" !db;
"-whitelist", Arg.Set_string whitelist, Printf.sprintf "Path to whitelist (default %s)" !whitelist;
"-mode", Arg.Symbol ([ "summarise"; "delete"; "list" ], function
| "summarise" -> mode := Summarise
| "delete" -> mode := Delete
| "list" -> mode := List
| _ -> assert false), "Mode (default: summarise)";
"-v", Arg.Set verbose, Printf.sprintf "Verbose (default %b)" !verbose;
]
(fun x -> Printf.fprintf stderr "Skipping unknown argument: %s\n%!" x)
"Scan for modified files in a filesystem";
if Sys.file_exists !db then begin
let whitelist =
if Sys.file_exists !whitelist
then whitelist_of_file !whitelist
else [ Ignore, None; Persist, None; Trash, None ] in
let previous = of_file !db in
let current = of_dir !path in
let _, modified = diff previous current in
let _, created = diff current previous in
let different = StringMap.merge
(fun path a b -> match a, b with
| Some f, _ -> Some f
| _, Some f -> Some f
| None, None -> None) modified created in
let ignore, rest = diff_whitelist different (List.assoc Ignore whitelist) in
let trash, rest = diff_whitelist rest (List.assoc Trash whitelist) in
let persist, rest = diff_whitelist rest (List.assoc Persist whitelist) in
match !mode with
| Summarise ->
Printf.printf "Total: %s\n" (string_of_stats (stats different));
Printf.printf "Whitelist/Ignore: %s\n" (string_of_stats (stats ignore));
if !verbose then show_files " " ignore;
Printf.printf "Whitelist/Trash: %s\n" (string_of_stats (stats trash));
if !verbose then show_files " " trash;
Printf.printf "Whitelist/Persist: %s\n" (string_of_stats (stats persist));
if !verbose then show_files " " persist;
Printf.printf "Unexpected: %s\n" (string_of_stats (stats rest));
show_files " " rest;
if rest <> StringMap.empty then exit 1;
exit 0;
| List ->
show_files "" rest
| Delete -> failwith "Unimplemented"
end else begin
Printf.printf "Creating initial database\n";
let current = of_dir !path in
to_file current !db
end