/
quality.py
735 lines (582 loc) · 21.8 KB
/
quality.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
#!/usr/bin/env python
# -*- coding: utf-8 -*-
'''This module contains functions to extract various bits of information from
FITS files related to their quality, e.g. header keys related to the
environment, image background levels, etc.
'''
#############
## LOGGING ##
#############
import logging
from fitsbits import log_sub, log_fmt, log_date_fmt
DEBUG = False
if DEBUG:
level = logging.DEBUG
else:
level = logging.INFO
LOGGER = logging.getLogger(__name__)
logging.basicConfig(
level=level,
style=log_sub,
format=log_fmt,
datefmt=log_date_fmt,
)
LOGDEBUG = LOGGER.debug
LOGINFO = LOGGER.info
LOGWARNING = LOGGER.warning
LOGERROR = LOGGER.error
LOGEXCEPTION = LOGGER.exception
#############
## IMPORTS ##
#############
import os.path
import pickle
import sys
import multiprocessing
# Ref: https://bugs.python.org/issue33725
# TLDR; Apple is trash at UNIX
if sys.platform == 'darwin':
mp = multiprocessing.get_context('forkserver')
else:
mp = multiprocessing
import numpy as np
import numpy.ma as npma
from ._coordutils import hms_str_to_decimal, dms_str_to_decimal
from .operations import (
read_fits,
compressed_fits_ext,
trim_image
)
from ._extractors import clean_fname
NCPUS = mp.cpu_count()
#######################
## FRAME WARP CHECKS ##
#######################
def check_frame_badwarp(
frame,
ext=None,
margins=50,
polyorder=(4,4),
max_ratio=(50.0, 50.0),
makeplot=False
):
'''This checks if a warped ("shifted" to some astrometric reference
coordinates) frame is bad, perhaps caused by a bad shift/convolution.
Calculates the median of the rows and columns of the image taking into
account the margin on either side (as specified by the margins kwarg). Then
fits a straight line to the trend. Then fits a polynomial of the specified
`polyorder`. The test for bad frame warps is then::
(chi-sq of straight line fit / chi-sq of polynomial fit) > max_ratio
Parameters
----------
frame : str
The input frame to process.
ext : int or None
The FITS extension containing the image to process. If None, determined
automatically.
margins : int
The margins around the edges of the frame to leave out of the fits.
polyorder : tuple of two ints
The order of the polynomial in x and y directions respectively to fit in
addition to a straight line fit.
max_ratio : tuple of two floats
The maximum ratio between the linear and polynomial fits in the x and y
directions that is allowed to consider this frame as OK and not badly
warped.
makeplot : bool
If True, plots the medians along the rows and columns and the respective
fits.
Returns
-------
dict
Returns an info dict containing the fit and x-y median info.
'''
compressed_check = compressed_fits_ext(frame)
if compressed_check and ext is None:
ext = compressed_check[0]
elif not compressed_check and ext is None:
ext = 0
image, header = read_fits(frame, ext=ext)
clippedimage = image[margins:-margins, margins:-margins]
# FIXME: this should probably be:
# yimagecoordnum, ximagecoordnum = clippedimage.shape
# and then use them separately to make aranges in each dimension
imagecoordnum = np.arange(len(clippedimage))
# get the medians in the x and y directions
medx = np.nanmedian(clippedimage,axis=1)
medy = np.nanmedian(clippedimage,axis=0)
# fit a 1-degree polynomial
lin_xfitcoeffs = np.polyfit(imagecoordnum,medx,1)
lin_yfitcoeffs = np.polyfit(imagecoordnum,medy,1)
xpolyorder, ypolyorder = polyorder
# fit a polyorder-degree polynomial
poly_xfitcoeffs = np.polyfit(imagecoordnum,medx,xpolyorder)
poly_yfitcoeffs = np.polyfit(imagecoordnum,medy,ypolyorder)
# linfit polynomial
lin_xfitpoly = np.poly1d(lin_xfitcoeffs)
lin_yfitpoly = np.poly1d(lin_yfitcoeffs)
# polyorderfit polynomial
poly_xfitpoly = np.poly1d(poly_xfitcoeffs)
poly_yfitpoly = np.poly1d(poly_yfitcoeffs)
lin_xfit = lin_xfitpoly(imagecoordnum)
lin_yfit = lin_yfitpoly(imagecoordnum)
poly_xfit = poly_xfitpoly(imagecoordnum)
poly_yfit = poly_yfitpoly(imagecoordnum)
lin_xfit_redchisq = (
np.sum((medx - lin_xfit)*(medx - lin_xfit))/(len(imagecoordnum) - 2)
)
lin_yfit_redchisq = (
np.sum((medy - lin_yfit)*(medy - lin_yfit))/(len(imagecoordnum) - 2)
)
poly_xfit_redchisq = (
np.sum((medx - poly_xfit)*(medx - poly_xfit))/(len(imagecoordnum) -
xpolyorder)
)
poly_yfit_redchisq = (
np.sum((medy - poly_yfit)*(medy - poly_yfit))/(len(imagecoordnum) -
ypolyorder)
)
check_ratio_x = lin_xfit_redchisq/poly_xfit_redchisq
check_ratio_y = lin_yfit_redchisq/poly_yfit_redchisq
if check_ratio_x > max_ratio[0] and check_ratio_y > max_ratio[1]:
warp_ok = False
else:
warp_ok = True
warpinfo = {
'medx':medx,
'medy':medy,
'lin_xfitpoly':lin_xfitpoly,
'lin_yfitpoly':lin_yfitpoly,
'lin_xfit':lin_xfit,
'lin_yfit':lin_yfit,
'lin_xfit_redchisq':lin_xfit_redchisq,
'lin_yfit_redchisq':lin_yfit_redchisq,
'poly_xfitpoly':poly_xfitpoly,
'poly_yfitpoly':poly_yfitpoly,
'poly_xfit':poly_xfit,
'poly_yfit':poly_yfit,
'poly_xfit_redchisq':poly_xfit_redchisq,
'poly_yfit_redchisq':poly_yfit_redchisq,
'check_ratio_x': check_ratio_x,
'check_ratio_y': check_ratio_y,
'warp_ok':warp_ok,
'kwargs':{
'frame':frame,
'ext':ext,
'margins':margins,
'polyorder':polyorder,
'max_ratio':max_ratio,
}
}
if makeplot:
try:
import matplotlib.pyplot as plt
fig = plt.figure(1, figsize=(6.4*2,4.8),clear=True)
ax1, ax2 = fig.subplots(ncols=2, nrows=1)
ax1.plot(imagecoordnum, medx, 'k-')
ax1.plot(imagecoordnum, lin_xfit, 'r-', alpha=0.5, label='linear')
ax1.plot(imagecoordnum, poly_xfit, 'b-', alpha=0.5,
label='polyorder %s' % xpolyorder)
ax1.set_xlabel('image coord number')
ax1.set_ylabel('x medians, linear fit, and poly fit')
ax2.plot(imagecoordnum, medy, 'k-')
ax2.plot(imagecoordnum, lin_yfit, 'r-', alpha=0.5, label='linear')
ax2.plot(imagecoordnum, poly_yfit, 'b-', alpha=0.5,
label='polyorder %s' % xpolyorder)
ax2.set_xlabel('image coord number')
ax2.set_ylabel('y medians, linear fit, and poly fit')
savename = '%s-warp-check.png' % clean_fname(frame)
fig.savefig(savename, bbox_inches='tight', dpi=100)
plt.close('all')
LOGINFO('Wrote frame-warp diagnostic plot for %s to %s' %
(frame, savename))
# add the diagnostic plot to the warpinfo dict
warpinfo['plot'] = os.path.abspath(savename)
except Exception:
LOGEXCEPTION("Could not make a warp-check plot. "
"Is matplotlib installed?")
return warpinfo
######################
## FRAME BACKGROUND ##
######################
def extract_img_background(frame,
ext=None,
custom_limits=None,
median_diffbelow=200.0,
image_min=None):
'''
This extracts the background of the image array provided:
- masks the array to only values between the median and the min of flux
img_array = image to find the background for
custom_limits = use this to provide custom median and min limits for the
background extraction
median_diffbelow = subtract this value from the median to get the upper
bound for background extraction
image_min = use this value as the lower bound for background extraction
'''
if isinstance(frame, str):
compressed_check = compressed_fits_ext(frame)
if compressed_check and ext is None:
ext = compressed_check[0]
elif not compressed_check and ext is None:
ext = 0
image, header = read_fits(frame, ext=ext)
elif isinstance(frame, tuple):
image, header = frame
if not custom_limits:
backmax = np.nanmedian(image) - median_diffbelow
backmin = image_min if image_min is not None else np.nanmin(image)
else:
backmin, backmax = custom_limits
masked = npma.masked_outside(image, backmin, backmax)
backmasked = npma.median(masked)
return backmasked
######################################
## FRAME SOURCE AND PHOTOMETRY INFO ##
######################################
def tile_frame(image, tilesize=128):
'''
This returns a list of image tiles and the number of tiles in x, y.
'''
# tile the image
ntiles_x = int(np.floor(image.shape[1]/tilesize))
ntiles_y = int(np.floor(image.shape[0]/tilesize))
if ntiles_x != ntiles_y:
ntiles_y = ntiles_x
tiles = [
image[
i*tilesize:i*tilesize +
tilesize,
j*tilesize:j*tilesize +
tilesize
] for i in range(ntiles_y) for j in range(ntiles_x)
]
return tiles, ntiles_x, ntiles_y
def frame_source_info(
fits,
stats_only=False,
trim_frame=True,
trim_headerkeys=('TRIMSEC','DATASEC','TRIMSEC0'),
calibrated_imagetype_headerkey='calibratedobject',
fits_ext=None,
fits_racenter_key='RA',
fits_racenter_unit='hr',
fits_declcenter_key='DEC',
fits_declcenter_unit='deg',
fits_background_mediandiffbelow=0.0,
fits_background_tilesize=128,
# these are used by HAT Survey instruments
fits_environ_keys=(
'Z',
'MOONDIST',
'MOONELEV',
'MOONPH',
'HA',
'WIND',
'HUMIDITY',
'SKYTDIFF',
'AMBTEMP',
'DEWPT',
'CCDTEMP',
'CCDSTEMP',
'SUNDIST',
'SUNELEV',
'MNTSTATE',
'CHOMEHA',
'CHOMEDEC',
'IHUALM',
'IHUALT',
'IHUFOC',
),
# these are also used by HAT Survey instruments
fits_extra_keys=(
'CMSERIAL',
'EXPTIME',
'MEXPTIME',
'VERSION',
'STNAME',
'STVER',
'STCOM',
'OBSERVAT',
'SITEID',
'SITEVER',
'SITELAT',
'SITELONG',
'SITEALT',
'MTVER',
'CMID',
'CMVER',
'TELID',
'TELVER',
'FOV',
'BIASVER',
'DARKVER',
'FLATVER',
'PROJID',
'PROJTITL',
'FILID',
'FILTERS',
'FOCUS',
'IMAGETYP',
'OBJECT',
'JD',
'MIDDATE',
'MIDTIME',
'PTVER',
'HATP_SVN',
# these are added by HATPI's PIPE-TrEx pipeline at various stages
'PIPETREX',
'MSTRBIAS',
'MSTRDARK',
'MSTRFLAT',
'FRNGCORR',
'RAWFRAME',
'CMBCVTAG',
'KERNSPEC',
'CONVTRGT',
'SUBTYPE',
'REFHDTAG',
'TGTHDTAG',
'REFERIMG',
'TARGTIMG',
'COMBLIST',
'NCOMBINE',
'COMBNTYP',
'MEDNTEMP',
# these are added by FITSbits
'FITSBITS',
'PROCDT',
),
extra_info_dict=None,
writepickle=False,
pickleprefix=None,
):
'''This collects the following information about a frame:
- all of the values in the fits_environ_keys from the FITS header
- the overall median background value of the frame
- the median background value of the frame in tiles over the frame
- the MAD of the tiled background across the frame
Parameters
----------
fits : str or tuple of two elements
If provided as a str, is the name of the FITS file to process. If
provided as a tuple, this must contain the image to work on as an
2D np.array in the first element and the image header as an
astropy.io.fits.Header object in the second element.
stats_only : bool
If True, will return early with the image stats only. If False, will
process headers and accompanying photometry info as well.
trim_frame : bool
If True, will trim the frame according to the header keys provided in
the ``trim_headerkeys`` kwarg.
trim_headerkeys : sequence of str
The header keys to try one after the other when trying to get the
TRIMSEC/DATASEC from the FITS header.
calibrated_imagetype_headerkey : str
This gives the IMAGETYP FITS header key value associated with the
calibrated image. NOTE: PIPE-TrEx will use IMAGETYPE =
'calibratedobject' for any calibrated object image that it produces.
fits_ext : int
Sets the extension of the FITS file to process. If None, will choose the
first image-like extension automatically.
fits_racenter_key : str
The header key to use to extract the target field center right ascension
from the FITS.
fits_racenter_unit : {'deg', 'hms', 'hr'}
The unit of the right ascension value in the FITS header.
fits_declcenter_key : str
The header key to use to extract the target field center declination
from the FITS.
fits_declcenter_unit : {'deg', 'dms'}
The unit of the declination value in the FITS header.
fits_environ_keys : list or tuple
Sets the keys to extract from the FITS header for use as environmental
indicators for the FITS frame. These can later be selected on when
searching for good frames, etc.
fits_extra_keys : list or tuple
Sets extra keys to extract from the FITS header.
fits_background_mediandiffbelow : float, default: 0.0
Sets the value to be subtracted from the median value to form the final
background value. This effectively lowers the background value to a
value below the median. Useful if the median of an image is not
considered as a valid measure of its background and a lower value must
be adopted.
fits_background_tilesize : int, default: 256
Sets the x and y tilesize to use when generating estimates of the median
background level of the frame and the associated MAD.
extra_info_dict : dict or None
An arbitrary dict of information to be added to the output dict and
pickle.
writepickle : bool, default: False
Sets if a pickle should be created alongside the FITS file containing
all the info extracted from the FITS, fistar, and fiphot.
pickleprefix : str or None
This is a prefix to attach to the pickle file name. Useful to indicate
that the pickle was produced in a specific processing stage, for
example.
Returns
-------
dict
Returns a dict with all of the values as key:val pairs and paths to the
FITS. This can be used to insert stuff into a database easily.
'''
fits_info = {}
# read in the FITS image
if isinstance(fits, str):
img, hdr = read_fits(fits, ext=fits_ext)
elif isinstance(fits, tuple):
img, hdr = fits
# trim the image if told to do so
if trim_frame is True and trim_headerkeys is not None:
img = trim_image(img, hdr, trimkeys=trim_headerkeys)
fits_info['trimmed'] = True
else:
fits_info['trimmed'] = False
fits_info['shape'] = img.shape
# check if the image is a raw image
if (hdr['IMAGETYP'] == calibrated_imagetype_headerkey):
fits_info['image_is_calibrated'] = True
else:
fits_info['image_is_calibrated'] = False
#
# 1. calculate image stats
#
# calculate the overall background
fits_info['overall_background_median'] = extract_img_background(
fits,
ext=fits_ext,
median_diffbelow=fits_background_mediandiffbelow
)
# get the overall stats
fits_info['overall_mean'] = np.nanmean(img)
fits_info['overall_stdev'] = np.nanstd(img)
fits_info['overall_median'] = np.nanmedian(img)
fits_info['overall_mad'] = np.nanmedian(
np.abs(img - fits_info['overall_median'])
)
fits_info['overall_stdev_from_mad'] = 1.4826*fits_info['overall_mad']
# tile the frame
tiles, ntiles_x, ntiles_y = tile_frame(
img,
tilesize=fits_background_tilesize
)
fits_info['ntiles_x'] = ntiles_x
fits_info['ntiles_y'] = ntiles_y
# get the stats for each tile
tile_means = [np.nanmean(x) for x in tiles]
tile_medians = [np.nanmedian(x) for x in tiles]
tile_mads = [np.nanmedian(np.abs(x - np.nanmedian(x))) for x in tiles]
tile_stdevs = [np.nanstd(x) for x in tiles]
tile_maxes = [np.nanmax(x) for x in tiles]
tile_percentiles = [np.nanpercentile(x,(5,95)) for x in tiles]
tile_ptps = [np.ptp(x) for x in tiles]
tile_medians_mean = np.nanmean(tile_medians)
tile_medians_stdev = np.nanstd(tile_medians)
fits_info['tile_medians_mean'] = tile_medians_mean
fits_info['tile_medians_stdev'] = tile_medians_stdev
tile_medians_median = np.nanmedian(tile_medians)
tile_medians_mad = np.nanmedian(np.abs(tile_medians - tile_medians_median))
fits_info['tile_medians_median'] = tile_medians_median
fits_info['tile_medians_mad'] = tile_medians_mad
# these are per-tile values of each item
fits_info['tile_means'] = np.reshape(np.array(tile_means),
(ntiles_y, ntiles_x))
fits_info['tile_medians'] = np.reshape(np.array(tile_medians),
(ntiles_y, ntiles_x))
fits_info['tile_mads'] = np.reshape(np.array(tile_mads),
(ntiles_y, ntiles_x))
fits_info['tile_maxes'] = np.reshape(np.array(tile_maxes),
(ntiles_y, ntiles_x))
fits_info['tile_stdevs'] = np.reshape(np.array(tile_stdevs),
(ntiles_y, ntiles_x))
fits_info['tile_percentiles'] = np.reshape(np.array(tile_percentiles),
(ntiles_y, ntiles_x, 2))
fits_info['tile_ptps'] = np.reshape(np.array(tile_ptps),
(ntiles_y, ntiles_x))
if stats_only:
return fits_info
#
# 2. get the header keys out of the image
#
# get the environ keys out of the header
for key in fits_environ_keys:
if key in hdr:
val = hdr[key]
fits_info[key.lower()] = val
else:
fits_info[key.lower()] = None
# get the extra keys out of the header
if isinstance(fits_extra_keys, (list, tuple)):
for key in fits_extra_keys:
if key in hdr:
val = hdr[key]
fits_info[key.lower()] = val
else:
fits_info[key.lower()] = None
# get the image center keys out of the header
center_ra = hdr[fits_racenter_key]
center_decl = hdr[fits_declcenter_key]
if fits_racenter_unit == 'hr':
center_ra = center_ra/24.0 * 360.0
elif fits_racenter_unit == 'hms':
center_ra = hms_str_to_decimal(center_ra)
if fits_declcenter_unit == 'dms':
center_decl = dms_str_to_decimal(center_decl)
fits_info['center_ra'] = center_ra
fits_info['center_decl'] = center_decl
# write the kwargs to the output dict
fits_info['kwargs'] = {
'fits':(os.path.abspath(fits)
if isinstance(fits, str) else 'from ndarray'),
'fits_ext':fits_ext,
'fits_racenter_key':fits_racenter_key,
'fits_racenter_unit':fits_racenter_unit,
'fits_declcenter_key':fits_declcenter_key,
'fits_declenter_unit':fits_declcenter_unit,
'fits_environ_keys':fits_environ_keys,
'fits_extra_keys':fits_extra_keys,
'fits_background_mediandiffbelow':fits_background_mediandiffbelow,
'fits_background_tilesize':fits_background_tilesize,
}
if extra_info_dict is not None and isinstance(extra_info_dict, dict):
fits_info.update(extra_info_dict)
if writepickle:
outpicklef = os.path.join(
os.path.abspath(os.path.dirname(fits)),
'%s-imginfo%s.pkl' % (
clean_fname(fits, basename=True),
'-%s' % pickleprefix if pickleprefix else ''
)
)
with open(outpicklef, 'wb') as outfd:
pickle.dump(fits_info, outfd, protocol=pickle.HIGHEST_PROTOCOL)
return fits_info
def _parallel_stats_worker(task):
'''
This is a parallel worker for getting frame stats.
'''
fits, kwargs = task
try:
stats = frame_source_info(fits, **kwargs)
LOGINFO('Stats for %s OK' % fits)
return stats
except Exception:
LOGEXCEPTION("Could not get stats for %s" % fits)
return None
def parallel_frame_stats(fits_list,
nworkers=NCPUS,
maxworkertasks=1000,
**worker_kwargs):
'''
This runs frame stats in parallel for list of FITS.
'''
tasks = [(fits, worker_kwargs) for fits in fits_list]
# initialize the pool of workers
pool = mp.Pool(nworkers, maxtasksperchild=maxworkertasks)
# fire up the pool of workers
results = pool.map(_parallel_stats_worker, tasks)
# wait for the processes to complete work
pool.close()
pool.join()
resultdict = {x:y for (x,y) in zip(fits_list, results)}
return resultdict