This repository has been archived by the owner on Oct 26, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 21
/
Copy pathseeing.py
1073 lines (907 loc) · 41.5 KB
/
seeing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#! /usr/bin/env python2
# Copyright (c) 2012 Victor Terron. All rights reserved.
# Institute of Astrophysics of Andalusia, IAA-CSIC
#
# This file is part of LEMON.
#
# LEMON is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import division
description = """
This module runs SExtractor in parallel on all the input FITS files, fully
leveraging multiple processors available on the machine. Images with a too
large full width at half maximum (FWHM) or elongation (ratio between its semi-major
and semi-minor axis lengths) are automatically discarded by fitting a Gaussian
distribution to the values and excluding those which are above a certain
sigma. Finally, the 'best-seeing' image, the optimal one on which to identify
sources, is found by identifying among the non-discarded images the one whose
number of sources is at a given percentile and which has the best FWHM.
"""
import atexit
import logging
import multiprocessing
import numpy
import optparse
import os
import os.path
import scipy.stats
import scipy.signal
import shutil
import sys
import tempfile
import time
# LEMON modules
import util
import astromatic
import customparser
import defaults
import fitsimage
import keywords
import style
class FITSeeingImage(fitsimage.FITSImage):
    """High-level interface to the SExtractor catalog of each FITS image.

    As running SExtractor on an image is a CPU-expensive operation, in the
    order of seconds, this class uses the local filesystem as a cache, so that
    SExtractor does not have to be unnecessarily run twice. To achieve this,
    the SExtractor catalog is saved to a temporary file, whose path is stored
    in the FITS header of the image. In order to prevent an outdated catalog
    (that is, if the SExtractor configuration files are modified in between),
    the MD5 hash of the configuration files is also stored.
    """

    def __init__(self, path, maximum, margin, coaddk=keywords.coaddk):
        """Instantiation method for the FITSeeingImage class.

        The path to the SExtractor catalog is read from the FITS header: if the
        keyword is not found, or if it is present but refers to a non-existent
        file, SExtractor has to be executed again. Even if the catalog exists,
        however, the MD5 of the SExtractor configuration files that were used
        when the catalog was created must be the same.

        The 'maximum' parameter determines the pixel value above which it is
        considered saturated. This value depends not only on the CCD, but also
        on the number of coadded images. If three images were coadded, for
        example, the effective saturation level equals three times the value of
        'saturation'. The number of effective coadds is read from the 'coaddk'
        parameter. If the keyword is missing, a value of one (that is, that the
        image consisted of a single exposure) is assumed.

        The 'margin' argument gives the width, in pixels, of the areas adjacent
        to the edges of the image that are to be ignored when detecting sources
        on the reference image. Stars whose center is fewer than this number of
        pixels from any border of the FITS image are not considered.
        """

        super(FITSeeingImage, self).__init__(path)
        self.margin = margin
        msg = "%s: width of margin: %d pixels" % (self.path, self.margin)
        logging.debug(msg)

        # Compute the MD5 hash of the SExtractor configuration files and this
        # saturation level, which overrides the definition of SATUR_LEVEL.
        satur_level = self.saturation(maximum, coaddk=coaddk)
        options = dict(SATUR_LEVEL=str(satur_level))
        sex_md5sum = astromatic.sextractor_md5sum(options=options)
        msg = "%s: SExtractor MD5 hash: %s" % (self.path, sex_md5sum)
        logging.debug(msg)

        # The outer try/except decides whether the cached, on-disk catalog can
        # be reused: any of KeyError (header keyword missing), IOError (catalog
        # file gone) or ValueError (stale MD5 hash) raised below means that
        # SExtractor must be run again.
        try:
            try:
                self.catalog_path = self.read_keyword(keywords.sex_catalog)
            except KeyError:
                msg = "%s: keyword '%s' not found"
                logging.debug(msg % (self.path, keywords.sex_catalog))
                raise

            # The path not only must be stored: the catalog must also exist
            msg = "%s: on-disk catalog %s" % (self.path, self.catalog_path)
            if not os.path.exists(self.catalog_path):
                logging.debug(msg + " does not exist")
                raise IOError
            else:
                logging.debug(msg)

            try:
                img_sex_md5sum = self.read_keyword(keywords.sex_md5sum)
                msg = "%s: on-disk catalog SExtractor MD5 hash (%s): %s"
                args = self.path, keywords.sex_md5sum, img_sex_md5sum
                logging.debug(msg % args)
            except KeyError:
                msg = "%s: keyword '%s' not found"
                logging.debug(msg % (self.path, keywords.sex_md5sum))
                raise

            if img_sex_md5sum != sex_md5sum:
                msg = "%s: outdated catalog (SExtractor hashes do not match)"
                logging.debug(msg % self.path)
                try:
                    os.unlink(self.catalog_path)
                    msg = "%s: outdated catalog removed from disk" % self.path
                    logging.debug(msg)
                except (IOError, OSError), e:
                    msg = "%s: could not remove outdated catalog (%s)"
                    logging.warning(msg % (self.path, e))
                finally:
                    # Raised unconditionally: whether or not the stale catalog
                    # could be deleted, it must be regenerated.
                    raise ValueError

            # This point only reached if on-disk catalog can be reused
            msg = "%s: on-disk catalog exists and MD5 hashes match. Yay!"
            logging.debug(msg % self.path)

        except (KeyError, IOError, ValueError):
            msg = (
                "%s: could not reuse an existing, on-disk cached catalog; "
                "SExtractor must be run"
            ) % self.path
            logging.debug(msg)

            # Redirect standard and error outputs to null device
            with open(os.devnull, "wt") as fd:
                logging.info("%s: running SExtractor" % self.path)
                self.catalog_path = astromatic.sextractor(
                    self.path, options=options, stdout=fd, stderr=fd
                )
                logging.debug("%s: SExtractor OK" % self.path)

            try:
                # Update the FITS header with the path and MD5; give up
                # silently in case we do not have permissions to do it
                # (IOError) or if the length of the HIERARCH keyword, equal
                # sign and value is longer than 80 characters (ValueError,
                # see FITSImage.update_keyword() for details). The cast to
                # str is needed because PyFITS has complained sometimes
                # about "illegal values" if it receives a Unicode string.
                self.update_keyword(keywords.sex_catalog, str(self.catalog_path))
                self.update_keyword(keywords.sex_md5sum, sex_md5sum)
            except (IOError, ValueError):
                pass

    @property
    @util.memoize
    def catalog(self):
        """Return the SExtractor catalog of the FITS image.

        The method returns the SExtractor catalog of the FITS image (a
        astromatic.Catalog instance), excluding from it those stars that are
        too close to the image edges, according to the value 'margin' value
        given when the object was instantiated.

        As parsing the catalog file and constructing the astromatic.Catalog
        instance requires a disk access, the catalog is memoized in order to
        speed up our code. Note that this means that on-disk modifications of
        the catalog (although this is something you should not be doing,
        anyway) will not be reflected after the first call to this method.
        """

        # Catalogs are a subclass of tuple and cannot therefore be modified, so
        # we need to use a list comprehension first, to identify the stars that
        # are not too close to the edges, and use from_sequence() to create a
        # new Catalog instance with the selected stars.
        self._ignored_sources = 0
        catalog = astromatic.Catalog(self.catalog_path)
        logging.info(
            "Removing from the catalog objects too close to the "
            "edges of %s" % self.path
        )
        logging.debug("Margin width: %d pixels" % self.margin)
        logging.debug("Image size: (%d, %d)" % self.size)

        non_discarded = []
        for star in catalog:
            logger_msg = "Star at %.3f, %.3f " % (star.x, star.y)
            # A star is kept only if its center lies at least 'margin' pixels
            # away from every border of the image.
            if (
                star.x < self.margin
                or star.x > (self.x_size - self.margin)
                or star.y < self.margin
                or star.y > (self.y_size - self.margin)
            ):
                logging.debug(logger_msg + "ignored -- too close to edges")
                self._ignored_sources += 1
            else:
                non_discarded.append(star)
                logging.debug(logger_msg + "OK -- inside of margins")

        if __debug__:
            # Sanity checks: every detection is either kept or counted as
            # ignored, and all kept stars fall inside the margins.
            assert len(catalog) == len(non_discarded) + self._ignored_sources
            for star in non_discarded:
                assert star.x >= self.margin
                assert star.x <= (self.x_size - self.margin)
                assert star.y >= self.margin
                assert star.y <= (self.y_size - self.margin)

        return astromatic.Catalog.from_sequence(*non_discarded)

    def __len__(self):
        """ Return the number of stars detected by SExtractor in the image """
        return len(self.catalog)

    @property
    def ignored(self):
        """Return the number of SExtractor detections ignored because they
        were too close to the image edges"""

        try:
            return self._ignored_sources
        # The _ignored_sources attribute does not exist until the SExtractor
        # catalog is accessed the first time; so if this method is called
        # before it happens, just access (and memoize) it and try again
        except AttributeError:
            self.catalog
            return self.ignored

    @property
    def total(self):
        """Return the total number of detections in the SExtractor catalog,
        ignored or not"""
        return len(self) + self.ignored

    def __getitem__(self, key):
        """Return the key-th star detected by SExtractor in the image --
        obviously, ignored stars (too close to the edges) are not considered"""
        return self.catalog[key]

    @property
    def coordinates(self):
        """Return a list with the celestial coordinates of the stars.

        This method returns a list containing a Coordinates object for each
        astronomical source that was detected by SExtractor and not ignored
        because of its proximity to the image edge.
        """
        return self.catalog.get_sky_coordinates()

    def snr_percentile(self, per):
        """Return the score at the given percentile of the SNR of the stars.

        This methods answers the question of what is the lowest SNR among a
        given fraction of the stars with the best signal-to-noise ratio. Those
        stars whose ADU counts are above the effective saturation level are
        excluded from the computation. If the desired quantile lies between two
        data points, we interpolate between them. Note that the score at
        per = 50 is, by definition, the median.

        The ValueError exception is raised in case no star can be used to
        compute the percentile of the signal-to-noise ratio. This is usually
        caused by a too low saturation level, although it may also happen with
        small images of sparsely populated fields, for which the value of the
        margin may cause all the stars to be ignored.

        Arguments:
        per - percentile at which to extract the score
        """

        snrs = [star.snr for star in self if not star.saturated]
        if not snrs:
            raise ValueError(
                "no stars available to compute the percentile SNR of '%s'" % self.path
            )
        # If empty, scoreatpercentile would raise IndexError
        return scipy.stats.scoreatpercentile(snrs, per)

    def fwhm(self, per=50, mode="median"):
        """Return the median (or mean) FWHM of the stars in the image.

        The method returns the median (or mean) of the full width at half
        maximum of the stars detected by SExtractor in the image. Those stars
        whose ADU counts are above the effective saturation level are excluded
        from the computation. Also are excluded stars with too low a SNR (see
        the 'per' keyword argument), in order to prevent false detections and
        faint objects from distorting the reliability of the result.

        The ValueError exception is raised in case no star can be used to
        compute the FWHM. This is usually caused by a too low saturation level,
        although it may also happen with small images of sparsely populated
        fields, for which the value of the margin may cause all the stars to
        be ignored.

        The FWHM is computed thanks to the FLUX_RADIUS parameter, which
        estimates the radius of the circle centered on the barycenter of the
        star that encloses about half of the total flux. For a Gaussian profile,
        this is equal to 1/2 FWHM, although, according to the author of
        SExtractor, for most astronomical images it will be slightly higher.
        [URL] http://www.astromatic.net/forum/showthread.php?tid=318

        Keyword arguments:
        per - the score at the given percentile of the signal-to-noise ratio of
              the stars that are to be included in the calculation of the
              FWHM. That is: stars whose SNR is below the 'per' percentile are
              excluded when the FWHM of the stars in the image is calculated.
              Note that the percentile is calculated taking into account only
              the stars within the image margins.
        mode - how the FWHM of the image is computed out of the FWHM of the
               stars on it detected. Must be 'median' or 'mean'; otherwise,
               ValueError is raised.
        """

        if mode not in ("median", "mean"):
            raise ValueError("'mode' must be 'median' or 'mean'")

        # Find the signal-to-noise ratio of the image at the 'per' percentile.
        # This will be the minimum SNR that a star must have for its FWHM to be
        # taken into account when calculating the FWHM of the image as a whole.
        snr = self.snr_percentile(per)

        fwhms = []
        for star in self:
            # Ignore saturated and noisy stars
            if star.saturated or star.snr < snr:
                continue
            else:
                fwhms.append(star.fwhm)

        if not fwhms:
            # Exception needed, NumPy would return NaN for an empty list
            raise ValueError("no stars available to compute the FWHM")

        if mode == "median":
            return numpy.median(fwhms)
        else:
            assert mode == "mean"
            return numpy.mean(fwhms)

    def elongation(self, per=50, mode="median"):
        """Return the median (or mean) elongation of the stars in the image.

        The method returns the median (or mean) of the elongation of the stars
        detected by SExtractor in the image. Those stars whose ADU counts are
        above the effective saturation level are excluded from the computation.
        Also are excluded stars with too low a SNR (see the 'per' keyword
        argument), in order to prevent false detections and faint objects from
        distorting the reliability of the result.

        The ValueError exception is raised in case no star can be used to
        compute the elongation. This is usually caused by a too low saturation
        level, although it may also happen with small images of sparsely
        populated fields, for which the value of the margin may cause all
        the stars to be ignored.

        The elongation of an object is given in the SExtractor catalogs by the
        ELONGATION parameter, and is defined as A/B, where A and B are its
        semi-major and semi-minor axis lengths, respectively. More precisely,
        A and B represent the maximum and minimum spatial rms of the object
        profile along any direction. Therefore, the average elongation is
        always >= 1.0, as A is known to be greater or equal than B.

        Keyword arguments:
        per - the score at the given percentile of the signal-to-noise ratio of
              the stars that are to be included in the calculation of the
              elongation. That is: stars whose SNR is below the 'per'
              percentile are excluded when the elongation of the stars in the
              image is calculated. Note that the percentile is calculated
              taking into account only the stars within the image margins.
        mode - how the elongation of the image is computed out of the
               elongation of the stars on it detected. Must be 'median' or
               'mean'; otherwise, ValueError is raised.
        """

        if mode not in ("median", "mean"):
            raise ValueError("'mode' must be 'median' or 'mean'")

        # Find the signal-to-noise ratio of the image at the 'per' percentile.
        snr = self.snr_percentile(per)

        elongations = []
        for star in self:
            # Ignore saturated and noisy stars
            if star.saturated or star.snr < snr:
                continue
            else:
                elongations.append(star.elongation)

        if not elongations:
            # Exception needed, NumPy would return NaN for an empty list
            raise ValueError("no stars available to compute the elongation")

        if mode == "median":
            return numpy.median(elongations)
        else:
            assert mode == "mean"
            return numpy.mean(elongations)
# This Queue is global -- this works, but note that we could have
# passed its reference to the function managed by pool.map_async.
# See http://stackoverflow.com/a/3217427/184363
queue = util.Queue()

# Command-line interface: the parser and its options are built at module
# level so that main() (and interactive users) can reuse them directly.
parser = customparser.get_parser(description)
parser.usage = "%prog [OPTION]... INPUT_IMGS... OUTPUT_DIR"

parser.add_option(
    "--filename",
    action="store",
    type="str",
    dest="bseeingfn",
    default="best_seeing.fits",
    help="filename with which the FITS image with the best "
    "full width at half-maximum (FWHM) will be saved to "
    "OUTPUT_DIR. If set to an empty string (''), the image "
    "will retain its original filename, although --suffix will "
    "still be appended (in other words, it is treated as all "
    "the other images) [default: %default]",
)

parser.add_option(
    "--maximum",
    action="store",
    type="int",
    dest="maximum",
    default=defaults.maximum,
    help=defaults.desc["maximum"],
)

parser.add_option(
    "--margin",
    action="store",
    type="int",
    dest="margin",
    default=defaults.margin,
    help=defaults.desc["margin"],
)

parser.add_option(
    "--snr-percentile",
    action="store",
    type="float",
    dest="per",
    default=defaults.snr_percentile,
    help=defaults.desc["snr_percentile"],
)

parser.add_option(
    "--mean", action="store_true", dest="mean", help=defaults.desc["mean"]
)

parser.add_option(
    "--sources-percentile",
    action="store",
    type="float",
    dest="stars_per",
    default=75.0,
    help="the best-seeing image is identified by finding the "
    "one with the best (lowest) FWHM among the non-discarded "
    "images whose number of detected sources is at this "
    "percentile. Taking directly the image with the best FWHM "
    "could not work as we need the image to also be one of the "
    "most populated [default: %default]",
)

parser.add_option(
    "--suffix",
    action="store",
    type="str",
    dest="suffix",
    default="s",
    help="string to be appended to output images, before "
    "the file extension, of course [default: %default]",
)

parser.add_option(
    "--overwrite",
    action="store_true",
    dest="overwrite",
    help="overwrite any output file if it already exists",
)

parser.add_option(
    "--cores",
    action="store",
    type="int",
    dest="ncores",
    default=defaults.ncores,
    help=defaults.desc["ncores"],
)

parser.add_option(
    "-v",
    "--verbose",
    action="count",
    dest="verbose",
    default=defaults.verbosity,
    help=defaults.desc["verbosity"],
)

# Options controlling the automatic discarding of images by FWHM.
fwhm_group = optparse.OptionGroup(
    parser,
    "Full width at half maximum",
    "After SExtractor is run on the FITS files, the FWHM of each "
    "image is computed by taking the median or mean of the stars "
    "which center is not too close to the margins see --mean and "
    "--margin options) and fitting a Gaussian distribution to the "
    "values. Those images whose FWHM exceed the specified number "
    "of standard deviations of the mean (mu + n x sigma) are "
    "discarded",
)

fwhm_group.add_option(
    "--fsigma",
    action="store",
    type="float",
    dest="fwhm_sigma",
    default=3.0,
    help="the maximum number of standard deviations of "
    "the mean; gives the maximum FWHM allowed for images "
    "[default: %default]",
)

fwhm_group.add_option(
    "--fwhm_dir",
    action="store",
    type="str",
    dest="fwhm_dir",
    default="fwhm_discarded",
    help="subdirectory where automatically discarded "
    "images because of their FWHM will be saved in "
    "OUTPUT_DIR [default: %default]",
)
parser.add_option_group(fwhm_group)

# Options controlling the automatic discarding of images by elongation.
elong_group = optparse.OptionGroup(
    parser,
    "Elongation",
    "After images with a bad (excessively large) FWHM are "
    "discarded, those elongated are identified and excluded by "
    "using the same approach. The elongation of a star, given by "
    "SExtractor, is defined as A/B, where A and B are its "
    "semi-major and semi-minor axis lengths, respectively. More "
    "precisely, A and B represent the maximum and minimum spatial "
    "rms of the object profile along any direction. Elongated "
    "images are usually caused by intermittent problems with the "
    "telescope tracking.",
)

elong_group.add_option(
    "--esigma",
    action="store",
    type="float",
    dest="elong_sigma",
    default=3.0,
    help="the maximum number of standard deviations of "
    "the mean; gives the maximum elongation allowed for "
    "images [default: %default]",
)

elong_group.add_option(
    "--elong_dir",
    action="store",
    type="str",
    dest="elong_dir",
    default="elong_discarded",
    help="subdirectory where automatically discarded "
    "images because of their elongation will be saved in "
    "OUTPUT_DIR [default: %default]",
)
parser.add_option_group(elong_group)

# Options naming the FITS keywords read from / written to the headers.
key_group = optparse.OptionGroup(parser, "FITS Keywords", keywords.group_description)

key_group.add_option(
    "--coaddk",
    action="store",
    type="str",
    dest="coaddk",
    default=keywords.coaddk,
    help=keywords.desc["coaddk"],
)

key_group.add_option(
    "--fwhmk",
    action="store",
    type="str",
    dest="fwhmk",
    default=keywords.fwhmk,
    help="keyword to which to write the estimated FWHM "
    "of of the image. This value is needed by subsequent "
    "modules of the pipeline [default: %default]",
)
parser.add_option_group(key_group)

customparser.clear_metavars(parser)
@util.print_exception_traceback
def parallel_sextractor(args):
"""Run SExtractor and compute the FWHM and elongation of a FITS image.
This method is intended to be used with a multiprocessing' pool of workers.
It receives a two-element tuple, the path of an image and the 'instance'
object returned by optparse's parse_args(), containing the values for all
the options of the program, and runs SExtractor on the image. The median or
mean (depending whether the --mean option was given) FWHM and elongation of
the stars in the image is also computed. Nothing is returned; instead, the
result is saved to the global variable 'queue' as a five-element tuple: (1)
path of the input image, (2) path of the temporary output image (a copy of
the input image, with the path to the SExtractor catalog and the MD5 hash
of the configuration files written to its header), (3) FWHM, (4) elongation
and (5) number of objects that were detected by SExtractor. Nothing is
added to 'queue' in case an error is encountered.
"""
path, options = args
mode = options.mean and "mean" or "median"
try:
# FITSeeingImage.__init__() writes to the header of the FITS image the
# path to the SExtractor catalog and the MD5 hash of the configuration
# files. Work on a copy of the input image so as not to modify it.
kwargs = dict(prefix="%s_" % os.path.basename(path), suffix=".fits")
fd, output_path = tempfile.mkstemp(**kwargs)
os.close(fd)
shutil.copy2(path, output_path)
# Allow FITSeeingImage.__init__() to write to the FITS header
util.owner_writable(output_path, True) # chmod u+w
args = output_path, options.maximum, options.margin
kwargs = dict(coaddk=options.coaddk)
image = FITSeeingImage(*args, **kwargs)
fwhm = image.fwhm(per=options.per, mode=mode)
logging.debug("%s: FWHM = %.3f" % (path, fwhm))
elong = image.elongation(per=options.per, mode=mode)
logging.debug("%s: Elongation = %.3f" % (path, elong))
nstars = len(image)
logging.debug("%s: %d sources detected" % (path, nstars))
queue.put((path, output_path, fwhm, elong, nstars))
except fitsimage.NonStandardFITS:
logging.info("%s ignored (non-standard FITS)" % path)
return
# Raised if no star can be used to compute the FWHM or elongation
except ValueError, e:
logging.info("%s ignored (%s)" % (path, str(e)))
def main(arguments=None):
"""main() function, encapsulated in a method to allow for easy invokation.
This method follows Guido van Rossum's suggestions on how to write Python
main() functions in order to make them more flexible. By encapsulating the
main code of the script in a function and making it take an optional
argument the script can be called not only from other modules, but also
from the interactive Python prompt.
    Guido van Rossum - Python main() functions:
http://www.artima.com/weblogs/viewpost.jsp?thread=4829
Keyword arguments:
arguments - the list of command line arguments passed to the script.
"""
if arguments is None:
arguments = sys.argv[1:] # ignore argv[0], the script name
(options, args) = parser.parse_args(args=arguments)
# Adjust the logger level to WARNING, INFO or DEBUG, depending on the
# given number of -v options (none, one or two or more, respectively)
logging_level = logging.WARNING
if options.verbose == 1:
logging_level = logging.INFO
elif options.verbose >= 2:
logging_level = logging.DEBUG
logging.basicConfig(format=style.LOG_FORMAT, level=logging_level)
# Print the help and abort the execution if there are not two positional
# arguments left after parsing the options, as the user must specify at
# least one (only one?) input FITS file and the output directory
if len(args) < 2:
parser.print_help()
return 2 # 2 is generally used for command line syntax errors
else:
input_paths = args[:-1]
output_dir = args[-1]
# Make sure that the output directory exists, and create it if it doesn't.
# The subdirectories for discarded images are not yet created; we put this
# off until we know that at least one image is indeed going to be excluded.
util.determine_output_dir(output_dir)
fwhm_dir = os.path.join(output_dir, options.fwhm_dir)
elong_dir = os.path.join(output_dir, options.elong_dir)
print "%s%d paths given as input, on which sources will be detected." % (
style.prefix,
len(input_paths),
)
print "%sRunning SExtractor on all the FITS images..." % style.prefix
# Use a pool of workers and run SExtractor on the images in parallel!
pool = multiprocessing.Pool(options.ncores)
map_async_args = ((path, options) for path in input_paths if os.path.isfile(path))
result = pool.map_async(parallel_sextractor, map_async_args)
util.show_progress(0.0)
while not result.ready():
time.sleep(1)
util.show_progress(queue.qsize() / len(input_paths) * 100)
# Do not update the progress bar when debugging; instead, print it
# on a new line each time. This prevents the next logging message,
# if any, from being printed on the same line that the bar.
if logging_level < logging.WARNING:
print
result.get() # reraise exceptions of the remote call, if any
util.show_progress(100) # in case the queue was ready too soon
print
    # Three sets, to keep track of all the images on which SExtractor
    # has been run, and also of which ones have been discarded because of
    # their unacceptable FWHM or elongation ratio.
all_images = set()
fwhm_discarded = set()
elong_discarded = set()
# Dictionary mapping each input image to the temporary output file: a copy
# of the input image but whose FITS header has been updated with the path
# to the SExtractor catalog and the MD5 hash of the configuration files.
seeing_tmp_paths = dict()
# Extract the four-element tuples (path to the image, FWHM, elongation and
# number of sources detected by SExtractor) from the multiprocessing' queue
# and store the values in three independent dictionaries; these provide
# fast access, with O(1) lookup, to the data.
fwhms = {}
elongs = {}
nstars = {}
for _ in xrange(queue.qsize()):
path, output_tmp_path, fwhm, elong, stars = queue.get()
all_images.add(path)
seeing_tmp_paths[path] = output_tmp_path
# The clean-up function cannot be registered in parallel_sextractor()
# because it would remove the temporary FITS file when the process
# terminates (instead of when our program exits, which is what we
# need). Do it here, to make sure that whatever happens next these
# temporary files are always deleted.
atexit.register(util.clean_tmp_files, output_tmp_path)
fwhms[path] = fwhm
elongs[path] = elong
nstars[path] = stars
if not all_images:
print "%sError. No FITS images were detected." % style.prefix
print style.error_exit_message
return 1
# Let's first discard those images with a bad full width at half maximum.
    # In order to do this, we fit a normal distribution (assuming the FWHMs to
# be Gaussian distributed) and define the maximum allowed value as that
# which exceeds the specified number of standard deviations of the mean.
print "%sFitting a Gaussian distribution to the FWHMs..." % style.prefix,
sys.stdout.flush()
logging.debug("Fitting a Gaussian distribution to the %d FWHMs" % len(fwhms))
mu, sigma = scipy.stats.norm.fit(fwhms.values())
logging.debug("FWHMs mean = %.3f" % mu)
logging.debug("FWHMs sigma = %.3f" % sigma)
print "done."
sys.stdout.flush()
print "%sFWHMs mean = %.3f, sigma = %.3f pixels" % (style.prefix, mu, sigma)
maximum_fwhm = mu + (options.fwhm_sigma * sigma)
logging.debug(
"Maximum allowed FWHM = %.3f + %.1f x %.3f = %.3f pixels"
% (mu, options.fwhm_sigma, sigma, maximum_fwhm)
)
print "%sDiscarding images with a FWHM > %.3f + %.1f x %.3f = %.3f pixels..." % (
style.prefix,
mu,
options.fwhm_sigma,
sigma,
maximum_fwhm,
)
# Exclude images by adding them to the FWHM-discarded set
for path, fwhm in sorted(fwhms.iteritems()):
if fwhm > maximum_fwhm:
fwhm_discarded.add(path)
logging.debug(
"%s discarded (FWHM = %.3f > %.3f" % (path, fwhm, maximum_fwhm)
)
print "%s%s discarded (FWHM = %.3f)" % (style.prefix, path, fwhm)
logging.info("Images discarded by FWHM: %d" % len(fwhm_discarded))
if not fwhm_discarded:
print "%sNo images were discarded because of their FWHM. Hooray!" % style.prefix
else:
discarded_fraction = len(fwhm_discarded) / len(all_images) * 100
nleft = len(all_images) - len(fwhm_discarded) # non-discarded images
print "%s%d FITS images (%.2f %%) discarded, %d remain" % (
style.prefix,
len(fwhm_discarded),
discarded_fraction,
nleft,
)
# Repeat the same approach, now with the elongation ratios. Images already
# discarded because of their FWHM are not even considered -- why discard
# them twice? They can simply be ignored.
print "%sFitting a Gaussian distribution to the elongations..." % style.prefix,
sys.stdout.flush()
mu, sigma = scipy.stats.norm.fit(elongs.values())
logging.debug("Elongations mean = %.3f" % mu)
logging.debug("Elongations sigma = %.3f" % sigma)
print "done."
sys.stdout.flush()
print "%sElongation mean = %.3f, sigma = %.3f pixels" % (style.prefix, mu, sigma)
maximum_elong = mu + (options.elong_sigma * sigma)
logging.debug(
"Maximum allowed elongation = %.3f + %.1f x %.3f = %.3f pixels"
% (mu, options.elong_sigma, sigma, maximum_elong)
)
print "%sDiscarding images with an elongation > %.3f + %.1f x %.3f = %.3f ..." % (
style.prefix,
mu,
options.elong_sigma,
sigma,
maximum_elong,
)
for path, elong in sorted(elongs.iteritems()):
# Ignore FWHM-discarded images
if path in fwhm_discarded:
logging.debug("%s ignored (already discarded by FWHM)" % path)
continue
elif elong > maximum_elong:
elong_discarded.add(path)
logging.debug(
"%s discarded (elongation = %.3f > %.3f" % (path, fwhm, maximum_elong)
)
print "%s%s discarded (elongation = %.3f)" % (style.prefix, path, elong)
logging.info("Images discarded by elongation: %d" % len(elong_discarded))
if not elong_discarded:
print "%sNo images were discarded because of their elongation. Yay!" % style.prefix
else:
initial_size = len(all_images) - len(fwhm_discarded)
discarded_fraction = len(elong_discarded) / initial_size * 100
nleft = initial_size - len(elong_discarded)
print "%s%d FITS images (%.2f %%) discarded, %d remain" % (
style.prefix,
len(elong_discarded),
discarded_fraction,
nleft,
)
# Finally, take the images whose number of stars is at the 'stars_per'
# percentile and select the one with the best FWHM. This will be our
# 'best-seeing' image, in which sources may be detected. Taking directly
    # the image with the best FWHM may not work as we need the best-seeing
    # image to also be one of the most populated.
print "%sIdentifying the images whose number of detected sources it at " "the %.2f percentile..." % (
style.prefix,
options.stars_per,
),
sys.stdout.flush()
# Ignore discarded images, for whatever reason
logging.debug(
"Finding the %.2f percentile of the number of stars "
"detected by SExtractor" % options.stars_per
)
for path in fwhm_discarded.union(elong_discarded):
del nstars[path]
reason = "FWHM" if path in fwhm_discarded else "elongation"
logging.debug("%s ignored (was discarded by %s)" % (path, reason))
min_nstars = scipy.stats.scoreatpercentile(nstars.values(), options.stars_per)
print "done."
print "%sNumber of stars at percentile = %d, taking the images with at " "least this number of sources..." % (
style.prefix,
min_nstars,
),
sys.stdout.flush()
most_populated_images = [
path for path, stars in nstars.iteritems() if stars >= min_nstars
]
logging.debug(
"There are %s images with a number of stars at the %.2f "
"percentile" % (len(most_populated_images), options.stars_per)
)
logging.debug("Identifying the image with the lowest FWHM")
print "done."
print "%sFinally, finding the image with the lowest FWHM among these " "%d images..." % (
style.prefix,
len(most_populated_images),
),
sys.stdout.flush()
# Find the image with the best seeing (lowest FWHM)
best_seeing = min(most_populated_images, key=lambda path: fwhms[path])
logging.debug("Best-seeing image: %s" % path)
logging.debug("Best-seeing image FWHM = %.3f" % fwhms[best_seeing])
logging.debug("Best-seeing image elongation = %.3f" % elongs[best_seeing])
logging.debug("Best-seeing image sources = %d" % nstars[best_seeing])
assert best_seeing not in fwhm_discarded
assert best_seeing not in elong_discarded
print "done."
print "%sBest-seeing image = %s, with %d sources and a FWHM of %.3f pixels" % (
style.prefix,
best_seeing,
nstars[best_seeing],
fwhms[best_seeing],
)
# The subdirectories are created only if at least one image is going to be
# discarded. We do not want empty directories in case no image is discarded
# because of its full-width at half maximum (FWHM) or elongation.
if fwhm_discarded:
util.determine_output_dir(fwhm_dir, quiet=True)
if elong_discarded:
util.determine_output_dir(elong_dir, quiet=True)
# Finally, copy all the FITS images to the output directory
processed = 0
for path in sorted(all_images):
# Add the suffix to the basename of the FITS image
root, ext = os.path.splitext(os.path.basename(path))
output_filename = root + options.suffix + ext
logging.debug(
"Basename '%s' + '%s' becomes '%s'"
% (path, options.suffix, output_filename)
)
if path in fwhm_discarded:
output_path = os.path.join(fwhm_dir, output_filename)
logging.debug("%s was discarded because of its FWHM" % path)
logging.debug("%s to be copied to subdirectory %s" % (path, fwhm_dir))
history_msg1 = "Image discarded by LEMON on %s" % util.utctime()
history_msg2 = (
"[Discarded] FWHM = %.3f pixels, maximum allowed value = %.3f"
% (fwhms[path], maximum_fwhm)
)
elif path in elong_discarded:
output_path = os.path.join(elong_dir, output_filename)
logging.debug("%s was discarded because of its elongation ratio" % path)