# This code is a part of the Democratising Archival X-ray Astronomy (DAXA) module.
# Last modified by David J Turner (turne540@msu.edu) 23/04/2024, 17:33. Copyright (c) The Contributors
import os.path
from random import randint
from astropy.units import Quantity, UnitConversionError
from daxa import NUM_CORES
from daxa.archive.base import Archive
from daxa.exceptions import NoDependencyProcessError
from daxa.process._cleanup import _last_process
from daxa.process.erosita._common import _esass_process_setup, ALLOWED_EROSITA_MISSIONS, esass_call, _is_valid_flag
[docs]
@_last_process(ALLOWED_EROSITA_MISSIONS, 1)
@esass_call
def cleaned_evt_lists(obs_archive: Archive, lo_en: Quantity = Quantity(0.2, 'keV'),
                      hi_en: Quantity = Quantity(10, 'keV'), flag: int = 0xc0000000, flag_invert: bool = True,
                      pattern: int = 15, num_cores: int = NUM_CORES, disable_progress: bool = False,
                      timeout: Quantity = None):
    """
    The function wraps the eROSITA eSASS task evtool, which is used for selecting events.
    This has been tested up to evtool v2.10.1

    This function is used to apply the soft-proton filtering (along with any other filtering you may desire, including
    the setting of energy limits) to eROSITA event lists, resulting in the creation of sets of cleaned event lists
    which are ready to be analysed.

    One evtool command is constructed per ObsID (covering all of the instruments selected for that ObsID), and only
    for ObsIDs where the 'flaregti' process is recorded as having run successfully.

    :param Archive obs_archive: An Archive instance containing eROSITA mission instances with observations for
        which cleaned event lists should be created. This function will fail if no eROSITA missions are present in
        the archive.
    :param Quantity lo_en: The lower bound of an energy filter to be applied to the cleaned, filtered, event lists. If
        'lo_en' is set to an Astropy Quantity, then 'hi_en' must be as well. Default is 0.2 keV, which is the
        minimum allowed by the eROSITA toolset. Passing None will result in the default value being used.
    :param Quantity hi_en: The upper bound of an energy filter to be applied to the cleaned, filtered, event lists. If
        'hi_en' is set to an Astropy Quantity, then 'lo_en' must be as well. Default is 10 keV, which is the
        maximum allowed by the eROSITA toolset. Passing None will result in the default value being used.
    :param int flag: FLAG parameter to select events based on owner, information, rejection, quality, and corrupted
        data. The eROSITA website contains the full description of event flags in section 1.1.2 of the following link:
        https://erosita.mpe.mpg.de/edr/DataAnalysis/prod_descript/EventFiles_edr.html. The default parameter will
        select all events flagged as either singly corrupt or as part of a corrupt frame. Currently ONLY the default
        value is accepted, and the flag is not passed to evtool (see the NotImplementedError entry below).
    :param bool flag_invert: If set to True, this function will discard all events selected by the flag parameter.
        This is the default behaviour. Currently ONLY True is accepted.
    :param int pattern: Selects events of a certain pattern chosen by the integer key. The default of 15 selects
        all four of the recognized legal patterns.
    :param int num_cores: The number of cores to use, default is set to 90% of available.
    :param bool disable_progress: Setting this to true will turn off the eSASS generation progress bar.
    :param Quantity timeout: The amount of time each individual process is allowed to run for, the default is None.
        Please note that this is not a timeout for the entire cleaned_evt_lists process, but a timeout for individual
        ObsID-Inst-subexposure processes.
    :return: A tuple of the per-mission command dictionary, the per-mission dictionary of final file paths, the
        per-mission dictionary of extra information, the progress bar message, 'num_cores', 'disable_progress',
        'timeout', and the value returned by _esass_process_setup. NOTE(review): this is presumably consumed by
        the esass_call decorator rather than returned to the user directly - confirm against that decorator.
    :raises TypeError: If 'lo_en'/'hi_en' are not Astropy quantities, if 'flag_invert' is not a boolean, or if
        'pattern' is not an integer.
    :raises UnitConversionError: If 'lo_en'/'hi_en' are not in units equivalent to eV.
    :raises ValueError: If 'hi_en' is not greater than 'lo_en', if the energy bounds fall outside 0.2-10 keV, or if
        'pattern' is not between 1 and 15 inclusive.
    :raises NotImplementedError: If 'flag' or 'flag_invert' are changed from their defaults.
    :raises FileNotFoundError: If a mission has no valid observations to process.
    :raises NoDependencyProcessError: If flaregti has not run successfully for any ObsID of a mission.
    """
    # Run the setup for eSASS processes, which checks that eSASS is installed, checks that the archive has at least
    #  one eROSITA mission in it, and shows a warning if the eROSITA missions have already been processed
    esass_in_docker = _esass_process_setup(obs_archive)

    # We ensure that if a null value is passed the lo_en and hi_en values revert to default behaviour
    if lo_en is None:
        lo_en = Quantity(0.2, 'keV')
    if hi_en is None:
        hi_en = Quantity(10.0, 'keV')

    # Checking user's choice of energy limit parameters
    if not isinstance(lo_en, Quantity) or not isinstance(hi_en, Quantity):
        raise TypeError("The lo_en and hi_en arguments must be astropy quantities in units "
                        "that can be converted to keV.")
    # Have to make sure that the energy bounds are in units that can be converted to keV (which is what evtool
    #  expects for these arguments).
    if not lo_en.unit.is_equivalent('eV') or not hi_en.unit.is_equivalent('eV'):
        raise UnitConversionError("The lo_en and hi_en arguments must be astropy quantities in units "
                                  "that can be converted to keV.")
    # Checking that the upper energy limit is not below the lower energy limit
    if hi_en <= lo_en:
        raise ValueError("The hi_en argument must be larger than the lo_en argument.")

    # Converting to the right unit
    lo_en = lo_en.to('keV')
    hi_en = hi_en.to('keV')

    # Checking user's lo_en and hi_en inputs are in the valid energy range for eROSITA. As hi_en > lo_en has already
    #  been established, it is sufficient to test the lower bound of lo_en and the upper bound of hi_en.
    if lo_en < Quantity(200, 'eV') or hi_en > Quantity(10000, 'eV'):
        raise ValueError("The lo_en and hi_en value must be between 0.2 keV and 10 keV.")

    # The eSASS software has a bug when the user specifies the flag inversion parameter, so for the moment we won't
    #  let the user choose the flag. Once that is resolved, this check should be replaced by validation of the
    #  flag (that it is an integer, and that _is_valid_flag accepts it).
    if flag != 0xc0000000:
        raise NotImplementedError("DAXA currently doesn't support flag selection due to a bug "
                                  "within the eSASS software.")

    # Checking user has input flag_invert as a boolean - this must happen BEFORE the value check below, otherwise
    #  falsy non-boolean values (e.g. 0) would be reported as 'not implemented' rather than as the wrong type
    if not isinstance(flag_invert, bool):
        raise TypeError("The flag_invert parameter must be a boolean.")
    if not flag_invert:
        raise NotImplementedError("DAXA currently doesn't support flag selection due to a bug "
                                  "within the eSASS software.")

    # Checking user has input pattern as an integer
    if not isinstance(pattern, int):
        raise TypeError("The pattern parameter must be an integer between 1 and 15 inclusive.")
    # Checking user has input a valid pattern
    if pattern <= 0 or pattern >= 16:
        raise ValueError("Valid eROSITA patterns are between 1 and 15 inclusive")

    # Converting the parameters to the correct format for the esass command
    lo_en = lo_en.value
    hi_en = hi_en.value

    # Define the form of the evtool command that must be run for event list filtering to take place.
    # NOTE: flag and flag_invert are deliberately NOT passed to evtool at the moment (see the eSASS bug mentioned
    #  above). When they are re-enabled, ' flag={f} flag_invert={fi}' needs to be added to the evtool call, and
    #  flag_invert will have to be converted from a boolean to 'yes'/'no'.
    evtool_cmd = "cd {d}; evtool eventfiles={ef} gti=FLAREGTI outfile={of} pattern={p} " \
                 "emin={emin} emax={emax}; mv {of} {fep}; rm -r {d}"

    # Sets up storage dictionaries for bash commands, final file paths (to check they exist at the end), and any
    #  extra information that might be useful to provide to the next step in the generation process
    miss_cmds = {}
    miss_final_paths = {}
    miss_extras = {}

    # Just grabs the eROSITA missions, we already know there will be at least one because otherwise
    #  _esass_process_setup would have thrown an error
    erosita_miss = [mission for mission in obs_archive if mission.name in ALLOWED_EROSITA_MISSIONS]

    # We are iterating through erosita missions (options could include erosita_cal_pv for instance).
    for miss in erosita_miss:
        # Sets up the top level keys (mission name) in our storage dictionaries
        miss_cmds[miss.name] = {}
        miss_final_paths[miss.name] = {}
        miss_extras[miss.name] = {}

        # This method will fetch the valid data (ObsID, Instruments) that can be processed
        all_obs_info = obs_archive.get_obs_to_process(miss.name)

        # Checking that any valid observations are left after the get_obs_to_process function is run
        if len(all_obs_info) == 0:
            raise FileNotFoundError("No valid observations have been found, so cleaned_evt_lists may not be run.")

        # all_obs_info is a list of lists, where each list is of the format: [ObsID, Inst, 'usable']. There is a
        #  new entry for each instrument, but we want a single command per ObsID, so the instrument names are
        #  grouped by ObsID in a single pass (matching explicitly on the ObsID column)
        insts_by_obs = {}
        for cur_obs_info in all_obs_info:
            insts_by_obs.setdefault(cur_obs_info[0], []).append(cur_obs_info[1])

        # The insts are all TM{x} where x is a number from 1-7, we want to separate them with _ for the file
        #  names, so the result is a dictionary of the format {ObsID: 'TM1_TM2_...'}
        obs_info_dict = {obs: '_'.join("TM" + ch for ch in ''.join(inst_names) if ch.isdigit())
                         for obs, inst_names in insts_by_obs.items()}

        # Counter for number of ObsIDs that flaregti has not been run successfully on
        bad_obs_counter = 0

        # We iterate through the valid identifying information
        for obs_id, insts in obs_info_dict.items():
            # Checking that flaregti has been run successfully on this observation so that it can be cleaned.
            #  Only this call is wrapped in the try-except, so that a NoDependencyProcessError arising from
            #  anywhere else is not silently swallowed
            try:
                obs_archive.check_dependence_success(miss.name, obs_id, 'flaregti')
            except NoDependencyProcessError:
                # If archive.check_dependence_success raises this error, it means flaregti was not run
                #  successfully, so no command is written for this ObsID and it is counted as skipped
                bad_obs_counter += 1
                continue

            # Search through the process_extra_info attribute of the archive to find the paths
            #  to the event lists
            evt_list_file = obs_archive._process_extra_info[miss.name][obs_id]['path']

            # This path is guaranteed to exist, as it was set up in _esass_process_setup. This is where output
            #  files will be written to.
            dest_dir = obs_archive.construct_processed_data_path(miss, obs_id)

            # Set up a temporary directory to work in (probably not really necessary in this case, but will be
            #  in other processing functions). The upper bound passed to randint MUST be an integer, as float
            #  arguments raise a TypeError in Python 3.12 onwards. Joining the path with os.path.join means we
            #  don't depend on dest_dir ending with a separator.
            temp_name = "tempdir_{}".format(randint(0, 10**8))
            temp_dir = os.path.join(dest_dir, temp_name) + "/"

            # Setting the paths to the output cleaned event list file
            filt_evt_name = "obsid{o}-inst{i}-subexpALL-en{l}_{u}keV-finalevents.fits".format(i=insts, l=lo_en,
                                                                                              u=hi_en, o=obs_id)
            filt_evt_path = os.path.join(dest_dir, 'events', filt_evt_name)

            # The path that needs to exist is the filtered event list
            final_paths = [filt_evt_path]

            # TODO Need to decide which file to check for here to see whether the command has already been run
            # Make the temporary directory (it shouldn't already exist but doing this to be safe)
            os.makedirs(temp_dir, exist_ok=True)

            cmd = evtool_cmd.format(d=temp_dir, ef=evt_list_file, of=filt_evt_name, p=pattern, emin=lo_en,
                                    emax=hi_en, fep=filt_evt_path)

            # Now store the bash command, the path, and extra info in the dictionaries
            miss_cmds[miss.name][obs_id] = cmd
            miss_final_paths[miss.name][obs_id] = final_paths
            miss_extras[miss.name][obs_id] = {'final_evt': filt_evt_path, 'flag': flag, 'flag_invert': flag_invert,
                                              'pattern': pattern}

        # TODO THIS SHOULD BE REMOVED WHEN I'VE MADE SURE THE DEPENDENCY CHECKER WORKS FOR EROSITA
        # If no observations have had flaregti run successfully, then no events can be cleaned
        if bad_obs_counter == len(obs_info_dict):
            raise NoDependencyProcessError("The required process flaregti has not been run successfully "
                                           "for any data in {mn}".format(mn=miss.name))

    # This is just used for populating a progress bar during the process run
    process_message = 'Generating final event lists'

    return (miss_cmds, miss_final_paths, miss_extras, process_message, num_cores, disable_progress, timeout,
            esass_in_docker)