ProcessAcceleratorAlpaka.py

import FWCore.ParameterSet.Config as cms

import os

from HeterogeneousCore.Common.PlatformStatus import PlatformStatus

class ModuleTypeResolverAlpaka:
    def __init__(self, accelerators, backend):
        # first element is used as the default if nothing is set
        self._valid_backends = []
        if "gpu-nvidia" in accelerators:
            self._valid_backends.append("cuda_async")
        if "gpu-amd" in accelerators:
            self._valid_backends.append("rocm_async")
        if "cpu" in accelerators:
            self._valid_backends.append("serial_sync")
        if len(self._valid_backends) == 0:
            raise cms.EDMException(cms.edm.errors.UnavailableAccelerator, "ModuleTypeResolverAlpaka had no backends available because of the combination of the job configuration and accelerator availability on the machine. The job sees {} accelerators.".format(", ".join(accelerators)))
        if backend is not None:
            if not backend in self._valid_backends:
                raise cms.EDMException(cms.edm.errors.UnavailableAccelerator, "The ProcessAcceleratorAlpaka was configured to use the {} backend, but that backend is not available because of the combination of the job configuration and accelerator availability on the machine. The job was configured to use {} accelerators, which translates to {} Alpaka backends.".format(
                    backend, ", ".join(accelerators), ", ".join(self._valid_backends)))
            if backend != self._valid_backends[0]:
                self._valid_backends.remove(backend)
                self._valid_backends.insert(0, backend)

    def plugin(self):
        return "ModuleTypeResolverAlpaka"

    def setModuleVariant(self, module):
        if module.type_().endswith("@alpaka"):
            defaultBackend = self._valid_backends[0]
            if hasattr(module, "alpaka"):
                if hasattr(module.alpaka, "backend"):
                    if module.alpaka.backend == "":
                        module.alpaka.backend = defaultBackend
                    elif module.alpaka.backend.value() not in self._valid_backends:
                        raise cms.EDMException(cms.edm.errors.UnavailableAccelerator, "Module {} has the Alpaka backend set explicitly, but its accelerator is not available for the job because of the combination of the job configuration and accelerator availability on the machine. The following Alpaka backends are available for the job: {}.".format(module.label_(), ", ".join(self._valid_backends)))
                else:
                    module.alpaka.backend = cms.untracked.string(defaultBackend)
            else:
                module.alpaka = cms.untracked.PSet(
                    backend = cms.untracked.string(defaultBackend)
                )

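# Illustration (not part of the original file): a minimal sketch of what
# setModuleVariant effectively does to an @alpaka module in a configuration.
# The module type and label below are hypothetical.
#
#   process.hypotheticalProducer = cms.EDProducer("HypotheticalProducer@alpaka")
#
# After resolution the module carries an untracked "alpaka" PSet whose "backend"
# string names the chosen backend, equivalent to writing by hand:
#
#   process.hypotheticalProducer = cms.EDProducer("HypotheticalProducer@alpaka",
#       alpaka = cms.untracked.PSet(
#           backend = cms.untracked.string("serial_sync")
#       )
#   )
#
# An explicitly set "backend" must name one of the backends available to the job
# ("serial_sync", "cuda_async", "rocm_async"), otherwise an exception is raised.
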
class ProcessAcceleratorAlpaka(cms.ProcessAccelerator):
    """ProcessAcceleratorAlpaka itself does not define or inspect the
    availability of any accelerator devices. It merely sets up the
    necessary Alpaka infrastructure based on the availability of
    accelerators that the concrete ProcessAccelerators (like
    ProcessAcceleratorCUDA) define.
    """
    def __init__(self):
        super(ProcessAcceleratorAlpaka, self).__init__()
        self._backend = None

    # User-facing interface
    def setBackend(self, backend):
        self._backend = backend

    # Framework-facing interface
    def moduleTypeResolver(self, accelerators):
        return ModuleTypeResolverAlpaka(accelerators, self._backend)

    def apply(self, process, accelerators):
        # Propagate the AlpakaService messages through the MessageLogger
        if not hasattr(process.MessageLogger, "AlpakaService"):
            process.MessageLogger.AlpakaService = cms.untracked.PSet()

        # Check if the CPU backend is available
        try:
            if not "cpu" in accelerators:
                # raising a non-exception deliberately lands in the bare
                # except below, i.e. the backend is treated as unavailable
                raise False
            from HeterogeneousCore.AlpakaServices.AlpakaServiceSerialSync_cfi import AlpakaServiceSerialSync
        except:
            # the CPU backend is not available, do not load the AlpakaServiceSerialSync
            if hasattr(process, "AlpakaServiceSerialSync"):
                del process.AlpakaServiceSerialSync
        else:
            # the CPU backend is available, ensure the AlpakaServiceSerialSync is loaded
            if not hasattr(process, "AlpakaServiceSerialSync"):
                process.add_(AlpakaServiceSerialSync)

        # Check if CUDA is available, and if the system has at least one usable NVIDIA GPU
        try:
            if not "gpu-nvidia" in accelerators:
                raise False
            from HeterogeneousCore.AlpakaServices.AlpakaServiceCudaAsync_cfi import AlpakaServiceCudaAsync
        except:
            # CUDA is not available, do not load the AlpakaServiceCudaAsync
            if hasattr(process, "AlpakaServiceCudaAsync"):
                del process.AlpakaServiceCudaAsync
        else:
            # CUDA is available, ensure the AlpakaServiceCudaAsync is loaded
            if not hasattr(process, "AlpakaServiceCudaAsync"):
                process.add_(AlpakaServiceCudaAsync)

        # Check if ROCm is available, and if the system has at least one usable AMD GPU
        try:
            if not "gpu-amd" in accelerators:
                raise False
            from HeterogeneousCore.AlpakaServices.AlpakaServiceROCmAsync_cfi import AlpakaServiceROCmAsync
        except:
            # ROCm is not available, do not load the AlpakaServiceROCmAsync
            if hasattr(process, "AlpakaServiceROCmAsync"):
                del process.AlpakaServiceROCmAsync
        else:
            # ROCm is available, ensure the AlpakaServiceROCmAsync is loaded
            if not hasattr(process, "AlpakaServiceROCmAsync"):
                process.add_(AlpakaServiceROCmAsync)

# Ensure this module is kept in the configuration when dumping it
cms.specialImportRegistry.registerSpecialImportForType(ProcessAcceleratorAlpaka, "from HeterogeneousCore.AlpakaCore.ProcessAcceleratorAlpaka import ProcessAcceleratorAlpaka")
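
# Usage sketch (not part of the original file; the exact way the accelerator is
# added to the process, e.g. via process.add_() or loading a _cfi/_cff, is an
# assumption here):
#
#   import FWCore.ParameterSet.Config as cms
#   from HeterogeneousCore.AlpakaCore.ProcessAcceleratorAlpaka import ProcessAcceleratorAlpaka
#
#   process = cms.Process("Demo")
#   process.add_(ProcessAcceleratorAlpaka())
#   # Optionally pin every @alpaka module in the job to one backend,
#   # e.g. the CPU backend for debugging
#   process.ProcessAcceleratorAlpaka.setBackend("serial_sync")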