Coverage for melissa/launcher/schema.py: 56%
52 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-09-22 10:36 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-09-22 10:36 +0000
1import rapidjson
2from typing import Dict, Any, Tuple
3import argparse
4from jsonschema import Draft4Validator, validators
5from jsonschema.exceptions import ValidationError
6import logging
7import sys
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Combination of the rapidjson parse-mode flags PM_COMMENTS and
# PM_TRAILING_COMMAS. Presumably passed to rapidjson when the configuration
# file is loaded so that comments and trailing commas are tolerated; the
# call site is not in this module -- TODO confirm.
CONFIG_PARSE_MODE = rapidjson.PM_COMMENTS | rapidjson.PM_TRAILING_COMMAS
# JSON schema describing the Melissa configuration file.
#
# Besides the standard JSON-schema keywords, each entry may carry:
#   * "default": inserted into the user configuration by the extended
#     validator when the key is absent (see `_extend_validator`).
#   * "message": human-readable help text printed by `print_options`.
#
# NOTE(review): JSON-schema Draft 4 expects "required" to be a list of
# property names placed on the enclosing object. The boolean per-property
# form used below looks like Draft 3 syntax and is presumably not enforced
# by Draft4Validator -- confirm before relying on it.
CONF_SCHEMA = {
    'type': 'object',
    'properties': {
        "server_filename": {"type": "string", "required": True, "message": "The name of the file containing the user defined server. Assumed to be in the same folder as the config."},
        "server_class": {"type": "string", "required": True, "message": "The class name of the user defined server inside the server_filename file."},
        "output_dir": {"type": "string", "required": True, "message": "The output dir to write results and logs. If relative path, then it is assumed relative to the CWD of the melissa-launcher command."},
        "study_options": {
            "type": "object",
            "message": "A custom dictionary which is accessible inside the server_class for users to parameterize their studies.",
            "properties": {
                "parameter_sweep_size": {"type": "integer", "message": "The number of clients to launch (or groups if using sobol indices)."},
                "num_samples": {"type": "integer", "default": 0, "message": "Number of samples expected to arrive from each client. When not given, it can be inferred by Melissa (DL server only)."},
                "verbosity": {"type": "integer", "default": 0, "message": "Set the logger verbosity. 3 includes all levels (including info, error, warning, and debug), 0 reduces to logging to minimum (error only)."}
            }
        },
        "dl_config": {
            "type": "object",
            "properties": {
                "simulation_timeout": {"type": "integer", "default": 400, "message": "Seconds of client inactivity between two messages before timing out the client."},
                "batch_size": {"type": "integer", "default": 10, "message": "Number of samples to build each batch."},
                "n_batches_update": {"type": "integer", "default": 10, "message": "Number of batches between validation checks and loss logging."},
                "buffer_size": {"type": "integer", "default": 10000, "message": "Maximum number of samples to store in the buffer (object used to generate batches for training)."},
                "per_server_watermark": {"type": "integer", "message": "Required number of samples in each server process buffer before batch creation and training can begin."},
                "tensorboard": {"type": "boolean", "default": True, "message": "Set to False to disable tensorboard logger entirely for production level runs where you do not wish to log metrics"},
                "get_buffer_statistics": {"type": "boolean", "default": False, "message": "Estimate buffer statistics each time a batch is generated and add to the tensorboard log. Requires custom server implementation of `get_buffer_statistics()`."},
            },
            "message": "A custom dictionary which is accessible inside the server_class for users to customize their training loops and buffers."},
        "sa_config": {
            "type": "object",
            "properties": {
                "mean": {"type": "boolean", "default": True, "message": "Collect mean for all fields."},
                "variance": {"type": "boolean", "default": False, "message": "Collect variance for all fields."},
                "skewness": {"type": "boolean", "default": False, "message": "Collect skewness for all fields."},
                "kurtosis": {"type": "boolean", "default": False, "message": "Collect kurtosis for all fields."},
                "checkpoint_interval": {"type": "integer", "default": 0, "message": "Checkpoint frequency for the sensitivity analysis. Number of samples between each checkpoint."},
                "sobol_indices": {"type": "boolean", "default": False, "message": "Activate sobol indices. Group count determined by study_options.parameter_sweep_size"},
            },
            "message": "A dictionary used to control the sensitivity analysis servers."
        },
        "server_config": {
            "type": "object",
            "default": {"preprocessing_commands": []},
            "properties": {
                "preprocessing_commands": {"type": "array", "default": [], "message": "Commands that will be preprocessed by bash prior to launching the server job."},
                "melissa_server_env": {"type": "string", "message": "Explicit path to the server installation. Typically does not need to be touched unless two different melissa installations are used."}
            },
            "message": "Special configuration for the server only.",
        },
        "client_config": {
            "type": "object",
            "properties": {
                "preprocessing_commands": {"type": "array", "default": [], "message": "Commands that will be preprocessed by bash prior to launching the client job."},
                "melissa_client_env": {"type": "string", "message": "Explicit path to find the client installation. Typically does not need to be touched unless two different melissa installations are used."}
            },
            "message": "Special configuration for the client only."},
        # This section has no "message" of its own; `print_options` falls
        # back to an empty description for it.
        "launcher_config": {
            "type": "object",
            "properties": {
                "scheduler": {"type": "string", "required": True, "message": "Select scheduler, can be 'oar', 'slurm', 'openmpi'"},
                "server_executable": {"type": "string", "default": "server.sh", "message": "Experienced users only, used to modify the bash template."},
                "bind": {"type": "string", "default": "0.0.0.0", "message": "Address to bind the REST API."},
                "http_port": {"type": "integer", "default": 8888, "message": "Port to put the REST API."},
                "http_token": {"type": "string", "default": "", "message": "Token used to access REST API, leave empty to let Melissa generate a unique secure token on launch."},
                "fault_tolerance": {"type": "boolean", "default": True, "message": "Activate/deactivate fault tolerance."},
                "protocol": {"type": "string", "default": "auto", "message": "Experienced users only, Melissa determines best protocol automatically."},
                "std_output": {"type": "boolean", "default": True, "message": "Keep or delete the std out/err files from all jobs."},
                "scheduler_arg": {"type": "array", "default": [], "message": "Common arguments to pass to scheduler for both client and server."},
                "scheduler_arg_client": {"type": "array", "default": [], "message": "Arguments to pass to scheduler for client only."},
                "scheduler_arg_server": {"type": "array", "default": [], "message": "Arguments to pass to scheduler for server only."},
                "scheduler_server_command": {"type": "string", "message": "Option to change the execution command (e.g. in place of srun or mpirun)"},
                "scheduler_client_command": {"type": "string", "message": "Option to change the execution command (e.g. in place of srun or mpirun)"},
                # The help texts of the next two options were swapped
                # (server option described the client command and vice versa).
                "scheduler_server_command_options": {"type": "array", "default": [], "message": "Options to pass to the scheduler inside the server execution command. Example: ['mpi=pmi2'] which, with slurm, would yield an sbatch.X.sh file with srun mpi=pmi2 <other arguments>."},
                "scheduler_client_command_options": {"type": "array", "default": [], "message": "Options to pass to the scheduler inside the client execution command. Example: ['mpi=pmi2'] which, with slurm, would yield an sbatch.X.sh file with srun mpi=pmi2 <other arguments>."},
                "scheduler_arg_container": {"type": "array", "default": [], "message": "Arguments to pass to containers (e.g. oar-hybrid)."},
                "container_client_size": {"type": "integer", "default": 1, "message": "Size of the container."},
                "job_limit": {"type": "integer", "default": 1000, "message": "Maximum number of active jobs allowed."},
                "besteffort_allocation_frequency": {"type": "integer", "default": 1, "message": "The frequency of job submission to submit to best-effort queue."},
                "timer_delay": {"type": "integer", "message": "The minimal delay between two job status updates with the same value."},
                "server_timeout": {"type": "integer", "message": "Maximum amount of seconds which defines a server timeout exit."},
                "load_from_checkpoint": {"type": "boolean", "default": False, "message": "Look for checkpoint files to start the server from."},
                "verbosity": {"type": "integer", "default": 0, "message": "Set the logger verbosity. 3 includes all levels (including info, error, warning, and debug), 0 reduces to logging to minimum (error only)."}
            },
        }
    }
}
def _extend_validator(validator_class):
    """
    Build a validator class that also fills in schema defaults.

    The returned class behaves like ``validator_class`` except for its
    ``properties`` keyword handler: for every property whose subschema
    declares a ``default`` and which is missing from the instance being
    validated, a copy of that default is inserted into the instance. The
    original ``properties`` handler is then run, so the inserted values are
    validated (and nested objects receive their own defaults) as usual.

    Args:
        validator_class: a jsonschema validator class exposing a
            ``VALIDATORS`` mapping (``Draft4Validator`` in this module).

    Returns:
        The extended validator class created by ``validators.extend``.
    """
    # Local import so this function does not depend on the file's import
    # header being edited.
    import copy

    validate_properties = validator_class.VALIDATORS['properties']

    def set_defaults(validator, properties, instance, schema):
        # Defaults can only be written into JSON objects. For any other
        # instance type we skip straight to the stock handler and let the
        # "type" keyword report the mismatch instead of crashing here.
        if validator.is_type(instance, "object"):
            for prop, subschema in properties.items():
                if 'default' in subschema and prop not in instance:
                    # Deep copy: the schema holds mutable defaults ([] and
                    # {...}); sharing them would let later edits of the
                    # config silently modify the schema itself.
                    instance[prop] = copy.deepcopy(subschema['default'])

        yield from validate_properties(
            validator, properties, instance, schema,
        )

    return validators.extend(
        validator_class, {'properties': set_defaults}
    )
def validate_config(args: argparse.Namespace,
                    config: Dict[str, Any]) -> Tuple[argparse.Namespace,
                                                     Dict[str, Any]]:
    """
    Validate ``config`` against ``CONF_SCHEMA``.

    Validation runs through the default-filling validator built by
    ``_extend_validator``, so ``config`` is updated in place with schema
    defaults for missing keys. A schema violation is reported through the
    module logger at CRITICAL level; it is not re-raised.

    Args:
        args: parsed command-line arguments; returned untouched.
        config: the user configuration dictionary.

    Returns:
        The ``(args, config)`` pair that was passed in.
    """
    checker = _extend_validator(Draft4Validator)(CONF_SCHEMA)

    try:
        checker.validate(config)
    except ValidationError as e:
        # Report the problem but let the caller continue.
        logger.critical(
            f"Invalid configuration. Reason: {e}"
        )

    return args, config
class bcolors:
    """ANSI escape sequences used to decorate terminal output."""

    # NOTE(review): the attribute names do not match the colours the codes
    # select (32 is green, 91 is bright red) -- confirm whether the names or
    # the values are the intended ones before changing either.
    OKBLUE = '\033[32m'
    OKGREEN = '\033[91m'

    # Reset all attributes back to the terminal default.
    ENDC = '\033[0m'

    # Underlined text.
    UNDERLINE = '\033[4m'
def print_options():
    """
    Print every configuration option described by ``CONF_SCHEMA`` and exit.

    For each top-level entry the name, help message and type are printed.
    If the entry declares nested ``properties``, each nested option is
    printed as well, together with its default value (``N/A`` when the
    schema declares none). A missing ``message`` is shown as an empty
    description at both levels.

    Raises:
        SystemExit: always, once the listing has been printed.
    """
    print(f"{bcolors.UNDERLINE}Available config options{bcolors.ENDC}\n")

    for section_name, section in CONF_SCHEMA["properties"].items():
        section_type = section['type']
        section_message = section.get("message", "")
        print(f"{bcolors.OKBLUE}{section_name}{bcolors.ENDC}: {section_message} "
              f"Type {bcolors.UNDERLINE}{section_type}{bcolors.ENDC}.")

        # Entries without nested properties (plain scalars) have nothing
        # further to list.
        for option_name, option in section.get("properties", {}).items():
            option_message = option.get("message", "")
            option_type = option['type']
            option_default = option.get("default", "N/A")
            print(f" {bcolors.OKGREEN}{option_name}{bcolors.ENDC}: {option_message} "
                  f"Default value {option_default}, Type {bcolors.UNDERLINE}{option_type}{bcolors.ENDC}.")

    sys.exit()