Coverage for melissa/launcher/schema.py: 56%

52 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-09-22 10:36 +0000

import argparse
import copy
import logging
import sys
from typing import Any, Dict, Tuple

import rapidjson
from jsonschema import Draft4Validator, validators
from jsonschema.exceptions import ValidationError

8 

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# rapidjson parse-mode flags: tolerate comments and trailing commas.
# NOTE(review): presumably passed to rapidjson when the configuration file
# is loaded; the call site is not in this module.
CONFIG_PARSE_MODE = rapidjson.PM_TRAILING_COMMAS | rapidjson.PM_COMMENTS

12 

13 

# JSON schema (Draft 4) describing the Melissa configuration file.
#
# Besides the standard keywords, each entry may carry:
#   * "default": value inserted by the extended validator when the key is
#     absent from the user configuration;
#   * "message": help text shown by `print_options()` (not a JSON-schema
#     keyword, the validator ignores it);
#   * "required": True on a single property: descriptive only. Draft 4 does
#     not interpret a boolean `required` on a property, so mandatory keys
#     are additionally listed in the "required" arrays of the enclosing
#     objects, which is the form Draft 4 enforces.
#
# The "required" arrays are deliberately placed after "properties" so that
# defaults are still filled in before a missing key is reported.
CONF_SCHEMA = {
    "type": "object",
    "properties": {
        "server_filename": {"type": "string", "required": True, "message": "The name of the file containing the user defined server. Assumed to be in the same folder as the config."},
        "server_class": {"type": "string", "required": True, "message": "The class name of the user defined server inside the server_filename file."},
        "output_dir": {"type": "string", "required": True, "message": "The output dir to write results and logs. If relative path, then it is assumed relative to the CWD of the melissa-launcher command."},
        "study_options": {
            "type": "object",
            "message": "A custom dictionary which is accessible inside the server_class for users to parameterize their studies.",
            "properties": {
                "parameter_sweep_size": {"type": "integer", "message": "The number of clients to launch (or groups if using sobol indices)."},
                "num_samples": {"type": "integer", "default": 0, "message": "Number of samples expected to arrive from each client. When not given, it can be inferred by Melissa (DL server only)."},
                "verbosity": {"type": "integer", "default": 0, "message": "Set the logger verbosity. 3 includes all levels (including info, error, warning, and debug), 0 reduces to logging to minimum (error only)."},
            },
        },
        "dl_config": {
            "type": "object",
            "properties": {
                "simulation_timeout": {"type": "integer", "default": 400, "message": "Seconds of client inactivity between two messages before timing out the client."},
                "batch_size": {"type": "integer", "default": 10, "message": "Number of samples to build each batch."},
                "n_batches_update": {"type": "integer", "default": 10, "message": "Number of batches between validation checks and loss logging."},
                "buffer_size": {"type": "integer", "default": 10000, "message": "Maximum number of samples to store in the buffer (object used to generate batches for training)."},
                "per_server_watermark": {"type": "integer", "message": "Required number of samples in each server process buffer before batch creation and training can begin."},
                "tensorboard": {"type": "boolean", "default": True, "message": "Set to False to disable tensorboard logger entirely for production level runs where you do not wish to log metrics"},
                "get_buffer_statistics": {"type": "boolean", "default": False, "message": "Estimate buffer statistics each time a batch is generated and add to the tensorboard log. Requires custom server imlementation of `get_buffer_statistics()`."},
            },
            "message": "A custom dictionary which is accessible inside the server_class for users to customize their training loops and buffers.",
        },
        "sa_config": {
            "type": "object",
            "properties": {
                "mean": {"type": "boolean", "default": True, "message": "Collect mean for all fields."},
                "variance": {"type": "boolean", "default": False, "message": "Collect variance for all fields."},
                "skewness": {"type": "boolean", "default": False, "message": "Collect skewness for all fields."},
                "kurtosis": {"type": "boolean", "default": False, "message": "Collect kurtosis for all fields."},
                "checkpoint_interval": {"type": "integer", "default": 0, "message": "Checkpoint frequency for the sensitivity analysis. Number of samples between each checkpoint."},
                "sobol_indices": {"type": "boolean", "default": False, "message": "Activate sobol indicies. Group count determined by study_options.parameter_sweep_size"},
            },
            "message": "A dictionary used to control the sensitivity analysis servers.",
        },
        "server_config": {
            "type": "object",
            "default": {"preprocessing_commands": []},
            "properties": {
                "preprocessing_commands": {"type": "array", "default": [], "message": "Commands that will be preprocessed by bash prior to launching the server job."},
                "melissa_server_env": {"type": "string", "message": "Explicit path to the server installation. Typically does not need to be touched unless two different melissa installations are used."},
            },
            "message": "Special configuration for the server only.",
        },
        "client_config": {
            "type": "object",
            "properties": {
                "preprocessing_commands": {"type": "array", "default": [], "message": "Commands that will be preprocessed by bash prior to launching the client job."},
                "melissa_client_env": {"type": "string", "message": "Explicit path to find the client installation. Typically does not need to be touched unless two different melissa installations are used."},
            },
            "message": "Special configuration for the client only.",
        },
        "launcher_config": {
            "type": "object",
            "properties": {
                "scheduler": {"type": "string", "required": True, "message": "Select scheduler, can be 'oar', 'slurm', 'openmpi'"},
                "server_executable": {"type": "string", "default": "server.sh", "message": "Experienced users only, used to modify the bash template."},
                "bind": {"type": "string", "default": "0.0.0.0", "message": "Address to bind the REST API."},
                "http_port": {"type": "integer", "default": 8888, "message": "Port to put the REST API."},
                "http_token": {"type": "string", "default": "", "message": "Token used to access REST API, leave empty to let Melissa generate a unique secure token on launch."},
                "fault_tolerance": {"type": "boolean", "default": True, "message": "Activate/deactivate fault tolerance."},
                "protocol": {"type": "string", "default": "auto", "message": "Experienced users only, Melissa determines best protocol automatically."},
                "std_output": {"type": "boolean", "default": True, "message": "Keep or delete the std out/err files from all jobs."},
                "scheduler_arg": {"type": "array", "default": [], "message": "Common arguments to pass to scheduler for both client and server."},
                "scheduler_arg_client": {"type": "array", "default": [], "message": "Arguments to pass to scheduler for client only."},
                "scheduler_arg_server": {"type": "array", "default": [], "message": "Arguments to pass to scheduler for server only."},
                "scheduler_server_command": {"type": "string", "message": "Option to change the execution command (e.g. in place of srun or mpirun)"},
                "scheduler_client_command": {"type": "string", "message": "Option to change the execution command (e.g. in place of srun or mpirun)"},
                # The two help texts below used to be swapped between the
                # server and client options.
                "scheduler_server_command_options": {"type": "array", "default": [], "message": "Options to pass to the scheduler inside the server execution command. Example: ['mpi=pmi2'] which, with slurm, would yield an sbatch.X.sh file with srun mpi=pmi2 <other arguments>."},
                "scheduler_client_command_options": {"type": "array", "default": [], "message": "Options to pass to the scheduler inside the client execution command. Example: ['mpi=pmi2'] which, with slurm, would yield an sbatch.X.sh file with srun mpi=pmi2 <other arguments>."},
                "scheduler_arg_container": {"type": "array", "default": [], "message": "Arguments to pass to containers (e.g. oar-hybrid)."},
                "container_client_size": {"type": "integer", "default": 1, "message": "Size of the container."},
                "job_limit": {"type": "integer", "default": 1000, "message": "Maximum number of active jobs allowed."},
                "besteffort_allocation_frequency": {"type": "integer", "default": 1, "message": "The frequency of job submission to submit to best-effort queue."},
                "timer_delay": {"type": "integer", "message": "The minimal delay between two job status updates with the same value."},
                "server_timeout": {"type": "integer", "message": "Maximum amount of seconds which defines a server timeout exit."},
                "load_from_checkpoint": {"type": "boolean", "default": False, "message": "Look for checkpoint files to start the server from."},
                "verbosity": {"type": "integer", "default": 0, "message": "Set the logger verbosity. 3 includes all levels (including info, error, warning, and debug), 0 reduces to logging to minimum (error only)."},
            },
            # Draft 4 form of the per-property `"required": True` flag above.
            "required": ["scheduler"],
        },
    },
    # Draft 4 form of the per-property `"required": True` flags above.
    "required": ["server_filename", "server_class", "output_dir"],
}

99 

100 

def _extend_validator(validator_class):
    """
    Extended validator for Melissa

    Wraps the `properties` keyword of `validator_class` so that, before the
    regular property validation runs, every property whose subschema
    declares a `default` and which is absent from the instance is inserted
    into the instance.

    :param validator_class: jsonschema validator class to extend.
    :return: a new validator class created with `validators.extend`.
    """
    validate_properties = validator_class.VALIDATORS['properties']

    def set_defaults(validator, properties, instance, schema):
        # A non-mapping instance cannot receive defaults; skip the filling
        # step and let the regular validation report the problem.
        if isinstance(instance, dict):
            for prop, subschema in properties.items():
                if 'default' in subschema and prop not in instance:
                    # Insert a private copy: handing out the schema's own
                    # list/dict would let later edits of the configuration
                    # silently alter the defaults of the shared schema.
                    instance[prop] = copy.deepcopy(subschema['default'])

        yield from validate_properties(
            validator, properties, instance, schema,
        )

    return validators.extend(
        validator_class, {'properties': set_defaults}
    )

120 

121 

def validate_config(args: argparse.Namespace,
                    config: Dict[str, Any]) -> Tuple[argparse.Namespace,
                                                     Dict[str, Any]]:
    """
    Validate `config` against CONF_SCHEMA and fill in schema defaults.

    The validator built by `_extend_validator` inserts missing defaults
    directly into `config`. A validation failure is reported through the
    module logger at CRITICAL level and is not re-raised, so the function
    returns normally in both cases.

    :param args: command-line namespace; returned as received.
    :param config: configuration dictionary; updated in place.
    :return: the `(args, config)` pair.
    """
    validator_cls = _extend_validator(Draft4Validator)
    try:
        checker = validator_cls(CONF_SCHEMA)
        checker.validate(config)
    except ValidationError as err:
        logger.critical(f"Invalid configuration. Reason: {err}")

    return args, config

135 

136 

class bcolors:
    """
    ANSI escape sequences used to decorate terminal output.

    The constant names do not all describe the colour they produce (see the
    per-attribute comments). Names and values are kept as they are because
    they define both the public interface and what is displayed.
    """
    OKBLUE = '\033[32m'     # SGR 32: green foreground, despite the name
    OKGREEN = '\033[91m'    # SGR 91: bright red foreground, despite the name
    ENDC = '\033[0m'        # SGR 0: reset all attributes
    UNDERLINE = '\033[4m'   # SGR 4: underline

142 

143 

def print_options():
    """
    Print every option declared in CONF_SCHEMA, then exit the interpreter.

    Each top-level entry is printed with its help message and type. For
    entries that declare nested `properties`, each nested option follows
    with its message, default value ("N/A" when the schema declares none)
    and type. A missing `message` is printed as an empty string.

    :raises SystemExit: always, through `sys.exit()`, once printing is done.
    """
    print(f"{bcolors.UNDERLINE}Available config options{bcolors.ENDC}\n")
    for section_name, section in CONF_SCHEMA["properties"].items():
        section_type = section['type']
        message = section.get("message", "")
        print(f"{bcolors.OKBLUE}{section_name}{bcolors.ENDC}: {message} "
              f"Type {bcolors.UNDERLINE}{section_type}{bcolors.ENDC}.")

        # Entries without nested properties simply have nothing more to list.
        for option_name, option in section.get("properties", {}).items():
            message = option.get('message', "")
            option_type = option['type']
            default = option.get('default', "N/A")
            print(f" {bcolors.OKGREEN}{option_name}{bcolors.ENDC}: {message} "
                  f"Default value {default}, "
                  f"Type {bcolors.UNDERLINE}{option_type}{bcolors.ENDC}.")
    sys.exit()