src.fairreckitlib.recommender_system
This module contains the FairRecKit recommender system.
Classes:
RecommenderSystem: class that includes the entire recommender system.
This program has been developed by students from the bachelor Computer Science at Utrecht University within the Software Project course. © Copyright Utrecht University (Department of Information and Computing Sciences)
"""This module contains the FairRecKit recommender system.

Classes:

    RecommenderSystem: class that includes the entire recommender system.

This program has been developed by students from the bachelor Computer Science at
Utrecht University within the Software Project course.
© Copyright Utrecht University (Department of Information and Computing Sciences)
"""

import errno
import os
from typing import Any, Callable, Dict, List, Optional, Union

from .core.threading.thread_processor import ThreadProcessor
from .data.data_factory import KEY_DATA
from .data.filter.filter_constants import KEY_DATA_SUBSET
from .data.ratings.convert_constants import KEY_RATING_CONVERTER
from .data.set.dataset_registry import DataRegistry
from .data.split.split_constants import KEY_SPLITTING
from .evaluation.evaluation_factory import KEY_EVALUATION
from .experiment.experiment_config import PredictorExperimentConfig, RecommenderExperimentConfig
from .experiment.experiment_config_parser import ExperimentConfigParser
from .experiment.experiment_factory import create_experiment_factory
from .experiment.experiment_run import ExperimentPipelineConfig, resolve_experiment_start_run
from .experiment.experiment_thread import ThreadExperiment
from .model.model_factory import KEY_MODELS


class RecommenderSystem:
    """The FairReckit Recommender System.

    Defines the top level API intended for use by applications.

    Public methods:

        abort_computation
        run_experiment
        run_experiment_from_yml
        validate_experiment
        get_active_computations
        get_available_algorithms
        get_available_datasets
        get_available_data_filters
        get_available_metrics
        get_available_rating_converters
        get_available_splitters
    """

    def __init__(self, data_dir: str, result_dir: str, verbose: bool = True):
        """Construct the RecommenderSystem.

        Initializes the data registry with available datasets on which the
        recommender system depends and therefore the data directory is expected to exist.
        The result directory however is created when it does not exist yet.

        Args:
            data_dir: path to the directory that contains the datasets.
            result_dir: path to the directory to store computation results.
            verbose: whether the data registry should give verbose output on startup.

        Raises:
            IOError: when the specified data directory does not exist.
        """
        try:
            self.data_registry = DataRegistry(data_dir, verbose=verbose)
        except IOError as err:
            raise IOError('Failed to initialize DataRegistry: '
                          'unknown data directory => ' + data_dir) from err

        self.result_dir = result_dir
        # Create the result directory (including missing parents) when it does not
        # exist yet; exist_ok avoids a race between the isdir check and creation.
        os.makedirs(self.result_dir, exist_ok=True)

        self.experiment_factory = create_experiment_factory(self.data_registry)
        self.thread_processor = ThreadProcessor()

    def abort_computation(self, computation_name: str) -> bool:
        """Attempt to abort a running computation thread.

        The name of the computation is the same as specified in the configuration
        file when the computation is run. If the computation with the specified
        name does not exist this function returns False.
        Note that the computation is only requested to abort as soon as possible,
        therefore it might take a while until the computation actually stops.

        Args:
            computation_name: name of the active computation thread to abort.

        Returns:
            whether the computation is successfully requested to abort.
        """
        if not self.thread_processor.is_active_thread(computation_name):
            return False

        self.thread_processor.stop(computation_name)
        return True

    def run_experiment(
            self,
            config: Union[PredictorExperimentConfig, RecommenderExperimentConfig],
            *,
            events: Optional[Dict[str, Callable[[Any], None]]] = None,
            num_threads: int = 0,
            verbose: bool = True,
            validate_config: bool = True) -> bool:
        """Run an experiment with the specified configuration.

        It is advised to validate the configuration (default) before running the
        experiment, to make sure the configuration describes a valid experiment.
        The configuration is invalid when it contains no selected datasets or
        models, only the evaluation is optional. If the configuration is found
        to be invalid this function returns False.

        Args:
            config: the configuration of the experiment.
            events: the external events to dispatch during the experiment.
            num_threads: the max number of threads the experiment can use.
            verbose: whether the internal events should give verbose output.
            validate_config: whether to validate the configuration beforehand.

        Raises:
            KeyError: when a computation with the same name is already active.
            IOError: when the experiment result directory already exists.
            TypeError: when the provided configuration is not a valid experiment configuration.

        Returns:
            whether the experiment successfully started.
        """
        if not isinstance(config, (PredictorExperimentConfig, RecommenderExperimentConfig)):
            raise TypeError('Invalid experiment configuration type.')

        result_dir = os.path.join(self.result_dir, config.name)
        if os.path.isdir(result_dir):
            raise IOError('Result already exists: ' + result_dir)

        if validate_config:
            parser = ExperimentConfigParser(verbose)
            config = parser.parse_experiment_config(config.to_yml_format(),
                                                   self.data_registry,
                                                   self.experiment_factory)
            if config is None:
                return False

        self.thread_processor.start(ThreadExperiment(
            config.name,
            events,
            verbose,
            pipeline_config=ExperimentPipelineConfig(
                result_dir,
                self.data_registry,
                self.experiment_factory,
                config,
                0,  # a fresh experiment always starts at run 0
                1,  # a fresh experiment performs a single run
                num_threads
            )
        ))

        return True

    def run_experiment_from_yml(
            self,
            file_path: str,
            *,
            events: Optional[Dict[str, Callable[[Any], None]]] = None,
            num_threads: int = 0,
            verbose: bool = True) -> bool:
        """Run an experiment from a yml file.

        The configuration in the file is validated before starting the experiment.
        It is invalid when it contains no selected datasets or models,
        only the evaluation is optional. If the configuration is found to be
        invalid this function returns False.

        Args:
            file_path: path to the yml file without extension.
            events: the external events to dispatch during the experiment.
            num_threads: the max number of threads the experiment can use.
            verbose: whether the internal events should give verbose output.

        Raises:
            FileNotFoundError: when the specified yml file does not exist.
            KeyError: when a computation with the same name is already active.
            IOError: when the experiment result directory already exists.

        Returns:
            whether the experiment successfully started.
        """
        try:
            parser = ExperimentConfigParser(verbose)
            config = parser.parse_experiment_config_from_yml(file_path,
                                                             self.data_registry,
                                                             self.experiment_factory)
            if config is None:
                return False
        except FileNotFoundError as err:
            raise FileNotFoundError(errno.ENOENT, 'Config file not found', file_path) from err

        # The configuration is already validated above, skip a second validation.
        return self.run_experiment(
            config,
            events=events,
            num_threads=num_threads,
            verbose=verbose,
            validate_config=False
        )

    def validate_experiment(
            self,
            result_dir: str,
            num_runs: int,
            *,
            events: Optional[Dict[str, Callable[[Any], None]]] = None,
            num_threads: int = 0,
            verbose: bool = True) -> bool:
        """Validate an experiment for an additional number of runs.

        It is not possible to validate an active experiment computation until it is done.
        The configuration file is expected to be stored in the specified result directory.
        Moreover, the configuration is validated before starting the experiment validation.
        If the configuration is found to be invalid this function returns False.

        Args:
            result_dir: path to an existing experiment result directory.
            num_runs: the number of runs to validate the experiment.
            events: the external events to dispatch during the experiment.
            num_threads: the max number of threads the experiment can use.
            verbose: whether the internal events should give verbose output.

        Raises:
            FileNotFoundError: when the configuration file does not exist in the result directory.
            KeyError: when a computation with the same name is already active.
            IOError: when the experiment result directory does not exist.

        Returns:
            whether the experiment successfully started.
        """
        result_dir = os.path.join(self.result_dir, result_dir)
        if not os.path.isdir(result_dir):
            raise IOError('Result does not exist: ' + result_dir)

        config_path = os.path.join(result_dir, 'config')
        try:
            parser = ExperimentConfigParser(verbose)
            config = parser.parse_experiment_config_from_yml(config_path,
                                                             self.data_registry,
                                                             self.experiment_factory)
            if config is None:
                return False
        except FileNotFoundError as err:
            raise FileNotFoundError(errno.ENOENT, 'Config file not found', config_path) from err

        self.thread_processor.start(ThreadExperiment(
            config.name,
            events,
            verbose,
            pipeline_config=ExperimentPipelineConfig(
                result_dir,
                self.data_registry,
                self.experiment_factory,
                config,
                # Resume numbering after the runs already present in the directory.
                resolve_experiment_start_run(result_dir),
                num_runs,
                num_threads
            )
        ))

        return True

    def get_active_computations(self) -> List[str]:
        """Get the names of any active computations.

        Returns:
            a list of computation names that are currently running.
        """
        return self.thread_processor.get_active_threads()

    def get_available_algorithms(self, model_type: Optional[str] = None) -> Dict[str, Any]:
        """Get the available algorithms of the recommender system.

        Args:
            model_type: type of model to query for availability, accepted values are
                TYPE_PREDICTION, TYPE_RECOMMENDATION or None.

        Returns:
            a dictionary with the availability of algorithms categorized by API.
        """
        return self.experiment_factory.get_sub_availability(
            KEY_MODELS,
            sub_type=model_type
        )

    def get_available_datasets(self) -> Dict[str, Any]:
        """Get the available datasets of the recommender system.

        Returns:
            a dictionary where the key corresponds to the dataset name and
            the value corresponds to the matrix information dictionary.
        """
        return self.data_registry.get_info()

    def get_available_data_filters(self) -> Dict[str, Any]:
        """Get the available data filters of the recommender system.

        Returns:
            a dictionary with the availability of data filters.
        """
        return self.experiment_factory.get_sub_availability(
            KEY_DATA,
            sub_type=KEY_DATA_SUBSET
        )

    def get_available_metrics(self, eval_type: Optional[str] = None) -> Dict[str, Any]:
        """Get the available metrics of the recommender system.

        Args:
            eval_type: type of evaluation to query for availability, accepted values are
                TYPE_PREDICTION, TYPE_RECOMMENDATION or None.

        Returns:
            a dictionary with the availability of metrics categorized by evaluation type.
        """
        return self.experiment_factory.get_sub_availability(
            KEY_EVALUATION,
            sub_type=eval_type
        )

    def get_available_rating_converters(self) -> Dict[str, Any]:
        """Get the available data rating converters of the recommender system.

        Returns:
            a dictionary with the availability of rating converters.
        """
        return self.experiment_factory.get_sub_availability(
            KEY_DATA,
            sub_type=KEY_RATING_CONVERTER
        )

    def get_available_splitters(self) -> Dict[str, Any]:
        """Get the available data splitters of the recommender system.

        Returns:
            a dictionary with the availability of data splitters.
        """
        return self.experiment_factory.get_sub_availability(
            KEY_DATA,
            sub_type=KEY_SPLITTING
        )
# NOTE(review): this is a second copy of the RecommenderSystem class that the
# extraction duplicated; consider removing one of the two copies.
class RecommenderSystem:
    """The FairReckit Recommender System.

    Defines the top level API intended for use by applications.

    Public methods:

        abort_computation
        run_experiment
        run_experiment_from_yml
        validate_experiment
        get_active_computations
        get_available_algorithms
        get_available_datasets
        get_available_data_filters
        get_available_metrics
        get_available_rating_converters
        get_available_splitters
    """

    def __init__(self, data_dir: str, result_dir: str, verbose: bool = True):
        """Construct the RecommenderSystem.

        Initializes the data registry with available datasets on which the
        recommender system depends and therefore the data directory is expected to exist.
        The result directory however is created when non-existing.

        Args:
            data_dir: path to the directory that contains the datasets.
            result_dir: path to the directory to store computation results.
            verbose: whether the data registry should give verbose output on startup.

        Raises:
            IOError: when the specified data directory does not exist.
        """
        try:
            self.data_registry = DataRegistry(data_dir, verbose=verbose)
        except IOError as err:
            raise IOError('Failed to initialize DataRegistry: '
                          'unknown data directory => ' + data_dir) from err

        self.result_dir = result_dir
        if not os.path.isdir(self.result_dir):
            os.mkdir(self.result_dir)

        self.experiment_factory = create_experiment_factory(self.data_registry)
        self.thread_processor = ThreadProcessor()

    def abort_computation(self, computation_name: str) -> bool:
        """Attempt to abort a running computation thread.

        The name of the computation is the same as specified in the configuration
        file when the computation is run. If the computation with the specified
        name does not exist this function returns False.
        Note that the computation is only requested to abort as soon as possible,
        therefore it might take a while until the computation actually stops.

        Args:
            computation_name: name of the active computation thread to abort.

        Returns:
            whether the computation is successfully requested to abort.
        """
        if not self.thread_processor.is_active_thread(computation_name):
            return False

        self.thread_processor.stop(computation_name)
        return True

    def run_experiment(
            self,
            config: Union[PredictorExperimentConfig, RecommenderExperimentConfig],
            *,
            events: Dict[str, Callable[[Any], None]] = None,
            num_threads: int = 0,
            verbose: bool = True,
            validate_config: bool = True) -> bool:
        """Run an experiment with the specified configuration.

        It is advised to validate the configuration (default) before running the
        experiment, to make sure the configuration describes a valid experiment.
        The configuration is invalid when it contains no selected datasets or
        models, only the evaluation is optional. If the configuration is invalidated
        this function will return False.

        Args:
            events: the external events to dispatch during the experiment.
            config: the configuration of the experiment.
            num_threads: the max number of threads the experiment can use.
            verbose: whether the internal events should give verbose output.
            validate_config: whether to validate the configuration beforehand.

        Raises:
            KeyError: when a computation with the same name is already active.
            IOError: when the experiment result directory already exists.
            TypeError: when the provided configuration is not a valid experiment configuration.

        Returns:
            whether the experiment successfully started.
        """
        if not isinstance(config, (PredictorExperimentConfig, RecommenderExperimentConfig)):
            raise TypeError('Invalid experiment configuration type.')

        result_dir = os.path.join(self.result_dir, config.name)
        if os.path.isdir(result_dir):
            raise IOError('Result already exists: ' + result_dir)

        if validate_config:
            parser = ExperimentConfigParser(verbose)
            config = parser.parse_experiment_config(config.to_yml_format(),
                                                   self.data_registry,
                                                   self.experiment_factory)
            if config is None:
                return False

        self.thread_processor.start(ThreadExperiment(
            config.name,
            events,
            verbose,
            pipeline_config=ExperimentPipelineConfig(
                result_dir,
                self.data_registry,
                self.experiment_factory,
                config,
                0,
                1,
                num_threads
            )
        ))

        return True

    def run_experiment_from_yml(
            self,
            file_path: str,
            *,
            events: Dict[str, Callable[[Any], None]] = None,
            num_threads: int = 0,
            verbose: bool = True) -> bool:
        """Run an experiment from a yml file.

        The configuration in the file is validated before starting the experiment.
        It is invalid when it contains no selected datasets or models,
        only the evaluation is optional. If the configuration is invalidated
        this function will return False.

        Args:
            events: the external events to dispatch during the experiment.
            file_path: path to the yml file without extension.
            num_threads: the max number of threads the experiment can use.
            verbose: whether the internal events should give verbose output.

        Raises:
            FileNotFoundError: when the specified yml file does not exist.
            KeyError: when a computation with the same name is already active.
            IOError: when the experiment result directory already exists.

        Returns:
            whether the experiment successfully started.
        """
        try:
            parser = ExperimentConfigParser(verbose)
            config = parser.parse_experiment_config_from_yml(file_path,
                                                             self.data_registry,
                                                             self.experiment_factory)
            if config is None:
                return False
        except FileNotFoundError as err:
            raise FileNotFoundError(errno.ENOENT, 'Config file not found', file_path) from err

        return self.run_experiment(
            config,
            events=events,
            num_threads=num_threads,
            verbose=verbose,
            validate_config=False
        )

    def validate_experiment(
            self,
            result_dir: str,
            num_runs: int,
            *,
            events: Dict[str, Callable[[Any], None]] = None,
            num_threads: int = 0,
            verbose: bool = True) -> bool:
        """Validate an experiment for an additional number of runs.

        It is not possible to validate an active experiment computation until it is done.
        The configuration file is expected to be stored in the specified result directory.
        Moreover, the configuration is validated before starting the experiment validation.
        If the configuration is invalidated this function will return False.

        Args:
            events: the external events to dispatch during the experiment.
            result_dir: path to an existing experiment result directory.
            num_runs: the number of runs to validate the experiment.
            num_threads: the max number of threads the experiment can use.
            verbose: whether the internal events should give verbose output.

        Raises:
            FileNotFoundError: when the configuration file does not exist in the result directory.
            KeyError: when a computation with the same name is already active.
            IOError: when the experiment result directory does not exist.

        Returns:
            whether the experiment successfully started.
        """
        result_dir = os.path.join(self.result_dir, result_dir)
        if not os.path.isdir(result_dir):
            raise IOError('Result does not exist: ' + result_dir)

        config_path = os.path.join(result_dir, 'config')
        try:
            parser = ExperimentConfigParser(verbose)
            config = parser.parse_experiment_config_from_yml(config_path,
                                                             self.data_registry,
                                                             self.experiment_factory)
            if config is None:
                return False
        except FileNotFoundError as err:
            raise FileNotFoundError(errno.ENOENT, 'Config file not found', config_path) from err

        self.thread_processor.start(ThreadExperiment(
            config.name,
            events,
            verbose,
            pipeline_config=ExperimentPipelineConfig(
                result_dir,
                self.data_registry,
                self.experiment_factory,
                config,
                resolve_experiment_start_run(result_dir),
                num_runs,
                num_threads
            )
        ))

        return True

    def get_active_computations(self) -> List[str]:
        """Get the names of any active computations.

        Returns:
            a list of computations names that are currently running.
        """
        return self.thread_processor.get_active_threads()

    def get_available_algorithms(self, model_type: str = None):
        """Get the available algorithms of the recommender system.

        Args:
            model_type: type of model to query for availability, accepted values are
                TYPE_PREDICTION, TYPE_RECOMMENDATION or None.

        Returns:
            a dictionary with the availability of algorithms categorized by API.
        """
        return self.experiment_factory.get_sub_availability(
            KEY_MODELS,
            sub_type=model_type
        )

    def get_available_datasets(self) -> Dict[str, Any]:
        """Get the available datasets of the recommender system.

        Returns:
            a dictionary where the key corresponds to the dataset name and
            the value corresponds to the matrix information dictionary.
        """
        return self.data_registry.get_info()

    def get_available_data_filters(self) -> Dict[str, Any]:
        """Get the available data filters of the recommender system.

        Returns:
            a dictionary with the availability of data filters.
        """
        return self.experiment_factory.get_sub_availability(
            KEY_DATA,
            sub_type=KEY_DATA_SUBSET
        )

    def get_available_metrics(self, eval_type: str = None) -> Dict[str, Any]:
        """Get the available metrics of the recommender system.

        Args:
            eval_type(str): type of evaluation to query for availability, accepted values are
                TYPE_PREDICTION, TYPE_RECOMMENDATION or None.

        Returns:
            a dictionary with the availability of metrics categorized by evaluation type.
        """
        return self.experiment_factory.get_sub_availability(
            KEY_EVALUATION,
            sub_type=eval_type
        )

    def get_available_rating_converters(self) -> Dict[str, Any]:
        """Get the available data rating converters of the recommender system.

        Returns:
            a dictionary with the availability of rating converters.
        """
        return self.experiment_factory.get_sub_availability(
            KEY_DATA,
            sub_type=KEY_RATING_CONVERTER
        )

    def get_available_splitters(self) -> Dict[str, Any]:
        """Get the available data splitters of the recommender system.

        Returns:
            a dictionary with the availability of data splitters.
        """
        return self.experiment_factory.get_sub_availability(
            KEY_DATA,
            sub_type=KEY_SPLITTING
        )
The FairReckit Recommender System.
Defines the top level API intended for use by applications.
Public methods:
abort_computation run_experiment run_experiment_from_yml validate_experiment get_active_computations get_available_algorithms get_available_datasets get_available_data_filters get_available_metrics get_available_rating_converters get_available_splitters
52 def __init__(self, data_dir: str, result_dir: str, verbose: bool=True): 53 """Construct the RecommenderSystem. 54 55 Initializes the data registry with available datasets on which the 56 recommender system depends and therefore the data directory is expected to exist. 57 The result directory however is created when non-existing. 58 59 Args: 60 data_dir: path to the directory that contains the datasets. 61 result_dir: path to the directory to store computation results. 62 verbose: whether the data registry should give verbose output on startup. 63 64 Raises: 65 IOError: when the specified data directory does not exist. 66 """ 67 try: 68 self.data_registry = DataRegistry(data_dir, verbose=verbose) 69 except IOError as err: 70 raise IOError('Failed to initialize DataRegistry: ' 71 'unknown data directory => ' + data_dir) from err 72 73 self.result_dir = result_dir 74 if not os.path.isdir(self.result_dir): 75 os.mkdir(self.result_dir) 76 77 self.experiment_factory = create_experiment_factory(self.data_registry) 78 self.thread_processor = ThreadProcessor()
Construct the RecommenderSystem.
Initializes the data registry with the available datasets on which the recommender system depends; the data directory is therefore expected to exist. The result directory, however, is created if it does not already exist.
Args: data_dir: path to the directory that contains the datasets. result_dir: path to the directory to store computation results. verbose: whether the data registry should give verbose output on startup.
Raises: IOError: when the specified data directory does not exist.
80 def abort_computation(self, computation_name: str) -> bool: 81 """Attempt to abort a running computation thread. 82 83 The name of the computation is the same as specified in the configuration 84 file when the computation is run. If the computation with the specified 85 name does not exist this function returns False. 86 Note that the computation is only requested to abort as soon as possible, 87 therefore it might take a while until the computation actually stops. 88 89 Args: 90 computation_name: name of the active computation thread to abort. 91 92 Returns: 93 whether the computation is successfully requested to abort. 94 """ 95 if not self.thread_processor.is_active_thread(computation_name): 96 return False 97 98 self.thread_processor.stop(computation_name) 99 return True
Attempt to abort a running computation thread.
The name of the computation is the same as specified in the configuration file when the computation is run. If the computation with the specified name does not exist this function returns False. Note that the computation is only requested to abort as soon as possible, therefore it might take a while until the computation actually stops.
Args: computation_name: name of the active computation thread to abort.
Returns: whether the computation is successfully requested to abort.
def run_experiment(
        self,
        config: Union[PredictorExperimentConfig, RecommenderExperimentConfig],
        *,
        events: Dict[str, Callable[[Any], None]] = None,
        num_threads: int = 0,
        verbose: bool = True,
        validate_config: bool = True) -> bool:
    """Run an experiment with the specified configuration.

    It is advised to validate the configuration (default) before running the
    experiment, to make sure the configuration describes a valid experiment.
    The configuration is invalid when it contains no selected datasets or
    models, only the evaluation is optional. If the configuration is invalidated
    this function will return False.

    Args:
        events: the external events to dispatch during the experiment.
        config: the configuration of the experiment.
        num_threads: the max number of threads the experiment can use.
        verbose: whether the internal events should give verbose output.
        validate_config: whether to validate the configuration beforehand.

    Raises:
        KeyError: when a computation with the same name is already active.
        IOError: when the experiment result directory already exists.
        TypeError: when the provided configuration is not a valid experiment configuration.

    Returns:
        whether the experiment successfully started.
    """
    if not isinstance(config, (PredictorExperimentConfig, RecommenderExperimentConfig)):
        raise TypeError('Invalid experiment configuration type.')

    result_dir = os.path.join(self.result_dir, config.name)
    if os.path.isdir(result_dir):
        raise IOError('Result already exists: ' + result_dir)

    if validate_config:
        # Round-trip through the yml format so the same parser validates both
        # in-memory and file-based configurations.
        parser = ExperimentConfigParser(verbose)
        config = parser.parse_experiment_config(config.to_yml_format(),
                                               self.data_registry,
                                               self.experiment_factory)
        if config is None:
            return False

    self.thread_processor.start(ThreadExperiment(
        config.name,
        events,
        verbose,
        pipeline_config=ExperimentPipelineConfig(
            result_dir,
            self.data_registry,
            self.experiment_factory,
            config,
            0,  # a fresh experiment starts at run 0
            1,  # and performs a single run
            num_threads
        )
    ))

    return True
Run an experiment with the specified configuration.
It is advised to validate the configuration (the default) before running the experiment, to make sure the configuration describes a valid experiment. The configuration is invalid when it contains no selected datasets or models; only the evaluation is optional. If the configuration is found to be invalid, this function returns False.
Args: events: the external events to dispatch during the experiment. config: the configuration of the experiment. num_threads: the max number of threads the experiment can use. verbose: whether the internal events should give verbose output. validate_config: whether to validate the configuration beforehand.
Raises: KeyError: when a computation with the same name is already active. IOError: when the experiment result directory already exists. TypeError: when the provided configuration is not a valid experiment configuration.
Returns: whether the experiment successfully started.
164 def run_experiment_from_yml( 165 self, 166 file_path: str, 167 *, 168 events: Dict[str, Callable[[Any], None]] = None, 169 num_threads: int = 0, 170 verbose: bool = True) -> bool: 171 """Run an experiment from a yml file. 172 173 The configuration in the file is validated before starting the experiment. 174 It is invalid when it contains no selected datasets or models, 175 only the evaluation is optional. If the configuration is invalidated 176 this function will return False. 177 178 Args: 179 events: the external events to dispatch during the experiment. 180 file_path: path to the yml file without extension. 181 num_threads: the max number of threads the experiment can use. 182 verbose: whether the internal events should give verbose output. 183 184 Raises: 185 FileNotFoundError: when the specified yml file does not exist. 186 KeyError: when a computation with the same name is already active. 187 IOError: when the experiment result directory already exists. 188 189 Returns: 190 whether the experiment successfully started. 191 """ 192 try: 193 parser = ExperimentConfigParser(verbose) 194 config = parser.parse_experiment_config_from_yml(file_path, 195 self.data_registry, 196 self.experiment_factory) 197 if config is None: 198 return False 199 except FileNotFoundError as err: 200 raise FileNotFoundError(errno.ENOENT, 'Config file not found', file_path) from err 201 202 return self.run_experiment( 203 config, 204 events=events, 205 num_threads=num_threads, 206 verbose=verbose, 207 validate_config=False 208 )
Run an experiment from a yml file.
The configuration in the file is validated before the experiment is started. It is invalid when it contains no selected datasets or models; only the evaluation is optional. If the configuration is found to be invalid, this function returns False.
Args: events: the external events to dispatch during the experiment. file_path: path to the yml file without extension. num_threads: the max number of threads the experiment can use. verbose: whether the internal events should give verbose output.
Raises: FileNotFoundError: when the specified yml file does not exist. KeyError: when a computation with the same name is already active. IOError: when the experiment result directory already exists.
Returns: whether the experiment successfully started.
def validate_experiment(
        self,
        result_dir: str,
        num_runs: int,
        *,
        events: Dict[str, Callable[[Any], None]] = None,
        num_threads: int = 0,
        verbose: bool = True) -> bool:
    """Validate an experiment for an additional number of runs.

    It is not possible to validate an active experiment computation until it is done.
    The configuration file is expected to be stored in the specified result directory.
    Moreover, the configuration is validated before starting the experiment validation.
    If the configuration is invalidated this function will return False.

    Args:
        events: the external events to dispatch during the experiment.
        result_dir: path to an existing experiment result directory.
        num_runs: the number of runs to validate the experiment.
        num_threads: the max number of threads the experiment can use.
        verbose: whether the internal events should give verbose output.

    Raises:
        FileNotFoundError: when the configuration file does not exist in the result directory.
        KeyError: when a computation with the same name is already active.
        IOError: when the experiment result directory does not exist.

    Returns:
        whether the experiment successfully started.
    """
    result_dir = os.path.join(self.result_dir, result_dir)
    if not os.path.isdir(result_dir):
        raise IOError('Result does not exist: ' + result_dir)

    config_path = os.path.join(result_dir, 'config')
    try:
        parser = ExperimentConfigParser(verbose)
        config = parser.parse_experiment_config_from_yml(config_path,
                                                         self.data_registry,
                                                         self.experiment_factory)
        if config is None:
            return False
    except FileNotFoundError as err:
        raise FileNotFoundError(errno.ENOENT, 'Config file not found', config_path) from err

    self.thread_processor.start(ThreadExperiment(
        config.name,
        events,
        verbose,
        pipeline_config=ExperimentPipelineConfig(
            result_dir,
            self.data_registry,
            self.experiment_factory,
            config,
            # Resume run numbering after the runs already in the directory.
            resolve_experiment_start_run(result_dir),
            num_runs,
            num_threads
        )
    ))

    return True
Validate an experiment for an additional number of runs.
It is not possible to validate an active experiment computation until it is done. The configuration file is expected to be stored in the specified result directory. Moreover, the configuration is validated before starting the experiment validation. If the configuration is invalid, this function will return False.
Args: events: the external events to dispatch during the experiment. result_dir: path to an existing experiment result directory. num_runs: the number of runs to validate the experiment. num_threads: the max number of threads the experiment can use. verbose: whether the internal events should give verbose output.
Raises: FileNotFoundError: when the configuration file does not exist in the result directory. KeyError: when a computation with the same name is already active. IOError: when the experiment result directory does not exist.
Returns: whether the experiment successfully started.
272 def get_active_computations(self) -> List[str]: 273 """Get the names of any active computations. 274 275 Returns: 276 a list of computations names that are currently running. 277 """ 278 return self.thread_processor.get_active_threads()
Get the names of any active computations.
Returns: a list of computation names that are currently running.
280 def get_available_algorithms(self, model_type: str = None): 281 """Get the available algorithms of the recommender system. 282 283 Args: 284 model_type: type of model to query for availability, accepted values are 285 TYPE_PREDICTION, TYPE_RECOMMENDATION or None. 286 287 Returns: 288 a dictionary with the availability of algorithms categorized by API. 289 """ 290 return self.experiment_factory.get_sub_availability( 291 KEY_MODELS, 292 sub_type=model_type 293 )
Get the available algorithms of the recommender system.
Args: model_type: type of model to query for availability, accepted values are TYPE_PREDICTION, TYPE_RECOMMENDATION or None.
Returns: a dictionary with the availability of algorithms categorized by API.
295 def get_available_datasets(self) -> Dict[str, Any]: 296 """Get the available datasets of the recommender system. 297 298 Returns: 299 a dictionary where the key corresponds to the dataset name and 300 the value corresponds to the matrix information dictionary. 301 """ 302 return self.data_registry.get_info()
Get the available datasets of the recommender system.
Returns: a dictionary where the key corresponds to the dataset name and the value corresponds to the matrix information dictionary.
304 def get_available_data_filters(self) -> Dict[str, Any]: 305 """Get the available data filters of the recommender system. 306 307 Returns: 308 a dictionary with the availability of data filters. 309 """ 310 return self.experiment_factory.get_sub_availability( 311 KEY_DATA, 312 sub_type=KEY_DATA_SUBSET 313 )
Get the available data filters of the recommender system.
Returns: a dictionary with the availability of data filters.
315 def get_available_metrics(self, eval_type: str = None) -> Dict[str, Any]: 316 """Get the available metrics of the recommender system. 317 318 Args: 319 eval_type(str): type of evaluation to query for availability, accepted values are 320 TYPE_PREDICTION, TYPE_RECOMMENDATION or None. 321 322 Returns: 323 a dictionary with the availability of metrics categorized by evaluation type. 324 """ 325 return self.experiment_factory.get_sub_availability( 326 KEY_EVALUATION, 327 sub_type=eval_type 328 )
Get the available metrics of the recommender system.
Args: eval_type(str): type of evaluation to query for availability, accepted values are TYPE_PREDICTION, TYPE_RECOMMENDATION or None.
Returns: a dictionary with the availability of metrics categorized by evaluation type.
330 def get_available_rating_converters(self) -> Dict[str, Any]: 331 """Get the available data rating converters of the recommender system. 332 333 Returns: 334 a dictionary with the availability of rating converters. 335 """ 336 return self.experiment_factory.get_sub_availability( 337 KEY_DATA, 338 sub_type=KEY_RATING_CONVERTER 339 )
Get the available data rating converters of the recommender system.
Returns: a dictionary with the availability of rating converters.
341 def get_available_splitters(self) -> Dict[str, Any]: 342 """Get the available data splitters of the recommender system. 343 344 Returns: 345 a dictionary with the availability of data splitters. 346 """ 347 return self.experiment_factory.get_sub_availability( 348 KEY_DATA, 349 sub_type=KEY_SPLITTING 350 )
Get the available data splitters of the recommender system.
Returns: a dictionary with the availability of data splitters.