src.fairreckitlib.recommender_system

This module contains the FairRecKit recommender system.

Classes:

RecommenderSystem: class that includes the entire recommender system.

This program has been developed by students from the bachelor Computer Science at Utrecht University within the Software Project course. © Copyright Utrecht University (Department of Information and Computing Sciences)

  1"""This module contains the FairRecKit recommender system.
  2
  3Classes:
  4
  5    RecommenderSystem: class that includes the entire recommender system.
  6
  7This program has been developed by students from the bachelor Computer Science at
  8Utrecht University within the Software Project course.
  9© Copyright Utrecht University (Department of Information and Computing Sciences)
 10"""
 11
 12import errno
 13import os
 14from typing import Any, Dict, Callable, List, Union
 15
 16from .core.threading.thread_processor import ThreadProcessor
 17from .data.data_factory import KEY_DATA
 18from .data.filter.filter_constants import KEY_DATA_SUBSET
 19from .data.ratings.convert_constants import KEY_RATING_CONVERTER
 20from .data.set.dataset_registry import DataRegistry
 21from .data.split.split_constants import KEY_SPLITTING
 22from .evaluation.evaluation_factory import KEY_EVALUATION
 23from .experiment.experiment_config import PredictorExperimentConfig, RecommenderExperimentConfig
 24from .experiment.experiment_config_parser import ExperimentConfigParser
 25from .experiment.experiment_factory import create_experiment_factory
 26from .experiment.experiment_run import ExperimentPipelineConfig, resolve_experiment_start_run
 27from .experiment.experiment_thread import ThreadExperiment
 28from .model.model_factory import KEY_MODELS
 29
 30
 31class RecommenderSystem:
 32    """The FairReckit Recommender System.
 33
 34    Defines the top level API intended for use by applications.
 35
 36    Public methods:
 37
 38    abort_computation
 39    run_experiment
 40    run_experiment_from_yml
 41    validate_experiment
 42    get_active_computations
 43    get_available_algorithms
 44    get_available_datasets
 45    get_available_data_filters
 46    get_available_metrics
 47    get_available_rating_converters
 48    get_available_splitters
 49    """
 50
 51    def __init__(self, data_dir: str, result_dir: str, verbose: bool=True):
 52        """Construct the RecommenderSystem.
 53
 54        Initializes the data registry with available datasets on which the
 55        recommender system depends and therefore the data directory is expected to exist.
 56        The result directory however is created when non-existing.
 57
 58        Args:
 59            data_dir: path to the directory that contains the datasets.
 60            result_dir: path to the directory to store computation results.
 61            verbose: whether the data registry should give verbose output on startup.
 62
 63        Raises:
 64            IOError: when the specified data directory does not exist.
 65        """
 66        try:
 67            self.data_registry = DataRegistry(data_dir, verbose=verbose)
 68        except IOError as err:
 69            raise IOError('Failed to initialize DataRegistry: '
 70                          'unknown data directory => ' + data_dir) from err
 71
 72        self.result_dir = result_dir
 73        if not os.path.isdir(self.result_dir):
 74            os.mkdir(self.result_dir)
 75
 76        self.experiment_factory = create_experiment_factory(self.data_registry)
 77        self.thread_processor = ThreadProcessor()
 78
 79    def abort_computation(self, computation_name: str) -> bool:
 80        """Attempt to abort a running computation thread.
 81
 82        The name of the computation is the same as specified in the configuration
 83        file when the computation is run. If the computation with the specified
 84        name does not exist this function returns False.
 85        Note that the computation is only requested to abort as soon as possible,
 86        therefore it might take a while until the computation actually stops.
 87
 88        Args:
 89            computation_name: name of the active computation thread to abort.
 90
 91        Returns:
 92            whether the computation is successfully requested to abort.
 93        """
 94        if not self.thread_processor.is_active_thread(computation_name):
 95            return False
 96
 97        self.thread_processor.stop(computation_name)
 98        return True
 99
100    def run_experiment(
101            self,
102            config: Union[PredictorExperimentConfig, RecommenderExperimentConfig],
103            *,
104            events: Dict[str, Callable[[Any], None]] = None,
105            num_threads: int = 0,
106            verbose: bool = True,
107            validate_config: bool = True) -> bool:
108        """Run an experiment with the specified configuration.
109
110        It is advised to validate the configuration (default) before running the
111        experiment, to make sure the configuration describes a valid experiment.
112        The configuration is invalid when it contains no selected datasets or
113        models, only the evaluation is optional. If the configuration is invalidated
114        this function will return False.
115
116        Args:
117            events: the external events to dispatch during the experiment.
118            config: the configuration of the experiment.
119            num_threads: the max number of threads the experiment can use.
120            verbose: whether the internal events should give verbose output.
121            validate_config: whether to validate the configuration beforehand.
122
123        Raises:
124            KeyError: when a computation with the same name is already active.
125            IOError: when the experiment result directory already exists.
126            TypeError: when the provided configuration is not a valid experiment configuration.
127
128        Returns:
129            whether the experiment successfully started.
130        """
131        if not isinstance(config, (PredictorExperimentConfig, RecommenderExperimentConfig)):
132            raise TypeError('Invalid experiment configuration type.')
133
134        result_dir = os.path.join(self.result_dir, config.name)
135        if os.path.isdir(result_dir):
136            raise IOError('Result already exists: ' + result_dir)
137
138        if validate_config:
139            parser = ExperimentConfigParser(verbose)
140            config = parser.parse_experiment_config(config.to_yml_format(),
141                                                    self.data_registry,
142                                                    self.experiment_factory)
143            if config is None:
144                return False
145
146        self.thread_processor.start(ThreadExperiment(
147            config.name,
148            events,
149            verbose,
150            pipeline_config=ExperimentPipelineConfig(
151                result_dir,
152                self.data_registry,
153                self.experiment_factory,
154                config,
155                0,
156                1,
157                num_threads
158            )
159        ))
160
161        return True
162
163    def run_experiment_from_yml(
164            self,
165            file_path: str,
166            *,
167            events: Dict[str, Callable[[Any], None]] = None,
168            num_threads: int = 0,
169            verbose: bool = True) -> bool:
170        """Run an experiment from a yml file.
171
172        The configuration in the file is validated before starting the experiment.
173        It is invalid when it contains no selected datasets or models,
174        only the evaluation is optional.  If the configuration is invalidated
175        this function will return False.
176
177        Args:
178            events: the external events to dispatch during the experiment.
179            file_path: path to the yml file without extension.
180            num_threads: the max number of threads the experiment can use.
181            verbose: whether the internal events should give verbose output.
182
183        Raises:
184            FileNotFoundError: when the specified yml file does not exist.
185            KeyError: when a computation with the same name is already active.
186            IOError: when the experiment result directory already exists.
187
188        Returns:
189            whether the experiment successfully started.
190        """
191        try:
192            parser = ExperimentConfigParser(verbose)
193            config = parser.parse_experiment_config_from_yml(file_path,
194                                                             self.data_registry,
195                                                             self.experiment_factory)
196            if config is None:
197                return False
198        except FileNotFoundError as err:
199            raise FileNotFoundError(errno.ENOENT, 'Config file not found', file_path) from err
200
201        return self.run_experiment(
202            config,
203            events=events,
204            num_threads=num_threads,
205            verbose=verbose,
206            validate_config=False
207        )
208
209    def validate_experiment(
210            self,
211            result_dir: str,
212            num_runs: int,
213            *,
214            events: Dict[str, Callable[[Any], None]] = None,
215            num_threads: int = 0,
216            verbose: bool = True) -> bool:
217        """Validate an experiment for an additional number of runs.
218
219        It is not possible to validate an active experiment computation until it is done.
220        The configuration file is expected to be stored in the specified result directory.
221        Moreover, the configuration is validated before starting the experiment validation.
222        If the configuration is invalidated this function will return False.
223
224        Args:
225            events: the external events to dispatch during the experiment.
226            result_dir: path to an existing experiment result directory.
227            num_runs: the number of runs to validate the experiment.
228            num_threads: the max number of threads the experiment can use.
229            verbose: whether the internal events should give verbose output.
230
231        Raises:
232            FileNotFoundError: when the configuration file does not exist in the result directory.
233            KeyError: when a computation with the same name is already active.
234            IOError: when the experiment result directory does not exist.
235
236        Returns:
237            whether the experiment successfully started.
238        """
239        result_dir = os.path.join(self.result_dir, result_dir)
240        if not os.path.isdir(result_dir):
241            raise IOError('Result does not exist: ' + result_dir)
242
243        config_path = os.path.join(result_dir, 'config')
244        try:
245            parser = ExperimentConfigParser(verbose)
246            config = parser.parse_experiment_config_from_yml(config_path,
247                                                             self.data_registry,
248                                                             self.experiment_factory)
249            if config is None:
250                return False
251        except FileNotFoundError as err:
252            raise FileNotFoundError(errno.ENOENT, 'Config file not found', config_path) from err
253
254        self.thread_processor.start(ThreadExperiment(
255            config.name,
256            events,
257            verbose,
258            pipeline_config=ExperimentPipelineConfig(
259                result_dir,
260                self.data_registry,
261                self.experiment_factory,
262                config,
263                resolve_experiment_start_run(result_dir),
264                num_runs,
265                num_threads
266            )
267        ))
268
269        return True
270
271    def get_active_computations(self) -> List[str]:
272        """Get the names of any active computations.
273
274        Returns:
275            a list of computations names that are currently running.
276        """
277        return self.thread_processor.get_active_threads()
278
279    def get_available_algorithms(self, model_type: str = None):
280        """Get the available algorithms of the recommender system.
281
282        Args:
283            model_type: type of model to query for availability, accepted values are
284                TYPE_PREDICTION, TYPE_RECOMMENDATION or None.
285
286        Returns:
287            a dictionary with the availability of algorithms categorized by API.
288        """
289        return self.experiment_factory.get_sub_availability(
290            KEY_MODELS,
291            sub_type=model_type
292        )
293
294    def get_available_datasets(self) -> Dict[str, Any]:
295        """Get the available datasets of the recommender system.
296
297        Returns:
298            a dictionary where the key corresponds to the dataset name and
299                the value corresponds to the matrix information dictionary.
300        """
301        return self.data_registry.get_info()
302
303    def get_available_data_filters(self) -> Dict[str, Any]:
304        """Get the available data filters of the recommender system.
305
306        Returns:
307            a dictionary with the availability of data filters.
308        """
309        return self.experiment_factory.get_sub_availability(
310            KEY_DATA,
311            sub_type=KEY_DATA_SUBSET
312        )
313
314    def get_available_metrics(self, eval_type: str = None) -> Dict[str, Any]:
315        """Get the available metrics of the recommender system.
316
317        Args:
318            eval_type(str): type of evaluation to query for availability, accepted values are
319                TYPE_PREDICTION, TYPE_RECOMMENDATION or None.
320
321        Returns:
322            a dictionary with the availability of metrics categorized by evaluation type.
323        """
324        return self.experiment_factory.get_sub_availability(
325            KEY_EVALUATION,
326            sub_type=eval_type
327        )
328
329    def get_available_rating_converters(self) -> Dict[str, Any]:
330        """Get the available data rating converters of the recommender system.
331
332        Returns:
333            a dictionary with the availability of rating converters.
334        """
335        return self.experiment_factory.get_sub_availability(
336            KEY_DATA,
337            sub_type=KEY_RATING_CONVERTER
338        )
339
340    def get_available_splitters(self) -> Dict[str, Any]:
341        """Get the available data splitters of the recommender system.
342
343        Returns:
344            a dictionary with the availability of data splitters.
345        """
346        return self.experiment_factory.get_sub_availability(
347            KEY_DATA,
348            sub_type=KEY_SPLITTING
349        )
class RecommenderSystem:
    """Top-level entry point of the FairRecKit recommender system.

    Applications are intended to interact with the system exclusively
    through this API.

    Public methods:

    abort_computation
    run_experiment
    run_experiment_from_yml
    validate_experiment
    get_active_computations
    get_available_algorithms
    get_available_datasets
    get_available_data_filters
    get_available_metrics
    get_available_rating_converters
    get_available_splitters
    """

    def __init__(self, data_dir: str, result_dir: str, verbose: bool=True):
        """Create the recommender system.

        The data registry is loaded from the given data directory, which must
        already exist. The result directory is created when it is not present.

        Args:
            data_dir: directory that holds the available datasets.
            result_dir: directory where computation results are stored.
            verbose: whether the data registry prints verbose startup output.

        Raises:
            IOError: when the specified data directory does not exist.
        """
        try:
            self.data_registry = DataRegistry(data_dir, verbose=verbose)
        except IOError as error:
            raise IOError('Failed to initialize DataRegistry: '
                          'unknown data directory => ' + data_dir) from error

        self.result_dir = result_dir
        if not os.path.isdir(self.result_dir):
            os.mkdir(self.result_dir)

        self.experiment_factory = create_experiment_factory(self.data_registry)
        self.thread_processor = ThreadProcessor()

    def abort_computation(self, computation_name: str) -> bool:
        """Request a running computation thread to stop.

        The computation name matches the name given in the configuration when
        the computation was started. Aborting is only a request: the thread
        stops as soon as it is able to, which may take a while. False is
        returned when no active computation carries the given name.

        Args:
            computation_name: name of the active computation thread to abort.

        Returns:
            whether the abort request was delivered successfully.
        """
        if self.thread_processor.is_active_thread(computation_name):
            self.thread_processor.stop(computation_name)
            return True

        return False

    def run_experiment(
            self,
            config: Union[PredictorExperimentConfig, RecommenderExperimentConfig],
            *,
            events: Dict[str, Callable[[Any], None]] = None,
            num_threads: int = 0,
            verbose: bool = True,
            validate_config: bool = True) -> bool:
        """Start an experiment for the given configuration.

        Validating the configuration beforehand (the default) is recommended:
        a configuration without any selected datasets or models is invalid
        (only the evaluation section is optional), and an invalid
        configuration makes this function return False.

        Args:
            events: the external events to dispatch during the experiment.
            config: the configuration of the experiment.
            num_threads: the max number of threads the experiment can use.
            verbose: whether the internal events should give verbose output.
            validate_config: whether to validate the configuration beforehand.

        Raises:
            KeyError: when a computation with the same name is already active.
            IOError: when the experiment result directory already exists.
            TypeError: when the provided configuration is not a valid experiment configuration.

        Returns:
            whether the experiment successfully started.
        """
        if not isinstance(config, (PredictorExperimentConfig, RecommenderExperimentConfig)):
            raise TypeError('Invalid experiment configuration type.')

        output_dir = os.path.join(self.result_dir, config.name)
        if os.path.isdir(output_dir):
            raise IOError('Result already exists: ' + output_dir)

        if validate_config:
            config_parser = ExperimentConfigParser(verbose)
            config = config_parser.parse_experiment_config(config.to_yml_format(),
                                                           self.data_registry,
                                                           self.experiment_factory)
            if config is None:
                return False

        pipeline_config = ExperimentPipelineConfig(
            output_dir,
            self.data_registry,
            self.experiment_factory,
            config,
            0,
            1,
            num_threads
        )
        self.thread_processor.start(
            ThreadExperiment(config.name, events, verbose, pipeline_config=pipeline_config)
        )

        return True

    def run_experiment_from_yml(
            self,
            file_path: str,
            *,
            events: Dict[str, Callable[[Any], None]] = None,
            num_threads: int = 0,
            verbose: bool = True) -> bool:
        """Start an experiment described by a yml file.

        The file contents are validated before the experiment starts. A
        configuration without any selected datasets or models is invalid
        (only the evaluation section is optional), and an invalid
        configuration makes this function return False.

        Args:
            events: the external events to dispatch during the experiment.
            file_path: path to the yml file without extension.
            num_threads: the max number of threads the experiment can use.
            verbose: whether the internal events should give verbose output.

        Raises:
            FileNotFoundError: when the specified yml file does not exist.
            KeyError: when a computation with the same name is already active.
            IOError: when the experiment result directory already exists.

        Returns:
            whether the experiment successfully started.
        """
        config_parser = ExperimentConfigParser(verbose)
        try:
            config = config_parser.parse_experiment_config_from_yml(file_path,
                                                                    self.data_registry,
                                                                    self.experiment_factory)
        except FileNotFoundError as error:
            raise FileNotFoundError(errno.ENOENT, 'Config file not found', file_path) from error

        if config is None:
            return False

        return self.run_experiment(
            config,
            events=events,
            num_threads=num_threads,
            verbose=verbose,
            validate_config=False
        )

    def validate_experiment(
            self,
            result_dir: str,
            num_runs: int,
            *,
            events: Dict[str, Callable[[Any], None]] = None,
            num_threads: int = 0,
            verbose: bool = True) -> bool:
        """Run additional validation runs for a finished experiment.

        An experiment that is still computing cannot be validated until it
        finishes. The configuration file is read from the given result
        directory and validated before the validation starts; an invalid
        configuration makes this function return False.

        Args:
            events: the external events to dispatch during the experiment.
            result_dir: path to an existing experiment result directory.
            num_runs: the number of runs to validate the experiment.
            num_threads: the max number of threads the experiment can use.
            verbose: whether the internal events should give verbose output.

        Raises:
            FileNotFoundError: when the configuration file does not exist in the result directory.
            KeyError: when a computation with the same name is already active.
            IOError: when the experiment result directory does not exist.

        Returns:
            whether the experiment successfully started.
        """
        result_dir = os.path.join(self.result_dir, result_dir)
        if not os.path.isdir(result_dir):
            raise IOError('Result does not exist: ' + result_dir)

        config_path = os.path.join(result_dir, 'config')
        config_parser = ExperimentConfigParser(verbose)
        try:
            config = config_parser.parse_experiment_config_from_yml(config_path,
                                                                    self.data_registry,
                                                                    self.experiment_factory)
        except FileNotFoundError as error:
            raise FileNotFoundError(errno.ENOENT, 'Config file not found', config_path) from error

        if config is None:
            return False

        pipeline_config = ExperimentPipelineConfig(
            result_dir,
            self.data_registry,
            self.experiment_factory,
            config,
            resolve_experiment_start_run(result_dir),
            num_runs,
            num_threads
        )
        self.thread_processor.start(
            ThreadExperiment(config.name, events, verbose, pipeline_config=pipeline_config)
        )

        return True

    def get_active_computations(self) -> List[str]:
        """List the names of all running computations.

        Returns:
            the names of the computation threads that are currently active.
        """
        return self.thread_processor.get_active_threads()

    def get_available_algorithms(self, model_type: str = None):
        """Query which algorithms the recommender system offers.

        Args:
            model_type: type of model to query for availability, accepted values are
                TYPE_PREDICTION, TYPE_RECOMMENDATION or None.

        Returns:
            a dictionary with the availability of algorithms categorized by API.
        """
        return self.experiment_factory.get_sub_availability(KEY_MODELS, sub_type=model_type)

    def get_available_datasets(self) -> Dict[str, Any]:
        """Query which datasets the recommender system offers.

        Returns:
            a dictionary keyed on dataset name whose values are the
                corresponding matrix information dictionaries.
        """
        return self.data_registry.get_info()

    def get_available_data_filters(self) -> Dict[str, Any]:
        """Query which data filters the recommender system offers.

        Returns:
            a dictionary with the availability of data filters.
        """
        return self.experiment_factory.get_sub_availability(KEY_DATA, sub_type=KEY_DATA_SUBSET)

    def get_available_metrics(self, eval_type: str = None) -> Dict[str, Any]:
        """Query which metrics the recommender system offers.

        Args:
            eval_type: type of evaluation to query for availability, accepted values are
                TYPE_PREDICTION, TYPE_RECOMMENDATION or None.

        Returns:
            a dictionary with the availability of metrics categorized by evaluation type.
        """
        return self.experiment_factory.get_sub_availability(KEY_EVALUATION, sub_type=eval_type)

    def get_available_rating_converters(self) -> Dict[str, Any]:
        """Query which rating converters the recommender system offers.

        Returns:
            a dictionary with the availability of rating converters.
        """
        return self.experiment_factory.get_sub_availability(KEY_DATA, sub_type=KEY_RATING_CONVERTER)

    def get_available_splitters(self) -> Dict[str, Any]:
        """Query which data splitters the recommender system offers.

        Returns:
            a dictionary with the availability of data splitters.
        """
        return self.experiment_factory.get_sub_availability(KEY_DATA, sub_type=KEY_SPLITTING)

The FairReckit Recommender System.

Defines the top level API intended for use by applications.

Public methods:

abort_computation, run_experiment, run_experiment_from_yml, validate_experiment, get_active_computations, get_available_algorithms, get_available_datasets, get_available_data_filters, get_available_metrics, get_available_rating_converters, get_available_splitters

RecommenderSystem(data_dir: str, result_dir: str, verbose: bool = True)
52    def __init__(self, data_dir: str, result_dir: str, verbose: bool=True):
53        """Construct the RecommenderSystem.
54
55        Initializes the data registry with available datasets on which the
56        recommender system depends and therefore the data directory is expected to exist.
57        The result directory however is created when non-existing.
58
59        Args:
60            data_dir: path to the directory that contains the datasets.
61            result_dir: path to the directory to store computation results.
62            verbose: whether the data registry should give verbose output on startup.
63
64        Raises:
65            IOError: when the specified data directory does not exist.
66        """
67        try:
68            self.data_registry = DataRegistry(data_dir, verbose=verbose)
69        except IOError as err:
70            raise IOError('Failed to initialize DataRegistry: '
71                          'unknown data directory => ' + data_dir) from err
72
73        self.result_dir = result_dir
74        if not os.path.isdir(self.result_dir):
75            os.mkdir(self.result_dir)
76
77        self.experiment_factory = create_experiment_factory(self.data_registry)
78        self.thread_processor = ThreadProcessor()

Construct the RecommenderSystem.

Initializes the data registry with available datasets on which the recommender system depends and therefore the data directory is expected to exist. The result directory however is created when non-existing.

Args: data_dir: path to the directory that contains the datasets. result_dir: path to the directory to store computation results. verbose: whether the data registry should give verbose output on startup.

Raises: IOError: when the specified data directory does not exist.

def abort_computation(self, computation_name: str) -> bool:
80    def abort_computation(self, computation_name: str) -> bool:
81        """Attempt to abort a running computation thread.
82
83        The name of the computation is the same as specified in the configuration
84        file when the computation is run. If the computation with the specified
85        name does not exist this function returns False.
86        Note that the computation is only requested to abort as soon as possible,
87        therefore it might take a while until the computation actually stops.
88
89        Args:
90            computation_name: name of the active computation thread to abort.
91
92        Returns:
93            whether the computation is successfully requested to abort.
94        """
95        if not self.thread_processor.is_active_thread(computation_name):
96            return False
97
98        self.thread_processor.stop(computation_name)
99        return True

Attempt to abort a running computation thread.

The name of the computation is the same as specified in the configuration file when the computation is run. If the computation with the specified name does not exist this function returns False. Note that the computation is only requested to abort as soon as possible, therefore it might take a while until the computation actually stops.

Args: computation_name: name of the active computation thread to abort.

Returns: whether the computation is successfully requested to abort.

def run_experiment( self, config: Union[src.fairreckitlib.experiment.experiment_config.PredictorExperimentConfig, src.fairreckitlib.experiment.experiment_config.RecommenderExperimentConfig], *, events: Dict[str, Callable[[Any], NoneType]] = None, num_threads: int = 0, verbose: bool = True, validate_config: bool = True) -> bool:
101    def run_experiment(
102            self,
103            config: Union[PredictorExperimentConfig, RecommenderExperimentConfig],
104            *,
105            events: Dict[str, Callable[[Any], None]] = None,
106            num_threads: int = 0,
107            verbose: bool = True,
108            validate_config: bool = True) -> bool:
109        """Run an experiment with the specified configuration.
110
111        It is advised to validate the configuration (default) before running the
112        experiment, to make sure the configuration describes a valid experiment.
113        The configuration is invalid when it contains no selected datasets or
114        models, only the evaluation is optional. If the configuration is invalidated
115        this function will return False.
116
117        Args:
118            events: the external events to dispatch during the experiment.
119            config: the configuration of the experiment.
120            num_threads: the max number of threads the experiment can use.
121            verbose: whether the internal events should give verbose output.
122            validate_config: whether to validate the configuration beforehand.
123
124        Raises:
125            KeyError: when a computation with the same name is already active.
126            IOError: when the experiment result directory already exists.
127            TypeError: when the provided configuration is not a valid experiment configuration.
128
129        Returns:
130            whether the experiment successfully started.
131        """
132        if not isinstance(config, (PredictorExperimentConfig, RecommenderExperimentConfig)):
133            raise TypeError('Invalid experiment configuration type.')
134
135        result_dir = os.path.join(self.result_dir, config.name)
136        if os.path.isdir(result_dir):
137            raise IOError('Result already exists: ' + result_dir)
138
139        if validate_config:
140            parser = ExperimentConfigParser(verbose)
141            config = parser.parse_experiment_config(config.to_yml_format(),
142                                                    self.data_registry,
143                                                    self.experiment_factory)
144            if config is None:
145                return False
146
147        self.thread_processor.start(ThreadExperiment(
148            config.name,
149            events,
150            verbose,
151            pipeline_config=ExperimentPipelineConfig(
152                result_dir,
153                self.data_registry,
154                self.experiment_factory,
155                config,
156                0,
157                1,
158                num_threads
159            )
160        ))
161
162        return True

Run an experiment with the specified configuration.

It is advised to validate the configuration (default) before running the experiment, to make sure the configuration describes a valid experiment. The configuration is invalid when it contains no selected datasets or models, only the evaluation is optional. If the configuration is invalidated this function will return False.

Args: config: the configuration of the experiment. events: the external events to dispatch during the experiment. num_threads: the max number of threads the experiment can use. verbose: whether the internal events should give verbose output. validate_config: whether to validate the configuration beforehand.

Raises: KeyError: when a computation with the same name is already active. IOError: when the experiment result directory already exists. TypeError: when the provided configuration is not a valid experiment configuration.

Returns: whether the experiment successfully started.

def run_experiment_from_yml( self, file_path: str, *, events: Dict[str, Callable[[Any], NoneType]] = None, num_threads: int = 0, verbose: bool = True) -> bool:
164    def run_experiment_from_yml(
165            self,
166            file_path: str,
167            *,
168            events: Dict[str, Callable[[Any], None]] = None,
169            num_threads: int = 0,
170            verbose: bool = True) -> bool:
171        """Run an experiment from a yml file.
172
173        The configuration in the file is validated before starting the experiment.
174        It is invalid when it contains no selected datasets or models,
175        only the evaluation is optional.  If the configuration is invalidated
176        this function will return False.
177
178        Args:
179            events: the external events to dispatch during the experiment.
180            file_path: path to the yml file without extension.
181            num_threads: the max number of threads the experiment can use.
182            verbose: whether the internal events should give verbose output.
183
184        Raises:
185            FileNotFoundError: when the specified yml file does not exist.
186            KeyError: when a computation with the same name is already active.
187            IOError: when the experiment result directory already exists.
188
189        Returns:
190            whether the experiment successfully started.
191        """
192        try:
193            parser = ExperimentConfigParser(verbose)
194            config = parser.parse_experiment_config_from_yml(file_path,
195                                                             self.data_registry,
196                                                             self.experiment_factory)
197            if config is None:
198                return False
199        except FileNotFoundError as err:
200            raise FileNotFoundError(errno.ENOENT, 'Config file not found', file_path) from err
201
202        return self.run_experiment(
203            config,
204            events=events,
205            num_threads=num_threads,
206            verbose=verbose,
207            validate_config=False
208        )

Run an experiment from a yml file.

The configuration in the file is validated before starting the experiment. It is invalid when it contains no selected datasets or models, only the evaluation is optional. If the configuration is invalidated this function will return False.

Args: events: the external events to dispatch during the experiment. file_path: path to the yml file without extension. num_threads: the max number of threads the experiment can use. verbose: whether the internal events should give verbose output.

Raises: FileNotFoundError: when the specified yml file does not exist. KeyError: when a computation with the same name is already active. IOError: when the experiment result directory already exists.

Returns: whether the experiment successfully started.

def validate_experiment( self, result_dir: str, num_runs: int, *, events: Dict[str, Callable[[Any], NoneType]] = None, num_threads: int = 0, verbose: bool = True) -> bool:
210    def validate_experiment(
211            self,
212            result_dir: str,
213            num_runs: int,
214            *,
215            events: Dict[str, Callable[[Any], None]] = None,
216            num_threads: int = 0,
217            verbose: bool = True) -> bool:
218        """Validate an experiment for an additional number of runs.
219
220        It is not possible to validate an active experiment computation until it is done.
221        The configuration file is expected to be stored in the specified result directory.
222        Moreover, the configuration is validated before starting the experiment validation.
223        If the configuration is invalidated this function will return False.
224
225        Args:
226            events: the external events to dispatch during the experiment.
227            result_dir: path to an existing experiment result directory.
228            num_runs: the number of runs to validate the experiment.
229            num_threads: the max number of threads the experiment can use.
230            verbose: whether the internal events should give verbose output.
231
232        Raises:
233            FileNotFoundError: when the configuration file does not exist in the result directory.
234            KeyError: when a computation with the same name is already active.
235            IOError: when the experiment result directory does not exist.
236
237        Returns:
238            whether the experiment successfully started.
239        """
240        result_dir = os.path.join(self.result_dir, result_dir)
241        if not os.path.isdir(result_dir):
242            raise IOError('Result does not exist: ' + result_dir)
243
244        config_path = os.path.join(result_dir, 'config')
245        try:
246            parser = ExperimentConfigParser(verbose)
247            config = parser.parse_experiment_config_from_yml(config_path,
248                                                             self.data_registry,
249                                                             self.experiment_factory)
250            if config is None:
251                return False
252        except FileNotFoundError as err:
253            raise FileNotFoundError(errno.ENOENT, 'Config file not found', config_path) from err
254
255        self.thread_processor.start(ThreadExperiment(
256            config.name,
257            events,
258            verbose,
259            pipeline_config=ExperimentPipelineConfig(
260                result_dir,
261                self.data_registry,
262                self.experiment_factory,
263                config,
264                resolve_experiment_start_run(result_dir),
265                num_runs,
266                num_threads
267            )
268        ))
269
270        return True

Validate an experiment for an additional number of runs.

It is not possible to validate an active experiment computation until it is done. The configuration file is expected to be stored in the specified result directory. Moreover, the configuration is validated before starting the experiment validation. If the configuration is invalidated this function will return False.

Args: events: the external events to dispatch during the experiment. result_dir: path to an existing experiment result directory. num_runs: the number of runs to validate the experiment. num_threads: the max number of threads the experiment can use. verbose: whether the internal events should give verbose output.

Raises: FileNotFoundError: when the configuration file does not exist in the result directory. KeyError: when a computation with the same name is already active. IOError: when the experiment result directory does not exist.

Returns: whether the experiment successfully started.

def get_active_computations(self) -> List[str]:
272    def get_active_computations(self) -> List[str]:
273        """Get the names of any active computations.
274
275        Returns:
276            a list of computations names that are currently running.
277        """
278        return self.thread_processor.get_active_threads()

Get the names of any active computations.

Returns: a list of computation names that are currently running.

def get_available_algorithms(self, model_type: str = None)
280    def get_available_algorithms(self, model_type: str = None):
281        """Get the available algorithms of the recommender system.
282
283        Args:
284            model_type: type of model to query for availability, accepted values are
285                TYPE_PREDICTION, TYPE_RECOMMENDATION or None.
286
287        Returns:
288            a dictionary with the availability of algorithms categorized by API.
289        """
290        return self.experiment_factory.get_sub_availability(
291            KEY_MODELS,
292            sub_type=model_type
293        )

Get the available algorithms of the recommender system.

Args: model_type: type of model to query for availability, accepted values are TYPE_PREDICTION, TYPE_RECOMMENDATION or None.

Returns: a dictionary with the availability of algorithms categorized by API.

def get_available_datasets(self) -> Dict[str, Any]:
295    def get_available_datasets(self) -> Dict[str, Any]:
296        """Get the available datasets of the recommender system.
297
298        Returns:
299            a dictionary where the key corresponds to the dataset name and
300                the value corresponds to the matrix information dictionary.
301        """
302        return self.data_registry.get_info()

Get the available datasets of the recommender system.

Returns: a dictionary where the key corresponds to the dataset name and the value corresponds to the matrix information dictionary.

def get_available_data_filters(self) -> Dict[str, Any]:
304    def get_available_data_filters(self) -> Dict[str, Any]:
305        """Get the available data filters of the recommender system.
306
307        Returns:
308            a dictionary with the availability of data filters.
309        """
310        return self.experiment_factory.get_sub_availability(
311            KEY_DATA,
312            sub_type=KEY_DATA_SUBSET
313        )

Get the available data filters of the recommender system.

Returns: a dictionary with the availability of data filters.

def get_available_metrics(self, eval_type: str = None) -> Dict[str, Any]:
315    def get_available_metrics(self, eval_type: str = None) -> Dict[str, Any]:
316        """Get the available metrics of the recommender system.
317
318        Args:
319            eval_type(str): type of evaluation to query for availability, accepted values are
320                TYPE_PREDICTION, TYPE_RECOMMENDATION or None.
321
322        Returns:
323            a dictionary with the availability of metrics categorized by evaluation type.
324        """
325        return self.experiment_factory.get_sub_availability(
326            KEY_EVALUATION,
327            sub_type=eval_type
328        )

Get the available metrics of the recommender system.

Args: eval_type(str): type of evaluation to query for availability, accepted values are TYPE_PREDICTION, TYPE_RECOMMENDATION or None.

Returns: a dictionary with the availability of metrics categorized by evaluation type.

def get_available_rating_converters(self) -> Dict[str, Any]:
330    def get_available_rating_converters(self) -> Dict[str, Any]:
331        """Get the available data rating converters of the recommender system.
332
333        Returns:
334            a dictionary with the availability of rating converters.
335        """
336        return self.experiment_factory.get_sub_availability(
337            KEY_DATA,
338            sub_type=KEY_RATING_CONVERTER
339        )

Get the available data rating converters of the recommender system.

Returns: a dictionary with the availability of rating converters.

def get_available_splitters(self) -> Dict[str, Any]:
341    def get_available_splitters(self) -> Dict[str, Any]:
342        """Get the available data splitters of the recommender system.
343
344        Returns:
345            a dictionary with the availability of data splitters.
346        """
347        return self.experiment_factory.get_sub_availability(
348            KEY_DATA,
349            sub_type=KEY_SPLITTING
350        )

Get the available data splitters of the recommender system.

Returns: a dictionary with the availability of data splitters.