src.fairreckitlib.recommender_system
This module contains the FairRecKit recommender system.
Classes:
RecommenderSystem: class that includes the entire recommender system.
This program has been developed by students from the bachelor Computer Science at Utrecht University within the Software Project course. © Copyright Utrecht University (Department of Information and Computing Sciences)
"""This module contains the FairRecKit recommender system.

Classes:

    RecommenderSystem: class that includes the entire recommender system.

This program has been developed by students from the bachelor Computer Science at
Utrecht University within the Software Project course.
© Copyright Utrecht University (Department of Information and Computing Sciences)
"""

import errno
import os
from typing import Any, Callable, Dict, List, Optional, Union

from .core.threading.thread_processor import ThreadProcessor
from .data.data_factory import KEY_DATA
from .data.filter.filter_constants import KEY_DATA_SUBSET
from .data.ratings.convert_constants import KEY_RATING_CONVERTER
from .data.set.dataset_registry import DataRegistry
from .data.split.split_constants import KEY_SPLITTING
from .evaluation.evaluation_factory import KEY_EVALUATION
from .experiment.experiment_config import PredictorExperimentConfig, RecommenderExperimentConfig
from .experiment.experiment_config_parser import ExperimentConfigParser
from .experiment.experiment_factory import create_experiment_factory
from .experiment.experiment_run import ExperimentPipelineConfig, resolve_experiment_start_run
from .experiment.experiment_thread import ThreadExperiment
from .model.model_factory import KEY_MODELS


class RecommenderSystem:
    """The FairReckit Recommender System.

    Defines the top level API intended for use by applications.

    Public methods:

        abort_computation
        run_experiment
        run_experiment_from_yml
        validate_experiment
        get_active_computations
        get_available_algorithms
        get_available_datasets
        get_available_data_filters
        get_available_metrics
        get_available_rating_converters
        get_available_splitters
    """

    def __init__(self, data_dir: str, result_dir: str, verbose: bool = True):
        """Construct the RecommenderSystem.

        Initializes the data registry with available datasets on which the
        recommender system depends and therefore the data directory is expected to exist.
        The result directory however is created when it does not exist yet.

        Args:
            data_dir: path to the directory that contains the datasets.
            result_dir: path to the directory to store computation results.
            verbose: whether the data registry should give verbose output on startup.

        Raises:
            IOError: when the specified data directory does not exist.
        """
        try:
            self.data_registry = DataRegistry(data_dir, verbose=verbose)
        except IOError as err:
            raise IOError('Failed to initialize DataRegistry: '
                          'unknown data directory => ' + data_dir) from err

        self.result_dir = result_dir
        # Create the result directory (including missing parents) when it does not
        # exist yet; exist_ok avoids a race between the isdir check and creation.
        os.makedirs(self.result_dir, exist_ok=True)

        self.experiment_factory = create_experiment_factory(self.data_registry)
        self.thread_processor = ThreadProcessor()

    def abort_computation(self, computation_name: str) -> bool:
        """Attempt to abort a running computation thread.

        The name of the computation is the same as specified in the configuration
        file when the computation is run. If the computation with the specified
        name does not exist this function returns False.
        Note that the computation is only requested to abort as soon as possible,
        therefore it might take a while until the computation actually stops.

        Args:
            computation_name: name of the active computation thread to abort.

        Returns:
            whether the computation is successfully requested to abort.
        """
        if not self.thread_processor.is_active_thread(computation_name):
            return False

        self.thread_processor.stop(computation_name)
        return True

    def run_experiment(
            self,
            config: Union[PredictorExperimentConfig, RecommenderExperimentConfig],
            *,
            events: Optional[Dict[str, Callable[[Any], None]]] = None,
            num_threads: int = 0,
            verbose: bool = True,
            validate_config: bool = True) -> bool:
        """Run an experiment with the specified configuration.

        It is advised to validate the configuration (default) before running the
        experiment, to make sure the configuration describes a valid experiment.
        The configuration is invalid when it contains no selected datasets or
        models, only the evaluation is optional. If the configuration is found
        to be invalid this function returns False.

        Args:
            config: the configuration of the experiment.
            events: the external events to dispatch during the experiment.
            num_threads: the max number of threads the experiment can use.
            verbose: whether the internal events should give verbose output.
            validate_config: whether to validate the configuration beforehand.

        Raises:
            KeyError: when a computation with the same name is already active.
            IOError: when the experiment result directory already exists.
            TypeError: when the provided configuration is not a valid experiment configuration.

        Returns:
            whether the experiment successfully started.
        """
        if not isinstance(config, (PredictorExperimentConfig, RecommenderExperimentConfig)):
            raise TypeError('Invalid experiment configuration type.')

        result_dir = os.path.join(self.result_dir, config.name)
        if os.path.isdir(result_dir):
            raise IOError('Result already exists: ' + result_dir)

        if validate_config:
            parser = ExperimentConfigParser(verbose)
            config = parser.parse_experiment_config(config.to_yml_format(),
                                                   self.data_registry,
                                                   self.experiment_factory)
            if config is None:
                return False

        self.thread_processor.start(ThreadExperiment(
            config.name,
            events,
            verbose,
            pipeline_config=ExperimentPipelineConfig(
                result_dir,
                self.data_registry,
                self.experiment_factory,
                config,
                0,  # a fresh experiment always starts at run 0
                1,  # a fresh experiment performs a single run
                num_threads
            )
        ))

        return True

    def run_experiment_from_yml(
            self,
            file_path: str,
            *,
            events: Optional[Dict[str, Callable[[Any], None]]] = None,
            num_threads: int = 0,
            verbose: bool = True) -> bool:
        """Run an experiment from a yml file.

        The configuration in the file is validated before starting the experiment.
        It is invalid when it contains no selected datasets or models,
        only the evaluation is optional. If the configuration is found to be
        invalid this function returns False.

        Args:
            file_path: path to the yml file without extension.
            events: the external events to dispatch during the experiment.
            num_threads: the max number of threads the experiment can use.
            verbose: whether the internal events should give verbose output.

        Raises:
            FileNotFoundError: when the specified yml file does not exist.
            KeyError: when a computation with the same name is already active.
            IOError: when the experiment result directory already exists.

        Returns:
            whether the experiment successfully started.
        """
        try:
            parser = ExperimentConfigParser(verbose)
            config = parser.parse_experiment_config_from_yml(file_path,
                                                             self.data_registry,
                                                             self.experiment_factory)
            if config is None:
                return False
        except FileNotFoundError as err:
            raise FileNotFoundError(errno.ENOENT, 'Config file not found', file_path) from err

        # The configuration is already validated above, skip a second validation.
        return self.run_experiment(
            config,
            events=events,
            num_threads=num_threads,
            verbose=verbose,
            validate_config=False
        )

    def validate_experiment(
            self,
            result_dir: str,
            num_runs: int,
            *,
            events: Optional[Dict[str, Callable[[Any], None]]] = None,
            num_threads: int = 0,
            verbose: bool = True) -> bool:
        """Validate an experiment for an additional number of runs.

        It is not possible to validate an active experiment computation until it is done.
        The configuration file is expected to be stored in the specified result directory.
        Moreover, the configuration is validated before starting the experiment validation.
        If the configuration is found to be invalid this function returns False.

        Args:
            result_dir: path to an existing experiment result directory.
            num_runs: the number of runs to validate the experiment.
            events: the external events to dispatch during the experiment.
            num_threads: the max number of threads the experiment can use.
            verbose: whether the internal events should give verbose output.

        Raises:
            FileNotFoundError: when the configuration file does not exist in the result directory.
            KeyError: when a computation with the same name is already active.
            IOError: when the experiment result directory does not exist.

        Returns:
            whether the experiment successfully started.
        """
        result_dir = os.path.join(self.result_dir, result_dir)
        if not os.path.isdir(result_dir):
            raise IOError('Result does not exist: ' + result_dir)

        config_path = os.path.join(result_dir, 'config')
        try:
            parser = ExperimentConfigParser(verbose)
            config = parser.parse_experiment_config_from_yml(config_path,
                                                             self.data_registry,
                                                             self.experiment_factory)
            if config is None:
                return False
        except FileNotFoundError as err:
            raise FileNotFoundError(errno.ENOENT, 'Config file not found', config_path) from err

        self.thread_processor.start(ThreadExperiment(
            config.name,
            events,
            verbose,
            pipeline_config=ExperimentPipelineConfig(
                result_dir,
                self.data_registry,
                self.experiment_factory,
                config,
                # Resume numbering after the runs already present in the directory.
                resolve_experiment_start_run(result_dir),
                num_runs,
                num_threads
            )
        ))

        return True

    def get_active_computations(self) -> List[str]:
        """Get the names of any active computations.

        Returns:
            a list of computation names that are currently running.
        """
        return self.thread_processor.get_active_threads()

    def get_available_algorithms(self, model_type: Optional[str] = None) -> Dict[str, Any]:
        """Get the available algorithms of the recommender system.

        Args:
            model_type: type of model to query for availability, accepted values are
                TYPE_PREDICTION, TYPE_RECOMMENDATION or None.

        Returns:
            a dictionary with the availability of algorithms categorized by API.
        """
        return self.experiment_factory.get_sub_availability(
            KEY_MODELS,
            sub_type=model_type
        )

    def get_available_datasets(self) -> Dict[str, Any]:
        """Get the available datasets of the recommender system.

        Returns:
            a dictionary where the key corresponds to the dataset name and
            the value corresponds to the matrix information dictionary.
        """
        return self.data_registry.get_info()

    def get_available_data_filters(self) -> Dict[str, Any]:
        """Get the available data filters of the recommender system.

        Returns:
            a dictionary with the availability of data filters.
        """
        return self.experiment_factory.get_sub_availability(
            KEY_DATA,
            sub_type=KEY_DATA_SUBSET
        )

    def get_available_metrics(self, eval_type: Optional[str] = None) -> Dict[str, Any]:
        """Get the available metrics of the recommender system.

        Args:
            eval_type: type of evaluation to query for availability, accepted values are
                TYPE_PREDICTION, TYPE_RECOMMENDATION or None.

        Returns:
            a dictionary with the availability of metrics categorized by evaluation type.
        """
        return self.experiment_factory.get_sub_availability(
            KEY_EVALUATION,
            sub_type=eval_type
        )

    def get_available_rating_converters(self) -> Dict[str, Any]:
        """Get the available data rating converters of the recommender system.

        Returns:
            a dictionary with the availability of rating converters.
        """
        return self.experiment_factory.get_sub_availability(
            KEY_DATA,
            sub_type=KEY_RATING_CONVERTER
        )

    def get_available_splitters(self) -> Dict[str, Any]:
        """Get the available data splitters of the recommender system.

        Returns:
            a dictionary with the availability of data splitters.
        """
        return self.experiment_factory.get_sub_availability(
            KEY_DATA,
            sub_type=KEY_SPLITTING
        )
# NOTE(review): this is a second copy of the RecommenderSystem class that the
# extraction duplicated; consider removing one of the two copies.
class RecommenderSystem:
    """The FairReckit Recommender System.

    Defines the top level API intended for use by applications.

    Public methods:

        abort_computation
        run_experiment
        run_experiment_from_yml
        validate_experiment
        get_active_computations
        get_available_algorithms
        get_available_datasets
        get_available_data_filters
        get_available_metrics
        get_available_rating_converters
        get_available_splitters
    """

    def __init__(self, data_dir: str, result_dir: str, verbose: bool = True):
        """Construct the RecommenderSystem.

        Initializes the data registry with available datasets on which the
        recommender system depends and therefore the data directory is expected to exist.
        The result directory however is created when non-existing.

        Args:
            data_dir: path to the directory that contains the datasets.
            result_dir: path to the directory to store computation results.
            verbose: whether the data registry should give verbose output on startup.

        Raises:
            IOError: when the specified data directory does not exist.
        """
        try:
            self.data_registry = DataRegistry(data_dir, verbose=verbose)
        except IOError as err:
            raise IOError('Failed to initialize DataRegistry: '
                          'unknown data directory => ' + data_dir) from err

        self.result_dir = result_dir
        if not os.path.isdir(self.result_dir):
            os.mkdir(self.result_dir)

        self.experiment_factory = create_experiment_factory(self.data_registry)
        self.thread_processor = ThreadProcessor()

    def abort_computation(self, computation_name: str) -> bool:
        """Attempt to abort a running computation thread.

        The name of the computation is the same as specified in the configuration
        file when the computation is run. If the computation with the specified
        name does not exist this function returns False.
        Note that the computation is only requested to abort as soon as possible,
        therefore it might take a while until the computation actually stops.

        Args:
            computation_name: name of the active computation thread to abort.

        Returns:
            whether the computation is successfully requested to abort.
        """
        if not self.thread_processor.is_active_thread(computation_name):
            return False

        self.thread_processor.stop(computation_name)
        return True

    def run_experiment(
            self,
            config: Union[PredictorExperimentConfig, RecommenderExperimentConfig],
            *,
            events: Dict[str, Callable[[Any], None]] = None,
            num_threads: int = 0,
            verbose: bool = True,
            validate_config: bool = True) -> bool:
        """Run an experiment with the specified configuration.

        It is advised to validate the configuration (default) before running the
        experiment, to make sure the configuration describes a valid experiment.
        The configuration is invalid when it contains no selected datasets or
        models, only the evaluation is optional. If the configuration is invalidated
        this function will return False.

        Args:
            events: the external events to dispatch during the experiment.
            config: the configuration of the experiment.
            num_threads: the max number of threads the experiment can use.
            verbose: whether the internal events should give verbose output.
            validate_config: whether to validate the configuration beforehand.

        Raises:
            KeyError: when a computation with the same name is already active.
            IOError: when the experiment result directory already exists.
            TypeError: when the provided configuration is not a valid experiment configuration.

        Returns:
            whether the experiment successfully started.
        """
        if not isinstance(config, (PredictorExperimentConfig, RecommenderExperimentConfig)):
            raise TypeError('Invalid experiment configuration type.')

        result_dir = os.path.join(self.result_dir, config.name)
        if os.path.isdir(result_dir):
            raise IOError('Result already exists: ' + result_dir)

        if validate_config:
            parser = ExperimentConfigParser(verbose)
            config = parser.parse_experiment_config(config.to_yml_format(),
                                                   self.data_registry,
                                                   self.experiment_factory)
            if config is None:
                return False

        self.thread_processor.start(ThreadExperiment(
            config.name,
            events,
            verbose,
            pipeline_config=ExperimentPipelineConfig(
                result_dir,
                self.data_registry,
                self.experiment_factory,
                config,
                0,
                1,
                num_threads
            )
        ))

        return True

    def run_experiment_from_yml(
            self,
            file_path: str,
            *,
            events: Dict[str, Callable[[Any], None]] = None,
            num_threads: int = 0,
            verbose: bool = True) -> bool:
        """Run an experiment from a yml file.

        The configuration in the file is validated before starting the experiment.
        It is invalid when it contains no selected datasets or models,
        only the evaluation is optional. If the configuration is invalidated
        this function will return False.

        Args:
            events: the external events to dispatch during the experiment.
            file_path: path to the yml file without extension.
            num_threads: the max number of threads the experiment can use.
            verbose: whether the internal events should give verbose output.

        Raises:
            FileNotFoundError: when the specified yml file does not exist.
            KeyError: when a computation with the same name is already active.
            IOError: when the experiment result directory already exists.

        Returns:
            whether the experiment successfully started.
        """
        try:
            parser = ExperimentConfigParser(verbose)
            config = parser.parse_experiment_config_from_yml(file_path,
                                                             self.data_registry,
                                                             self.experiment_factory)
            if config is None:
                return False
        except FileNotFoundError as err:
            raise FileNotFoundError(errno.ENOENT, 'Config file not found', file_path) from err

        return self.run_experiment(
            config,
            events=events,
            num_threads=num_threads,
            verbose=verbose,
            validate_config=False
        )

    def validate_experiment(
            self,
            result_dir: str,
            num_runs: int,
            *,
            events: Dict[str, Callable[[Any], None]] = None,
            num_threads: int = 0,
            verbose: bool = True) -> bool:
        """Validate an experiment for an additional number of runs.

        It is not possible to validate an active experiment computation until it is done.
        The configuration file is expected to be stored in the specified result directory.
        Moreover, the configuration is validated before starting the experiment validation.
        If the configuration is invalidated this function will return False.

        Args:
            events: the external events to dispatch during the experiment.
            result_dir: path to an existing experiment result directory.
            num_runs: the number of runs to validate the experiment.
            num_threads: the max number of threads the experiment can use.
            verbose: whether the internal events should give verbose output.

        Raises:
            FileNotFoundError: when the configuration file does not exist in the result directory.
            KeyError: when a computation with the same name is already active.
            IOError: when the experiment result directory does not exist.

        Returns:
            whether the experiment successfully started.
        """
        result_dir = os.path.join(self.result_dir, result_dir)
        if not os.path.isdir(result_dir):
            raise IOError('Result does not exist: ' + result_dir)

        config_path = os.path.join(result_dir, 'config')
        try:
            parser = ExperimentConfigParser(verbose)
            config = parser.parse_experiment_config_from_yml(config_path,
                                                             self.data_registry,
                                                             self.experiment_factory)
            if config is None:
                return False
        except FileNotFoundError as err:
            raise FileNotFoundError(errno.ENOENT, 'Config file not found', config_path) from err

        self.thread_processor.start(ThreadExperiment(
            config.name,
            events,
            verbose,
            pipeline_config=ExperimentPipelineConfig(
                result_dir,
                self.data_registry,
                self.experiment_factory,
                config,
                resolve_experiment_start_run(result_dir),
                num_runs,
                num_threads
            )
        ))

        return True

    def get_active_computations(self) -> List[str]:
        """Get the names of any active computations.

        Returns:
            a list of computations names that are currently running.
        """
        return self.thread_processor.get_active_threads()

    def get_available_algorithms(self, model_type: str = None):
        """Get the available algorithms of the recommender system.

        Args:
            model_type: type of model to query for availability, accepted values are
                TYPE_PREDICTION, TYPE_RECOMMENDATION or None.

        Returns:
            a dictionary with the availability of algorithms categorized by API.
        """
        return self.experiment_factory.get_sub_availability(
            KEY_MODELS,
            sub_type=model_type
        )

    def get_available_datasets(self) -> Dict[str, Any]:
        """Get the available datasets of the recommender system.

        Returns:
            a dictionary where the key corresponds to the dataset name and
            the value corresponds to the matrix information dictionary.
        """
        return self.data_registry.get_info()

    def get_available_data_filters(self) -> Dict[str, Any]:
        """Get the available data filters of the recommender system.

        Returns:
            a dictionary with the availability of data filters.
        """
        return self.experiment_factory.get_sub_availability(
            KEY_DATA,
            sub_type=KEY_DATA_SUBSET
        )

    def get_available_metrics(self, eval_type: str = None) -> Dict[str, Any]:
        """Get the available metrics of the recommender system.

        Args:
            eval_type(str): type of evaluation to query for availability, accepted values are
                TYPE_PREDICTION, TYPE_RECOMMENDATION or None.

        Returns:
            a dictionary with the availability of metrics categorized by evaluation type.
        """
        return self.experiment_factory.get_sub_availability(
            KEY_EVALUATION,
            sub_type=eval_type
        )

    def get_available_rating_converters(self) -> Dict[str, Any]:
        """Get the available data rating converters of the recommender system.

        Returns:
            a dictionary with the availability of rating converters.
        """
        return self.experiment_factory.get_sub_availability(
            KEY_DATA,
            sub_type=KEY_RATING_CONVERTER
        )

    def get_available_splitters(self) -> Dict[str, Any]:
        """Get the available data splitters of the recommender system.

        Returns:
            a dictionary with the availability of data splitters.
        """
        return self.experiment_factory.get_sub_availability(
            KEY_DATA,
            sub_type=KEY_SPLITTING
        )
The FairReckit Recommender System.
Defines the top level API intended for use by applications.
Public methods:
abort_computation run_experiment run_experiment_from_yml validate_experiment get_active_computations get_available_algorithms get_available_datasets get_available_data_filters get_available_metrics get_available_rating_converters get_available_splitters
52 def __init__(self, data_dir: str, result_dir: str, verbose: bool=True): 53 """Construct the RecommenderSystem. 54 55 Initializes the data registry with available datasets on which the 56 recommender system depends and therefore the data directory is expected to exist. 57 The result directory however is created when non-existing. 58 59 Args: 60 data_dir: path to the directory that contains the datasets. 61 result_dir: path to the directory to store computation results. 62 verbose: whether the data registry should give verbose output on startup. 63 64 Raises: 65 IOError: when the specified data directory does not exist. 66 """ 67 try: 68 self.data_registry = DataRegistry(data_dir, verbose=verbose) 69 except IOError as err: 70 raise IOError('Failed to initialize DataRegistry: ' 71 'unknown data directory => ' + data_dir) from err 72 73 self.result_dir = result_dir 74 if not os.path.isdir(self.result_dir): 75 os.mkdir(self.result_dir) 76 77 self.experiment_factory = create_experiment_factory(self.data_registry) 78 self.thread_processor = ThreadProcessor()
Construct the RecommenderSystem.
Initializes the data registry with the available datasets on which the recommender system depends; the data directory is therefore expected to exist. The result directory, however, is created if it does not already exist.
Args: data_dir: path to the directory that contains the datasets. result_dir: path to the directory to store computation results. verbose: whether the data registry should give verbose output on startup.
Raises: IOError: when the specified data directory does not exist.
80 def abort_computation(self, computation_name: str) -> bool: 81 """Attempt to abort a running computation thread. 82 83 The name of the computation is the same as specified in the configuration 84 file when the computation is run. If the computation with the specified 85 name does not exist this function returns False. 86 Note that the computation is only requested to abort as soon as possible, 87 therefore it might take a while until the computation actually stops. 88 89 Args: 90 computation_name: name of the active computation thread to abort. 91 92 Returns: 93 whether the computation is successfully requested to abort. 94 """ 95 if not self.thread_processor.is_active_thread(computation_name): 96 return False 97 98 self.thread_processor.stop(computation_name) 99 return True
Attempt to abort a running computation thread.
The name of the computation is the same as specified in the configuration file when the computation is run. If the computation with the specified name does not exist this function returns False. Note that the computation is only requested to abort as soon as possible, therefore it might take a while until the computation actually stops.
Args: computation_name: name of the active computation thread to abort.
Returns: whether the computation is successfully requested to abort.
def run_experiment(
        self,
        config: Union[PredictorExperimentConfig, RecommenderExperimentConfig],
        *,
        events: Dict[str, Callable[[Any], None]] = None,
        num_threads: int = 0,
        verbose: bool = True,
        validate_config: bool = True) -> bool:
    """Run an experiment with the specified configuration.

    It is advised to validate the configuration (default) before running the
    experiment, to make sure the configuration describes a valid experiment.
    The configuration is invalid when it contains no selected datasets or
    models, only the evaluation is optional. If the configuration is invalidated
    this function will return False.

    Args:
        events: the external events to dispatch during the experiment.
        config: the configuration of the experiment.
        num_threads: the max number of threads the experiment can use.
        verbose: whether the internal events should give verbose output.
        validate_config: whether to validate the configuration beforehand.

    Raises:
        KeyError: when a computation with the same name is already active.
        IOError: when the experiment result directory already exists.
        TypeError: when the provided configuration is not a valid experiment configuration.

    Returns:
        whether the experiment successfully started.
    """
    if not isinstance(config, (PredictorExperimentConfig, RecommenderExperimentConfig)):
        raise TypeError('Invalid experiment configuration type.')

    result_dir = os.path.join(self.result_dir, config.name)
    if os.path.isdir(result_dir):
        raise IOError('Result already exists: ' + result_dir)

    if validate_config:
        # Round-trip through the yml format so the same parser validates both
        # in-memory and file-based configurations.
        parser = ExperimentConfigParser(verbose)
        config = parser.parse_experiment_config(config.to_yml_format(),
                                               self.data_registry,
                                               self.experiment_factory)
        if config is None:
            return False

    self.thread_processor.start(ThreadExperiment(
        config.name,
        events,
        verbose,
        pipeline_config=ExperimentPipelineConfig(
            result_dir,
            self.data_registry,
            self.experiment_factory,
            config,
            0,  # a fresh experiment starts at run 0
            1,  # and performs a single run
            num_threads
        )
    ))

    return True
Run an experiment with the specified configuration.
It is advised to validate the configuration (the default) before running the experiment, to make sure the configuration describes a valid experiment. The configuration is invalid when it contains no selected datasets or models; only the evaluation is optional. If the configuration is found to be invalid, this function returns False.
Args: events: the external events to dispatch during the experiment. config: the configuration of the experiment. num_threads: the max number of threads the experiment can use. verbose: whether the internal events should give verbose output. validate_config: whether to validate the configuration beforehand.
Raises: KeyError: when a computation with the same name is already active. IOError: when the experiment result directory already exists. TypeError: when the provided configuration is not a valid experiment configuration.
Returns: whether the experiment successfully started.
164 def run_experiment_from_yml( 165 self, 166 file_path: str, 167 *, 168 events: Dict[str, Callable[[Any], None]] = None, 169 num_threads: int = 0, 170 verbose: bool = True) -> bool: 171 """Run an experiment from a yml file. 172 173 The configuration in the file is validated before starting the experiment. 174 It is invalid when it contains no selected datasets or models, 175 only the evaluation is optional. If the configuration is invalidated 176 this function will return False. 177 178 Args: 179 events: the external events to dispatch during the experiment. 180 file_path: path to the yml file without extension. 181 num_threads: the max number of threads the experiment can use. 182 verbose: whether the internal events should give verbose output. 183 184 Raises: 185 FileNotFoundError: when the specified yml file does not exist. 186 KeyError: when a computation with the same name is already active. 187 IOError: when the experiment result directory already exists. 188 189 Returns: 190 whether the experiment successfully started. 191 """ 192 try: 193 parser = ExperimentConfigParser(verbose) 194 config = parser.parse_experiment_config_from_yml(file_path, 195 self.data_registry, 196 self.experiment_factory) 197 if config is None: 198 return False 199 except FileNotFoundError as err: 200 raise FileNotFoundError(errno.ENOENT, 'Config file not found', file_path) from err 201 202 return self.run_experiment( 203 config, 204 events=events, 205 num_threads=num_threads, 206 verbose=verbose, 207 validate_config=False 208 )
Run an experiment from a yml file.
The configuration in the file is validated before the experiment is started. It is invalid when it contains no selected datasets or models; only the evaluation is optional. If the configuration is found to be invalid, this function returns False.
Args: events: the external events to dispatch during the experiment. file_path: path to the yml file without extension. num_threads: the max number of threads the experiment can use. verbose: whether the internal events should give verbose output.
Raises: FileNotFoundError: when the specified yml file does not exist. KeyError: when a computation with the same name is already active. IOError: when the experiment result directory already exists.
Returns: whether the experiment successfully started.
def validate_experiment(
        self,
        result_dir: str,
        num_runs: int,
        *,
        events: Dict[str, Callable[[Any], None]] = None,
        num_threads: int = 0,
        verbose: bool = True) -> bool:
    """Validate an experiment for an additional number of runs.

    It is not possible to validate an active experiment computation until it is done.
    The configuration file is expected to be stored in the specified result directory.
    Moreover, the configuration is validated before starting the experiment validation.
    If the configuration is invalidated this function will return False.

    Args:
        events: the external events to dispatch during the experiment.
        result_dir: path to an existing experiment result directory.
        num_runs: the number of runs to validate the experiment.
        num_threads: the max number of threads the experiment can use.
        verbose: whether the internal events should give verbose output.

    Raises:
        FileNotFoundError: when the configuration file does not exist in the result directory.
        KeyError: when a computation with the same name is already active.
        IOError: when the experiment result directory does not exist.

    Returns:
        whether the experiment successfully started.
    """
    result_dir = os.path.join(self.result_dir, result_dir)
    if not os.path.isdir(result_dir):
        raise IOError('Result does not exist: ' + result_dir)

    config_path = os.path.join(result_dir, 'config')
    try:
        parser = ExperimentConfigParser(verbose)
        config = parser.parse_experiment_config_from_yml(config_path,
                                                         self.data_registry,
                                                         self.experiment_factory)
        if config is None:
            return False
    except FileNotFoundError as err:
        raise FileNotFoundError(errno.ENOENT, 'Config file not found', config_path) from err

    self.thread_processor.start(ThreadExperiment(
        config.name,
        events,
        verbose,
        pipeline_config=ExperimentPipelineConfig(
            result_dir,
            self.data_registry,
            self.experiment_factory,
            config,
            # Resume run numbering after the runs already in the directory.
            resolve_experiment_start_run(result_dir),
            num_runs,
            num_threads
        )
    ))

    return True
Validate an experiment for an additional number of runs.
It is not possible to validate an active experiment computation until it is done. The configuration file is expected to be stored in the specified result directory. Moreover, the configuration is validated before starting the experiment validation. If the configuration is invalid, this function will return False.
Args: events: the external events to dispatch during the experiment. result_dir: path to an existing experiment result directory. num_runs: the number of runs to validate the experiment. num_threads: the max number of threads the experiment can use. verbose: whether the internal events should give verbose output.
Raises: FileNotFoundError: when the configuration file does not exist in the result directory. KeyError: when a computation with the same name is already active. IOError: when the experiment result directory does not exist.
Returns: whether the experiment successfully started.
272 def get_active_computations(self) -> List[str]: 273 """Get the names of any active computations. 274 275 Returns: 276 a list of computations names that are currently running. 277 """ 278 return self.thread_processor.get_active_threads()
Get the names of any active computations.
Returns: a list of computation names that are currently running.
280 def get_available_algorithms(self, model_type: str = None): 281 """Get the available algorithms of the recommender system. 282 283 Args: 284 model_type: type of model to query for availability, accepted values are 285 TYPE_PREDICTION, TYPE_RECOMMENDATION or None. 286 287 Returns: 288 a dictionary with the availability of algorithms categorized by API. 289 """ 290 return self.experiment_factory.get_sub_availability( 291 KEY_MODELS, 292 sub_type=model_type 293 )
Get the available algorithms of the recommender system.
Args: model_type: type of model to query for availability, accepted values are TYPE_PREDICTION, TYPE_RECOMMENDATION or None.
Returns: a dictionary with the availability of algorithms categorized by API.
295 def get_available_datasets(self) -> Dict[str, Any]: 296 """Get the available datasets of the recommender system. 297 298 Returns: 299 a dictionary where the key corresponds to the dataset name and 300 the value corresponds to the matrix information dictionary. 301 """ 302 return self.data_registry.get_info()
Get the available datasets of the recommender system.
Returns: a dictionary where the key corresponds to the dataset name and the value corresponds to the matrix information dictionary.
304 def get_available_data_filters(self) -> Dict[str, Any]: 305 """Get the available data filters of the recommender system. 306 307 Returns: 308 a dictionary with the availability of data filters. 309 """ 310 return self.experiment_factory.get_sub_availability( 311 KEY_DATA, 312 sub_type=KEY_DATA_SUBSET 313 )
Get the available data filters of the recommender system.
Returns: a dictionary with the availability of data filters.
315 def get_available_metrics(self, eval_type: str = None) -> Dict[str, Any]: 316 """Get the available metrics of the recommender system. 317 318 Args: 319 eval_type(str): type of evaluation to query for availability, accepted values are 320 TYPE_PREDICTION, TYPE_RECOMMENDATION or None. 321 322 Returns: 323 a dictionary with the availability of metrics categorized by evaluation type. 324 """ 325 return self.experiment_factory.get_sub_availability( 326 KEY_EVALUATION, 327 sub_type=eval_type 328 )
Get the available metrics of the recommender system.
Args: eval_type(str): type of evaluation to query for availability, accepted values are TYPE_PREDICTION, TYPE_RECOMMENDATION or None.
Returns: a dictionary with the availability of metrics categorized by evaluation type.
330 def get_available_rating_converters(self) -> Dict[str, Any]: 331 """Get the available data rating converters of the recommender system. 332 333 Returns: 334 a dictionary with the availability of rating converters. 335 """ 336 return self.experiment_factory.get_sub_availability( 337 KEY_DATA, 338 sub_type=KEY_RATING_CONVERTER 339 )
Get the available data rating converters of the recommender system.
Returns: a dictionary with the availability of rating converters.
341 def get_available_splitters(self) -> Dict[str, Any]: 342 """Get the available data splitters of the recommender system. 343 344 Returns: 345 a dictionary with the availability of data splitters. 346 """ 347 return self.experiment_factory.get_sub_availability( 348 KEY_DATA, 349 sub_type=KEY_SPLITTING 350 )
Get the available data splitters of the recommender system.
Returns: a dictionary with the availability of data splitters.