"""
POLIXIR REVIVE, copyright (C) 2021-2023 Polixir Technologies Co., Ltd., is
distributed under the GNU Lesser General Public License (GNU LGPL).
POLIXIR REVIVE is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 3 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
"""
import os
import sys
import copy
import json
import logging
import warnings
import itertools
from typing import Dict, List, Union
from collections import defaultdict

import torch
import numpy as np
from torch.utils.tensorboard import SummaryWriter

from zoopt.parameter import ToolFunction
from ray.tune import Stopper
from ray.tune import CLIReporter as _CLIReporter
from ray.tune.error import TuneError
from ray.tune.logger import LoggerCallback, CSVLoggerCallback, JsonLoggerCallback
from ray.tune.utils import merge_dicts, flatten_dict
from ray.tune.experiment import _convert_to_experiment_list
from ray.tune.experiment.trial import Trial
from ray.tune.experiment.config_parser import _create_trial_from_spec
from ray.tune.search import BasicVariantGenerator, SearchGenerator, Searcher
from ray.tune.search.basic_variant import (_count_spec_samples, _count_variants,
                                           _TrialIterator, SERIALIZATION_THRESHOLD)
from ray.tune.search.variant_generator import format_vars, _resolve_nested_dict, _flatten_resolved_vars
from ray.tune.search.zoopt import ZOOptSearch as _ZOOptSearch
from ray.tune.search.zoopt.zoopt_search import DEFAULT_METRIC, Solution, zoopt

logger = logging.getLogger(__name__)

VALID_SUMMARY_TYPES = [int, float, np.float32, np.float64, np.int32, np.int64]
class SysStopper(Stopper):
    """Custom stopper controlling when individual trials and the whole
    Ray Tune run should stop.
    Reference : https://docs.ray.io/en/latest/tune/api/stoppers.html
    """
    def __init__(self, workspace, max_iter: int = 0, stop_callback=None):
self._workspace = workspace
self._max_iter = max_iter
self._iter = defaultdict(lambda: 0)
self.stop_callback = stop_callback
    # Custom stopping condition for a single trial
    def __call__(self, trial_id, result):
if self._max_iter > 0:
self._iter[trial_id] += 1
if self._iter[trial_id] >= self._max_iter:
return True
if result["stop_flag"]:
if self.stop_callback:
self.stop_callback()
return True
return False
    # Custom stopping condition for the entire run
    def stop_all(self):
if os.path.exists(os.path.join(self._workspace,'.env.json')):
with open(os.path.join(self._workspace,'.env.json'), 'r') as f:
_data = json.load(f)
if _data["REVIVE_STOP"]:
if self.stop_callback:
self.stop_callback()
return _data["REVIVE_STOP"]
else:
return False
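# Usage sketch (illustrative, not part of the original module): a SysStopper
# instance is intended to be passed to `ray.tune.run` via its `stop` argument.
# `my_trainable` and the workspace path are placeholders.
#
#   from ray import tune
#   stopper = SysStopper(workspace="./logs", max_iter=100)
#   tune.run(my_trainable, stop=stopper)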
class TuneTBLoggerCallback(LoggerCallback):
    r"""Custom TensorBoard logger for Ray Tune, modified from
    ray.tune.logger.TBXLogger.
    Reference: https://docs.ray.io/en/latest/tune/api/doc/ray.tune.logger.LoggerCallback.html
    """
def _init(self):
self._file_writer = SummaryWriter(self.logdir)
self.last_result = None
self.step = 0
    def on_result(self, result):
self.step += 1
tmp = result.copy()
flat_result = flatten_dict(tmp, delimiter="/")
for k, v in flat_result.items():
if type(v) in VALID_SUMMARY_TYPES:
self._file_writer.add_scalar(k, float(v), global_step=self.step)
elif isinstance(v, torch.Tensor):
v = v.view(-1)
self._file_writer.add_histogram(k, v, global_step=self.step)
self.last_result = flat_result
self.flush()
    def flush(self):
if self._file_writer is not None:
self._file_writer.flush()
def get_tune_callbacks():
    """Instantiate the CSV, JSON and TensorBoard logger callbacks for Ray Tune."""
    tune_logger_callbacks = [CSVLoggerCallback, JsonLoggerCallback, TuneTBLoggerCallback]
    return [callback() for callback in tune_logger_callbacks]
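# Usage sketch (illustrative): the instantiated logger callbacks replace Tune's
# default loggers when passed through the `callbacks` argument. `my_trainable`
# is a placeholder.
#
#   from ray import tune
#   tune.run(my_trainable, callbacks=get_tune_callbacks())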
class CLIReporter(_CLIReporter):
    """Command-line reporter modified to emit progress output through loguru.
    Reference : https://docs.ray.io/en/latest/tune/api/doc/ray.tune.CLIReporter.html
    """
    def report(self, trials: List, done: bool, *sys_info: Dict):
        message = self._progress_str(trials, done, *sys_info)
        # Import locally so the loguru logger does not shadow the module-level logger
        from loguru import logger
        logger.info(message)
class CustomSearchGenerator(SearchGenerator):
    """SearchGenerator customized to pass the experiment tag down to the
    trainable through the spec's config.
    Reference : https://github.com/ray-project/ray/blob/master/python/ray/tune/search/search_generator.py
    """
    def create_trial_if_possible(self, experiment_spec, output_path):
logger.debug("creating trial")
trial_id = Trial.generate_id()
suggested_config = self.searcher.suggest(trial_id)
if suggested_config == Searcher.FINISHED:
self._finished = True
logger.debug("Searcher has finished.")
return
if suggested_config is None:
return
spec = copy.deepcopy(experiment_spec)
spec["config"] = merge_dicts(spec["config"],copy.deepcopy(suggested_config))
# Create a new trial_id if duplicate trial is created
flattened_config = _resolve_nested_dict(spec["config"])
self._counter += 1
tag = "{0}_{1}".format(str(self._counter), format_vars(flattened_config))
spec['config']['tag'] = tag # pass down the tag
trial = _create_trial_from_spec(
spec,
output_path,
self._parser,
evaluated_params=flatten_dict(suggested_config),
experiment_tag=tag,
trial_id=trial_id)
return trial
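# Usage sketch (illustrative): CustomSearchGenerator wraps any Tune Searcher
# (e.g. the ZOOptSearch subclass below) so that each generated trial exposes
# its experiment tag under `config["tag"]`. `some_searcher` is a placeholder.
#
#   search_alg = CustomSearchGenerator(searcher=some_searcher)
#   tune.run(my_trainable, search_alg=search_alg)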
class TrialIterator(_TrialIterator):
    """_TrialIterator customized to pass the experiment tag down to the
    trainable through the spec's config.
    Reference : https://github.com/ray-project/ray/blob/master/python/ray/tune/search/basic_variant.py
    """
    def create_trial(self, resolved_vars, spec):
trial_id = self.uuid_prefix + ("%05d" % self.counter)
experiment_tag = str(self.counter)
        # Append the resolved vars so trials with different configs get distinct tags
        if resolved_vars:
            experiment_tag += "_{}".format(format_vars(resolved_vars))
        spec["config"]["tag"] = experiment_tag  # pass the tag down to the trainable
self.counter += 1
return _create_trial_from_spec(
spec,
self.output_path,
self.parser,
evaluated_params=_flatten_resolved_vars(resolved_vars),
trial_id=trial_id,
experiment_tag=experiment_tag)
class CustomBasicVariantGenerator(BasicVariantGenerator):
    """BasicVariantGenerator that uses the custom TrialIterator instead of
    the default _TrialIterator.
    Reference : https://github.com/ray-project/ray/blob/master/python/ray/tune/search/basic_variant.py
    """
    def add_configurations(
        self, experiments: Union["Experiment", List["Experiment"], Dict[str, Dict]]
    ):
"""Chains generator given experiment specifications.
Arguments:
experiments (Experiment | list | dict): Experiments to run.
"""
experiment_list = _convert_to_experiment_list(experiments)
for experiment in experiment_list:
grid_vals = _count_spec_samples(experiment.spec, num_samples=1)
lazy_eval = grid_vals > SERIALIZATION_THRESHOLD
if lazy_eval:
warnings.warn(
f"The number of pre-generated samples ({grid_vals}) "
"exceeds the serialization threshold "
f"({int(SERIALIZATION_THRESHOLD)}). Resume ability is "
"disabled. To fix this, reduce the number of "
"dimensions/size of the provided grid search.")
previous_samples = self._total_samples
points_to_evaluate = copy.deepcopy(self._points_to_evaluate)
self._total_samples += _count_variants(experiment.spec,
points_to_evaluate)
iterator = TrialIterator(
uuid_prefix=self._uuid_prefix,
num_samples=experiment.spec.get("num_samples", 1),
unresolved_spec=experiment.spec,
constant_grid_search=self._constant_grid_search,
output_path=experiment.dir_name,
points_to_evaluate=points_to_evaluate,
lazy_eval=lazy_eval,
start=previous_samples)
self._iterators.append(iterator)
self._trial_generator = itertools.chain(self._trial_generator,
iterator)
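# Usage sketch (illustrative): CustomBasicVariantGenerator is a drop-in
# replacement for BasicVariantGenerator; trials created from grid or random
# search variants then carry their experiment tag in `config["tag"]`.
#
#   from ray import tune
#   tune.run(my_trainable,
#            config={"lr": tune.grid_search([1e-3, 1e-4])},
#            search_alg=CustomBasicVariantGenerator())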
class Parameter(zoopt.Parameter):
    """zoopt.Parameter customized so that ZOOpt's resource allocation fully
    utilizes the available parallel workers.
    """
    def __init__(self, *args, **kwargs):
        self.parallel_num = kwargs.pop('parallel_num')
        super(Parameter, self).__init__(*args, **kwargs)
    def auto_set(self, budget):
        """
        Set train_size, positive_size and negative_size by the following rules:
        budget < 3        --> error;
        parallel_num < 4  --> fall back to zoopt's default auto_set;
        otherwise         --> train_size = parallel_num,
                              positive_size = max(int(0.2 * parallel_num), 1),
                              negative_size = train_size - positive_size.
        :param budget: number of calls to the objective function
        :return: no return value
        """
        if budget < 3:
            ToolFunction.log('parameter.py: budget too small')
            sys.exit(1)
        if self.parallel_num < 4:
            super(Parameter, self).auto_set(budget)
            return
        # Because this subclass is also named `Parameter`, the name-mangled
        # attributes below resolve to the same `_Parameter__*` fields that
        # zoopt.Parameter uses internally.
        self.__train_size = self.parallel_num
        self.__positive_size = max(int(0.2 * self.parallel_num), 1)
        self.__negative_size = self.__train_size - self.__positive_size
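# Worked example of the rule above (illustrative): with parallel_num = 10 and
# budget >= 3, auto_set yields
#   train_size    = 10
#   positive_size = max(int(0.2 * 10), 1) = 2
#   negative_size = 10 - 2 = 8
# so every parallel worker keeps a slot in ZOOpt's training population.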
class ZOOptSearch(_ZOOptSearch):
    """ZOOptSearch customized so that ZOOpt's resource allocation fully
    utilizes the available parallel workers.
    """
    def _setup_zoopt(self):
if self._metric is None and self._mode:
# If only a mode was passed, use anonymous metric
self._metric = DEFAULT_METRIC
_dim_list = []
for k in self._dim_dict:
self._dim_keys.append(k)
_dim_list.append(self._dim_dict[k])
init_samples = None
if self._points_to_evaluate:
logger.warning(
"`points_to_evaluate` is ignored by ZOOpt in versions <= 0.4.1."
)
init_samples = [
Solution(x=tuple(point[dim] for dim in self._dim_keys))
for point in self._points_to_evaluate
]
dim = zoopt.Dimension2(_dim_list)
        par = Parameter(budget=self._budget, init_samples=init_samples, parallel_num=self.parallel_num)
if self._algo == "sracos" or self._algo == "asracos":
from zoopt.algos.opt_algorithms.racos.sracos import SRacosTune
self.optimizer = SRacosTune(
dimension=dim,
parameter=par,
parallel_num=self.parallel_num,
**self.kwargs
)
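# Usage sketch (illustrative): wiring the customized ZOOptSearch into Tune via
# CustomSearchGenerator. The search space, budget and metric are assumptions
# made for the example; `parallel_num` matches Ray's ZOOptSearch signature.
#
#   searcher = ZOOptSearch(algo="asracos", budget=100, parallel_num=10,
#                          dim_dict={"lr": (zoopt.ValueType.CONTINUOUS,
#                                           [1e-5, 1e-2], 1e-6)},
#                          metric="loss", mode="min")
#   tune.run(my_trainable, search_alg=CustomSearchGenerator(searcher=searcher))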