ocl.scheduling

Scheduling of learning rate and hyperparameters.

HPScheduler

Bases: torch.nn.Module

Base class for scheduling of scalar hyperparameters based on the number of training steps.

A separate callback ocl.callbacks.UpdateHyperparameterScheduling calls update_global_step to update the state of the hyperparameter according to the scheduling.

This class can be used in computations like a regular float as long as operations are applied from the left; otherwise it needs to be converted with float(instance), which returns the currently scheduled value of the hyperparameter.
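
The snippet below is a minimal sketch of this behaviour. ConstantHPScheduler is a hypothetical subclass introduced only for illustration; in practice one of the concrete schedulers documented below would be used, and update_global_step would be called by the UpdateHyperparameterScheduling callback rather than by hand.

from ocl.scheduling import HPScheduler

class ConstantHPScheduler(HPScheduler):  # hypothetical subclass, for illustration only
    def __init__(self, value: float):
        super().__init__()
        self.value = value

    def compute_scheduled_value(self) -> float:
        return self.value

hp = ConstantHPScheduler(0.5)
hp.update_global_step(100)  # normally done by the UpdateHyperparameterScheduling callback

print(hp + 1.0)         # 1.5 -- works, the scheduler is the left operand
print(float(hp) * 2.0)  # 1.0 -- explicit conversion needed when the scheduler is on the right
# print(2.0 * hp)       # would raise TypeError, since no __rmul__ is defined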

Source code in ocl/scheduling.py
class HPScheduler(torch.nn.Module, metaclass=abc.ABCMeta):
    """Base class for scheduling of scalar hyperparameters based on the number of training steps.

    A separate callback [ocl.callbacks.UpdateHyperparameterScheduling][] calls
    `update_global_step` to update the state of the hyperparameter according
    to the scheduling.

    This class can be used in computations like a regular float as long as
    operations are applied from the left; otherwise it needs to be converted
    with `float(instance)`, which returns the currently scheduled value of the
    hyperparameter.


    """

    def __init__(self):
        super().__init__()
        self.last_global_step: Optional[int] = None

    def update_global_step(self, global_step: int):
        """Update global step used in `compute_scheduled_value`.

        This should be called by the
        [ocl.callbacks.UpdateHyperparameterScheduling][] callback.

        Args:
            global_step: The current global step.
        """
        self.last_global_step = global_step

    @abc.abstractmethod
    def compute_scheduled_value(self) -> float:
        """Return current value of hyperparameter based on global step.

        Returns:
            The scheduled hyperparameter value.
        """
        pass

    def __float__(self):
        if self.last_global_step is None:
            raise RuntimeError(
                "HPScheduler was not provided with last_global_step. "
                "Make sure UpdateHyperparameterScheduling callback is called."
            )
        return self.compute_scheduled_value()

    def __add__(self, other):
        return float(self) + other

    def __sub__(self, other):
        return float(self) - other

    def __mul__(self, other):
        return float(self) * other

    def __truediv__(self, other):
        return float(self) / other

update_global_step

Update global step used in compute_scheduled_value.

This should be called by the ocl.callbacks.UpdateHyperparameterScheduling callback.

Parameters:

global_step (int): The current global step. Required.

Source code in ocl/scheduling.py
def update_global_step(self, global_step: int):
    """Update global step used in `compute_scheduled_value`.

    This should be called by the
    [ocl.callbacks.UpdateHyperparameterScheduling][] callback.

    Args:
        global_step: The current global step.
    """
    self.last_global_step = global_step

compute_scheduled_value abstractmethod

Return current value of hyperparameter based on global step.

Returns:

float: The scheduled hyperparameter value.

Source code in ocl/scheduling.py
@abc.abstractmethod
def compute_scheduled_value(self) -> float:
    """Return current value of hyperparameter based on global step.

    Returns:
        The scheduled hyperparameter value.
    """
    pass

LinearHPScheduler

Bases: HPScheduler

Linearly increase value of a hyperparameter.

Source code in ocl/scheduling.py
class LinearHPScheduler(HPScheduler):
    """Linearly increase value of a hyperparameter."""

    def __init__(
        self, end_value: float, end_step: int, start_value: float = 0.0, start_step: int = 0
    ):
        """Initialize LinearHPScheduler.

        Args:
            end_value: Value after scheduling.
            end_step: `global_step` at which `end_value` should be reached.
            start_value: Value to be used prior to `start_step`.
            start_step: Step at which linear scheduling should start.
        """
        super().__init__()
        if start_step > end_step:
            raise ValueError("`start_step` needs to be smaller than or equal to `end_step`.")

        self.start_value = start_value
        self.end_value = end_value
        self.start_step = start_step
        self.end_step = end_step

    def compute_scheduled_value(self) -> float:
        step: int = self.last_global_step
        if step < self.start_step:
            return self.start_value
        elif step > self.end_step:
            return self.end_value
        else:
            t = step - self.start_step
            T = self.end_step - self.start_step
            return self.start_value + t * (self.end_value - self.start_value) / T
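
A brief usage sketch (the values are illustrative, not taken from the library): ramping a loss weight from 0.0 to 1.0 between steps 1000 and 5000. During training, update_global_step is invoked by the UpdateHyperparameterScheduling callback; here it is called by hand to show the resulting values.

from ocl.scheduling import LinearHPScheduler

weight = LinearHPScheduler(end_value=1.0, end_step=5000, start_value=0.0, start_step=1000)

for step in (0, 1000, 3000, 5000, 10000):
    weight.update_global_step(step)  # done by the callback during training
    print(step, float(weight))
# 0 -> 0.0, 1000 -> 0.0, 3000 -> 0.5, 5000 -> 1.0, 10000 -> 1.0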

__init__

Initialize LinearHPScheduler.

Parameters:

end_value (float): Value after scheduling. Required.
end_step (int): global_step at which end_value should be reached. Required.
start_value (float): Value to be used prior to start_step. Default: 0.0.
start_step (int): Step at which linear scheduling should start. Default: 0.

Source code in ocl/scheduling.py
def __init__(
    self, end_value: float, end_step: int, start_value: float = 0.0, start_step: int = 0
):
    """Initialize LinearHPScheduler.

    Args:
        end_value: Value after scheduling.
        end_step: `global_step` at which `end_value` should be reached.
        start_value: Value to be used prior to `start_step`.
        start_step: Step at which linear scheduling should start.
    """
    super().__init__()
    if start_step > end_step:
        raise ValueError("`start_step` needs to be smaller than or equal to `end_step`.")

    self.start_value = start_value
    self.end_value = end_value
    self.start_step = start_step
    self.end_step = end_step

StepHPScheduler

Bases: HPScheduler

Schedule hyperparameter using discrete step.

Source code in ocl/scheduling.py
class StepHPScheduler(HPScheduler):
    """Schedule hyperparameter using discrete step."""

    def __init__(self, end_value: float, switch_step: int, start_value: float = 0.0):
        """Initialize StepHPScheduler.

        Args:
            end_value: Value after `switch_step`.
            switch_step: `global_step` at which to switch from `start_value` to `end_value`
            start_value: Value to be used prior to `switch_step`
        """
        super().__init__()
        self.start_value = start_value
        self.end_value = end_value
        self.switch_step = switch_step

    def compute_scheduled_value(self) -> float:
        if self.last_global_step < self.switch_step:
            return self.start_value
        else:
            return self.end_value
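
A brief usage sketch (the values are illustrative): switching a regularization weight from 0.0 to 0.1 at step 2000.

from ocl.scheduling import StepHPScheduler

weight = StepHPScheduler(end_value=0.1, switch_step=2000, start_value=0.0)

for step in (0, 1999, 2000, 5000):
    weight.update_global_step(step)  # done by the callback during training
    print(step, float(weight))
# 0 -> 0.0, 1999 -> 0.0, 2000 -> 0.1, 5000 -> 0.1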

__init__

Initialize StepHPScheduler.

Parameters:

end_value (float): Value after switch_step. Required.
switch_step (int): global_step at which to switch from start_value to end_value. Required.
start_value (float): Value to be used prior to switch_step. Default: 0.0.

Source code in ocl/scheduling.py
def __init__(self, end_value: float, switch_step: int, start_value: float = 0.0):
    """Initialize StepHPScheduler.

    Args:
        end_value: Value after `switch_step`.
        switch_step: `global_step` at which to switch from `start_value` to `end_value`
        start_value: Value to be used prior to `switch_step`
    """
    super().__init__()
    self.start_value = start_value
    self.end_value = end_value
    self.switch_step = switch_step

CosineAnnealingHPScheduler

Bases: HPScheduler

Cosine annealing of hyperparameter.

Source code in ocl/scheduling.py
class CosineAnnealingHPScheduler(HPScheduler):
    """Cosine annealing of hyperparameter."""

    def __init__(self, start_value: float, end_value: float, start_step: int, end_step: int):
        """Initialize CosineAnnealingHPScheduler.

        Args:
            end_value: Value after scheduling.
            end_step: `global_step` at which `end_value` should be reached.
            start_value: Value to be used prior to `start_step`.
            start_step: Step at which cosine scheduling should start.
        """
        super().__init__()
        assert start_value >= end_value
        assert start_step <= end_step
        self.start_value = start_value
        self.end_value = end_value
        self.start_step = start_step
        self.end_step = end_step

    def compute_scheduled_value(self) -> float:
        step: int = self.last_global_step

        if step < self.start_step:
            value = self.start_value
        elif step >= self.end_step:
            value = self.end_value
        else:
            a = 0.5 * (self.start_value - self.end_value)
            b = 0.5 * (self.start_value + self.end_value)
            progress = (step - self.start_step) / (self.end_step - self.start_step)
            value = a * math.cos(math.pi * progress) + b

        return value
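
A brief usage sketch (the values are illustrative): annealing a temperature from 1.0 down to 0.1 over the first 10000 steps.

from ocl.scheduling import CosineAnnealingHPScheduler

temperature = CosineAnnealingHPScheduler(start_value=1.0, end_value=0.1, start_step=0, end_step=10000)

for step in (0, 2500, 5000, 10000):
    temperature.update_global_step(step)  # done by the callback during training
    print(step, round(float(temperature), 4))
# 0 -> 1.0, 2500 -> 0.8682, 5000 -> 0.55, 10000 -> 0.1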

__init__

Initialize CosineAnnealingHPScheduler.

Parameters:

end_value (float): Value after scheduling. Required.
end_step (int): global_step at which end_value should be reached. Required.
start_value (float): Value to be used prior to start_step. Required.
start_step (int): Step at which cosine scheduling should start. Required.

Source code in ocl/scheduling.py
def __init__(self, start_value: float, end_value: float, start_step: int, end_step: int):
    """Initialize CosineAnnealingHPScheduler.

    Args:
        end_value: Value after scheduling.
        end_step: `global_step` at which `end_value` should be reached.
        start_value: Value to be used prior to `start_step`.
        start_step: Step at which cosine scheduling should start.
    """
    super().__init__()
    assert start_value >= end_value
    assert start_step <= end_step
    self.start_value = start_value
    self.end_value = end_value
    self.start_step = start_step
    self.end_step = end_step

exponential_decay_with_optional_warmup

Return PyTorch Lightning optimizer configuration for exponential decay with optional warmup.

Exponential decay is applied at each optimization step. Exponential decay starts while warmup is still taking place. This is in line with the typical scheduling used to train Transformer models.

Parameters:

optimizer (Optimizer): PyTorch optimizer whose learning rate should be scheduled. Required.
decay_rate (float): Decay rate of the exponential decay. Default: 1.0.
decay_steps (int): Number of optimization steps after which the learning rate is decayed by the decay factor. Default: 10000.
warmup_steps (int): Number of warmup steps. Default: 0.

Returns:

Dict[str, Any]: Dict with structure compatible with PyTorch Lightning. See the PyTorch Lightning documentation on configure_optimizers: https://lightning.ai/docs/pytorch/stable/common/lightning_module.html#configure-optimizers

Source code in ocl/scheduling.py
def exponential_decay_with_optional_warmup(
    optimizer: Optimizer, decay_rate: float = 1.0, decay_steps: int = 10000, warmup_steps: int = 0
) -> Dict[str, Any]:
    """Return PyTorch Lightning optimizer configuration for exponential decay with optional warmup.

    Exponential decay is applied at each optimization step.  Exponential decay starts
    **while** warmup is still taking place.  This is in line with the typical scheduling
    used to train Transformer models.

    Args:
        optimizer: PyTorch optimizer whose learning rate should be scheduled.
        decay_rate: Decay rate of exponential decay.
        decay_steps: Number of optimization steps after which learning rate should be decayed
            by decay factor.
        warmup_steps: Number of warmup steps.

    Returns:
        Dict with structure compatible with ptl.  See
            [pytorch lightning documentation](
                https://lightning.ai/docs/pytorch/stable/common/lightning_module.html#configure-optimizers)

    """
    decay_fn = functools.partial(
        _exp_decay_with_warmup_fn,
        decay_rate=decay_rate,
        decay_steps=decay_steps,
        warmup_steps=warmup_steps,
    )

    return {"lr_scheduler": {"scheduler": LambdaLR(optimizer, decay_fn), "interval": "step"}}

exponential_decay_after_optional_warmup

Return PyTorch Lightning optimizer configuration for exponential decay with optional warmup.

Exponential decay is applied at each optimization step. Exponential decay starts after warmup has taken place.

Parameters:

optimizer (Optimizer): PyTorch optimizer whose learning rate should be scheduled. Required.
decay_rate (float): Decay rate of the exponential decay. Default: 1.0.
decay_steps (int): Number of optimization steps after which the learning rate is decayed by the decay factor. Default: 10000.
warmup_steps (int): Number of warmup steps. Default: 0.

Returns:

Dict[str, Any]: Dict with structure compatible with PyTorch Lightning. See the PyTorch Lightning documentation on configure_optimizers: https://lightning.ai/docs/pytorch/stable/common/lightning_module.html#configure-optimizers

Source code in ocl/scheduling.py
def exponential_decay_after_optional_warmup(
    optimizer: Optimizer, decay_rate: float = 1.0, decay_steps: int = 10000, warmup_steps: int = 0
) -> Dict[str, Any]:
    """Return PyTorch Lightning optimizer configuration for exponential decay with optional warmup.

    Exponential decay is applied at each optimization step.  Exponential decay starts
    **after** warmup has taken place.

    Args:
        optimizer: PyTorch optimizer whose learning rate should be scheduled.
        decay_rate: Decay rate of exponential decay.
        decay_steps: Number of optimization steps after which learning rate should be decayed
            by decay factor.
        warmup_steps: Number of warmup steps.

    Returns:
        Dict with structure compatible with ptl.  See
            [pytorch lightning documentation](
                https://lightning.ai/docs/pytorch/stable/common/lightning_module.html#configure-optimizers)
    """
    decay_fn = functools.partial(
        _exp_decay_after_warmup_fn,
        decay_rate=decay_rate,
        decay_steps=decay_steps,
        warmup_steps=warmup_steps,
    )

    return {"lr_scheduler": {"scheduler": LambdaLR(optimizer, decay_fn), "interval": "step"}}

plateau_decay

Return PyTorch Lightning optimizer configuration for plateau decay.

Parameters:

optimizer (Optimizer): PyTorch optimizer whose learning rate should be scheduled. Required.
decay_rate (float): Factor by which the learning rate should be decayed when a plateau is reached. Default: 1.0.
patience (int): Number of epochs to wait for improvement. Default: 10.
monitor_metric (str): Metric to monitor for plateau detection. Default: 'val/lotal_loss'.
mode (str): min or max. Default: 'min'.

Returns:

Dict[str, Any]: Dict with structure compatible with PyTorch Lightning. See the PyTorch Lightning documentation on configure_optimizers: https://lightning.ai/docs/pytorch/stable/common/lightning_module.html#configure-optimizers

Source code in ocl/scheduling.py
def plateau_decay(
    optimizer: Optimizer,
    decay_rate: float = 1.0,
    patience: int = 10,
    monitor_metric: str = "val/lotal_loss",
    mode: str = "min",
) -> Dict[str, Any]:
    """Return PyTorch Lightning optimizer configuration for plateau decay.

    Args:
        optimizer: PyTorch optimizer whose learning rate should be scheduled.
        decay_rate: Factor by which learning rate should be decayed when a plateau is reached.
        patience: Number of epochs to wait for improvement.
        monitor_metric: Metric to monitor for plateau detection.
        mode: `min` or `max`.

    Returns:
        Dict with structure compatible with ptl.  See
            [pytorch lightning documentation](
                https://lightning.ai/docs/pytorch/stable/common/lightning_module.html#configure-optimizers)
    """
    plateau_scheduler = ReduceLROnPlateau(
        optimizer=optimizer, mode=mode, factor=decay_rate, patience=patience
    )
    return {
        "lr_scheduler": {
            "scheduler": plateau_scheduler,
            "interval": "epoch",
            "monitor": monitor_metric,
        }
    }
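
A hedged usage sketch of how the returned dict could be combined with an optimizer inside a LightningModule's configure_optimizers; MyModule, the Adam hyperparameters, and the decay settings are placeholders. Whatever metric is passed as monitor_metric (default shown above) must be logged during validation for ReduceLROnPlateau to see it.

import pytorch_lightning as pl
import torch

from ocl.scheduling import plateau_decay

class MyModule(pl.LightningModule):  # hypothetical module, for illustration only
    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=1e-3)
        # plateau_decay returns {"lr_scheduler": {...}}; merge it with the optimizer entry.
        return {"optimizer": optimizer, **plateau_decay(optimizer, decay_rate=0.5, patience=5, mode="min")}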

cosine_annealing_with_optional_warmup

Return PyTorch Lightning optimizer configuration for cosine annealing with warmup.

Parameters:

optimizer (Optimizer): PyTorch optimizer whose learning rate should be scheduled. Required.
T_max (int): The length of the schedule in steps. Default: 100000.
eta_min (float): Minimal fraction of the initial learning rate that should be reached when the scheduling cycle is complete. Default: 0.0.
warmup_steps (int): Number of warmup steps. Default: 0.
error_on_exceeding_steps (bool): Raise an error if training exceeds T_max steps. Default: False.

Returns:

Dict[str, Any]: Dict with structure compatible with PyTorch Lightning. See the PyTorch Lightning documentation on configure_optimizers: https://lightning.ai/docs/pytorch/stable/common/lightning_module.html#configure-optimizers

Source code in ocl/scheduling.py
def cosine_annealing_with_optional_warmup(
    optimizer: Optimizer,
    T_max: int = 100000,
    eta_min: float = 0.0,
    warmup_steps: int = 0,
    error_on_exceeding_steps: bool = False,
) -> Dict[str, Any]:
    """Return PyTorch Lightning optimizer configuration for cosine annealing with warmup.

    Args:
        optimizer: PyTorch optimizer whose learning rate should be scheduled.
        T_max: The length of the scheduling in steps.
        eta_min: Minimal fraction of initial learning rate that should be reached when
            scheduling cycle is complete.
        warmup_steps: Number of warmup steps.
        error_on_exceeding_steps: Raise error if more than `T_max` steps are trained.

    Returns:
        Dict with structure compatible with ptl.  See
            [pytorch lightning documentation](
                https://lightning.ai/docs/pytorch/stable/common/lightning_module.html#configure-optimizers)
    """
    return {
        "lr_scheduler": {
            "scheduler": _CosineAnnealingWithWarmup(
                optimizer,
                T_max,
                eta_min=eta_min,
                warmup_steps=warmup_steps,
                error_on_exceeding_steps=error_on_exceeding_steps,
            ),
            "interval": "step",
        }
    }
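
The _CosineAnnealingWithWarmup scheduler itself is not shown on this page. As a rough illustration of the schedule's shape only, the multiplier below sketches an approximately equivalent LambdaLR rule, assuming linear warmup to the base learning rate followed by cosine annealing down to eta_min times the base learning rate at T_max; it is not the library's class.

import math

def cosine_with_warmup_multiplier_sketch(
    step: int, T_max: int, eta_min: float, warmup_steps: int
) -> float:
    # Hypothetical sketch of the schedule's shape, not the library's
    # _CosineAnnealingWithWarmup implementation.
    if warmup_steps > 0 and step < warmup_steps:
        return step / warmup_steps
    progress = min(1.0, (step - warmup_steps) / max(1, T_max - warmup_steps))
    return eta_min + (1.0 - eta_min) * 0.5 * (1.0 + math.cos(math.pi * progress))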