Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 16 additions & 10 deletions n3fit/src/n3fit/backends/keras_backend/MetaLayer.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,26 @@
"""
The class MetaLayer is an extension of the backend Layer class
with a number of methods and helpers to facilitate writing new custom layers
in such a way that the new custom layer don't need to rely in anything backend-dependent
The class MetaLayer is an extension of the backend Layer class
with a number of methods and helpers to facilitate writing new custom layers
in such a way that a new custom layer doesn't need to rely on anything backend-dependent

In other words, if you want to implement a new layer and need functions not included here
it is better to add a new method which is just a call to the relevant backend-dependent function
For instance: np_to_tensor is just a call to K.constant
In other words, if you want to implement a new layer and need functions not included here
it is better to add a new method which is just a call to the relevant backend-dependent function
For instance: np_to_tensor is just a call to K.constant
"""

from keras.initializers import Constant, RandomUniform, glorot_normal, glorot_uniform
from keras.initializers import Constant, RandomUniform, VarianceScaling, glorot_uniform
from keras.layers import Layer

# Define in this dictionary new initializers as well as the arguments they accept (with default values if need be)
initializers = {
"random_uniform": (RandomUniform, {"minval": -0.5, "maxval": 0.5}),
"glorot_uniform": (glorot_uniform, {}),
"glorot_normal": (glorot_normal, {}),
# glorot_normal expressed via VarianceScaling so its width is tunable through `scale`:
# scale=1.0 reproduces keras' glorot_normal exactly; weight std scales as sqrt(scale).
"glorot_normal": (
VarianceScaling,
{"scale": 1.0, "mode": "fan_avg", "distribution": "truncated_normal"},
),
}


Expand Down Expand Up @@ -91,10 +96,11 @@ def select_initializer(ini_name, seed=None, **kwargs):
) from e

ini_class = ini_tuple[0]
ini_args = ini_tuple[1]
# Copy so per-call overrides (seed, scale, ...) don't leak into the shared defaults
ini_args = dict(ini_tuple[1])
ini_args["seed"] = seed

for key, value in kwargs.items():
if key in ini_args.keys():
if key in ini_args:
ini_args[key] = value
return ini_class(**ini_args)
59 changes: 59 additions & 0 deletions n3fit/src/n3fit/backends/keras_backend/callbacks.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
"""

import logging
from pathlib import Path
from time import time

from keras import backend as K
Expand Down Expand Up @@ -196,6 +197,64 @@ def on_step_end(self, epoch, logs=None):
self._update_weights()


class StoreCallback(CallbackStep):
    """
    Given a list of replica paths, the callback will store the trainable
    parameters of every replica every ``check_freq`` epochs and, at the end
    of the training, the best parameters tracked by the stopping object.

    The trainable weights of each replica are flattened and concatenated into
    a single 1D array which is written to
    ``<path>/parameters/params_<epoch>.npz`` under the key ``params``.

    Parameters
    ----------
    pdf_model: MetaModel
        The multi-replica PDF model, it must provide ``split_replicas()``
    replica_paths: list[Path]
        One path per replica. Parameters are saved under ``<path>/parameters/``,
        the directory is created if it does not exist yet.
    stopping_object: object
        Stopping object of the fit. Its per-replica ``_best_weights`` and
        ``_best_epochs`` are read at the end of the training.
    check_freq: int
        Save every this many epochs (default: 100)

    Raises
    ------
    ValueError
        if ``check_freq`` is 0, since the save condition is a modulo by ``check_freq``
    """

    def __init__(self, pdf_model, replica_paths, stopping_object, check_freq=100):
        # Fail before touching the filesystem instead of with a ZeroDivisionError
        # at the end of the first training step
        if check_freq == 0:
            raise ValueError("check_freq must be different from 0 to save checkpoints.")
        super().__init__()
        self.check_freq = check_freq
        self.pdf_model = pdf_model
        self.stopping_object = stopping_object
        self.weight_dirs = []
        for path in replica_paths:
            # Path(...) is a no-op for Path objects and makes plain strings acceptable
            weight_dir = Path(path) / "parameters"
            weight_dir.mkdir(parents=True, exist_ok=True)
            self.weight_dirs.append(weight_dir)

    def _save_weights(self, epoch, tr_weights, weight_dir):
        """Flatten ``tr_weights`` into a single 1D array and save it
        as ``params_<epoch>.npz`` inside ``weight_dir``."""
        filepath = weight_dir / f"params_{epoch}.npz"
        # save parameters as expected by colibri
        trainable_weights_flat = np.concatenate([np.asarray(w).flatten() for w in tr_weights])
        np.savez(filepath, params=trainable_weights_flat)
        log.info("Saved parameters at epoch %s in %s", epoch, filepath)

    def on_step_end(self, epoch, logs=None):
        """Function to be called at the end of every epoch
        Every ``check_freq`` number of epochs, the parameters of the model will
        be stored in the indicated directory.
        The file is labelled with ``epoch + 1``.
        """
        if ((epoch + 1) % self.check_freq) != 0:
            return

        pdf_replicas = self.pdf_model.split_replicas()
        # Replica models and output directories are paired by position
        for replica_model, weight_dir in zip(pdf_replicas, self.weight_dirs):
            self._save_weights(epoch + 1, replica_model.trainable_weights, weight_dir)

    def on_train_end(self, logs=None):
        """Store the best parameters of every replica, labelled with their best epoch.

        Replicas for which the stopping object holds no best weights are skipped
        with a warning.
        """
        # NOTE(review): the best parameters use the same file naming as the periodic
        # checkpoints, so a checkpoint with the same epoch label would be overwritten
        for idx, weight_dir in enumerate(self.weight_dirs):
            weights = self.stopping_object._best_weights[idx]
            if weights is None:
                log.warning(
                    "No best weights found for replica %s, skipping saving best parameters.",
                    idx + 1,
                )
                continue
            best_epoch = self.stopping_object._best_epochs[idx]
            self._save_weights(best_epoch, weights['all_NNs'], weight_dir)


def gen_tensorboard_callback(log_dir, profiling=False, histogram_freq=0):
"""
Generate tensorboard logging details at ``log_dir``.
Expand Down
39 changes: 25 additions & 14 deletions n3fit/src/n3fit/io/writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,9 @@


class WriterWrapper:
def __init__(self, replica_numbers, pdf_objects, stopping_object, all_chi2s, theory, timings, trials):
def __init__(
self, replica_numbers, pdf_objects, stopping_object, all_chi2s, theory, timings, trials
):
"""
Initializes the writer for all replicas.

Expand Down Expand Up @@ -298,18 +300,18 @@ def _hyperparam_settings(self, replica_number):
trials_number = self.trials["number_of_trials"]
idx_trial = replica_number % trials_number
hyperparam_info = {}
hyperparam_info["optimizer"]=self.trials["optimizer"][idx_trial]
hyperparam_info["learning_rate"]=self.trials["learning_rate"][idx_trial]
hyperparam_info["clipnorm"]=self.trials["clipnorm"][idx_trial]
hyperparam_info["epochs"]=self.trials["epochs"][idx_trial]
hyperparam_info["stopping_patience"]=self.trials["stopping_patience"][idx_trial]
hyperparam_info["initial"]=self.trials["initial"][idx_trial]
hyperparam_info["nodes_per_layer"]=self.trials["nodes_per_layer"][idx_trial]
hyperparam_info["number_of_layers"]=self.trials["number_of_layers"][idx_trial]
hyperparam_info["activation"]=self.trials["activation_per_layer"][idx_trial]
hyperparam_info["layer_type"]=self.trials["layer_type"][idx_trial]
hyperparam_info["initializer"]=self.trials["initializer"][idx_trial]
hyperparam_info["dropout"]=self.trials["dropout"][idx_trial]
hyperparam_info["optimizer"] = self.trials["optimizer"][idx_trial]
hyperparam_info["learning_rate"] = self.trials["learning_rate"][idx_trial]
hyperparam_info["clipnorm"] = self.trials["clipnorm"][idx_trial]
hyperparam_info["epochs"] = self.trials["epochs"][idx_trial]
hyperparam_info["stopping_patience"] = self.trials["stopping_patience"][idx_trial]
hyperparam_info["initial"] = self.trials["initial"][idx_trial]
hyperparam_info["nodes_per_layer"] = self.trials["nodes_per_layer"][idx_trial]
hyperparam_info["number_of_layers"] = self.trials["number_of_layers"][idx_trial]
hyperparam_info["activation"] = self.trials["activation_per_layer"][idx_trial]
hyperparam_info["layer_type"] = self.trials["layer_type"][idx_trial]
hyperparam_info["initializer"] = self.trials["initializer"][idx_trial]
hyperparam_info["dropout"] = self.trials["dropout"][idx_trial]
return hyperparam_info
else:
hyperparam_info = "from runcard"
Expand All @@ -329,6 +331,11 @@ def _write_metadata_json(self, i, replica_number, out_path):
# Note: the 2 arguments below are the same for all replicas, unless run separately
timing=self.timings,
stop_epoch=self.stopping_object.stop_epoch,
would_stop_epoch=(
self.stopping_object.would_stop_epoch
if self.stopping_object._dont_stop
else self.stopping_object.stop_epoch
),
)

with open(out_path, "w", encoding="utf-8") as fs:
Expand Down Expand Up @@ -373,6 +380,7 @@ def jsonfit(
true_chi2,
stop_epoch,
timing,
would_stop_epoch,
hyperparam_info,
):
"""Generates a dictionary containing all relevant metadata for the fit
Expand All @@ -399,7 +407,9 @@ def jsonfit(
epoch at which the stopping stopped (not the one for the best fit!)
timing: dict
dictionary of the timing of the different events that happened
hyperparam_info: dict
would_stop_epoch: int
epoch at which the stopping would have stopped if it were not set to "dont_stop"
hyperparam_info: dict
dictionary of hyperparameter settings
"""
all_info = {}
Expand All @@ -415,6 +425,7 @@ def jsonfit(
all_info["arc_lengths"] = arc_lengths
all_info["integrability"] = integrability_numbers
all_info["timing"] = timing
all_info["would_stop_epoch"] = would_stop_epoch
all_info["hyperparameters"] = hyperparam_info
# Versioning info
all_info["version"] = version()
Expand Down
11 changes: 9 additions & 2 deletions n3fit/src/n3fit/model_gen.py
Original file line number Diff line number Diff line change
Expand Up @@ -347,6 +347,9 @@ class ReplicaSettings:
e.g. ``dense`` or ``dense_per_flavour``
initializer: str
initializer to be used for this replica
initializer_scale: float
width multiplier for the initializer distribution. Only affects ``glorot_normal``
(weight std scales as sqrt(scale)); 1.0 reproduces standard glorot_normal
dropout: float
rate of dropout for each layer
regularizer: str
Expand All @@ -360,6 +363,7 @@ class ReplicaSettings:
activations: list[str]
architecture: str = "dense"
initializer: str = "glorot_normal"
initializer_scale: float = 1.0
dropout_rate: float = 0.0
regularizer: str = None
regularizer_args: dict = field(default_factory=dict)
Expand Down Expand Up @@ -806,6 +810,7 @@ def _generate_nn(
activations: list[str] = None,
architecture: str = "dense",
initializer: str = None,
initializer_scale: float = 1.0,
dropout_rate: float = 0.0,
regularizer: str = None,
regularizer_args: dict = field(default_factory=dict),
Expand Down Expand Up @@ -848,7 +853,7 @@ def layer_generator(i_layer, nodes_out, activation):
"""Generate the ``i_layer``-th dense_per_flavour layer for all replicas."""
l_seed = int(seed + i_layer * n_flavours)
initializers = [
MetaLayer.select_initializer(initializer, seed=l_seed + b)
MetaLayer.select_initializer(initializer, seed=l_seed + b, scale=initializer_scale)
for b in range(n_flavours)
]
layer = base_layer_selector(
Expand All @@ -863,7 +868,9 @@ def layer_generator(i_layer, nodes_out, activation):
elif architecture == "dense":

def layer_generator(i_layer, nodes_out, activation):
kini = MetaLayer.select_initializer(initializer, seed=int(seed + i_layer))
kini = MetaLayer.select_initializer(
initializer, seed=int(seed + i_layer), scale=initializer_scale
)
return base_layer_selector(
architecture,
kernel_initializer=kini,
Expand Down
64 changes: 61 additions & 3 deletions n3fit/src/n3fit/model_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from itertools import zip_longest
import json
import logging
import pickle

import numpy as np

Expand Down Expand Up @@ -113,6 +114,10 @@ def __init__(
theoryid=None,
lux_params=None,
replicas=None,
save_checkpoints=False,
replica_path=None,
checkpoint_freq=100,
dont_stop=False,
trials=None,
load_weights_dict=None,
):
Expand Down Expand Up @@ -155,6 +160,15 @@ def __init__(
if not given, the photon is not generated
replicas: list
list with the replicas ids to be fitted
save_checkpoints: bool
whether to save checkpoints (i.e. model parameters) during the fit. This requires
`replica_path` to be set as well. Not doing this will raise an error.
replica_path: Path
root path for all replicas.
checkpoint_freq: int
frequency (in epochs) at which to save checkpoints. Only relevant if `save_checkpoints` is True.
dont_stop: bool
whether to disable the stopping mechanism, i.e. to run for all epochs regardless of the validation chi2
trials: str
name of the file containing the trials defining the methodology
"""
Expand All @@ -173,6 +187,14 @@ def __init__(
self.lux_params = lux_params
self.replicas = replicas
self.experiments_data = experiments_data
self.dont_stop = dont_stop

# Checkpointing options
self.save_checkpoints = save_checkpoints
self.replica_path = replica_path
self.checkpoint_freq = checkpoint_freq
if self.save_checkpoints and self.replica_path is None:
raise ValueError("To save checkpoints, the 'replica_path' key must be set as well.")
self.trials = trials

# Initialise internal variables which define behaviour
Expand Down Expand Up @@ -728,11 +750,24 @@ def _train_and_fit(self, training_model, stopping_object, epochs=100) -> bool:
self.training["integmultipliers"],
update_freq=PUSH_INTEGRABILITY_EACH,
)
callback_list = [callback_st, callback_pos, callback_integ]

if self.save_checkpoints:
pdf_model = training_model.get_layer("PDFs")
# Save parameters where colibri will look for checkpoints
replica_paths = [
self.replica_path.parent / f"fit_replicas/replica_{r}" for r in self.replicas
]
checpoint_callback = callbacks.StoreCallback(
pdf_model=pdf_model,
replica_paths=replica_paths,
check_freq=self.checkpoint_freq,
stopping_object=stopping_object,
)
callback_list.append(checpoint_callback)

training_model.perform_fit(
epochs=epochs,
verbose=False,
callbacks=self.callbacks + [callback_st, callback_pos, callback_integ],
epochs=epochs, verbose=False, callbacks=self.callbacks + callback_list
)

def _hyperopt_override(self, params):
Expand Down Expand Up @@ -928,6 +963,7 @@ def hyperparametrizable(self, params):
nodes=params["nodes_per_layer"],
activations=params["activation_per_layer"],
initializer=params["initializer"],
initializer_scale=params.get("initializer_scale", 1.0),
architecture=params["layer_type"],
dropout_rate=params["dropout"],
regularizer=params.get("regularizer"),
Expand All @@ -949,13 +985,34 @@ def hyperparametrizable(self, params):
nodes=self.trials["nodes_per_layer"][idx_hyperparamters],
activations=activations,
initializer=self.trials["initializer"][idx_hyperparamters],
initializer_scale=params.get("initializer_scale", 1.0),
architecture=self.trials["layer_type"][idx_hyperparamters],
dropout_rate=self.trials["dropout"][idx_hyperparamters],
regularizer=params.get("regularizer"),
regularizer_args=params.get("regularizer_args"),
)
replicas_settings.append(tmp)

# TODO: temporary fix to use NTK utilities in colibri
# Create model pkl for colibri n3fit module
_init_args = {
"flav_info": self.flavinfo,
"replica_range_settings": {
"min_replica": np.sort(self.replicas)[0],
"max_replica": np.sort(self.replicas)[0],
},
"impose_sumrule": self.impose_sumrule,
"fitbasis": self.fitbasis,
"nodes": params["nodes_per_layer"],
"activations": params["activation_per_layer"],
"initializer_name": params["initializer"],
"layer_type": params["layer_type"],
}
state = {"_init_args": _init_args}

with open(self.replica_path.parent / "pdf_model.pkl", "wb") as file:
pickle.dump(state, file)

### Training loop
for k, partition in enumerate(self.kpartitions):

Expand Down Expand Up @@ -1030,6 +1087,7 @@ def hyperparametrizable(self, params):
stopping_patience=stopping_epochs,
threshold_positivity=threshold_pos,
threshold_chi2=threshold_chi2,
dont_stop=self.dont_stop,
)

if self.mode_hyperopt or (not self.trials):
Expand Down
Loading
Loading