Line data Source code
1 : // Copyright 2024, UChicago Argonne, LLC
2 : // All Rights Reserved
3 : // Software Name: NEML2 -- the New Engineering material Model Library, version 2
4 : // By: Argonne National Laboratory
5 : // OPEN SOURCE LICENSE (MIT)
6 : //
7 : // Permission is hereby granted, free of charge, to any person obtaining a copy
8 : // of this software and associated documentation files (the "Software"), to deal
9 : // in the Software without restriction, including without limitation the rights
10 : // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 : // copies of the Software, and to permit persons to whom the Software is
12 : // furnished to do so, subject to the following conditions:
13 : //
14 : // The above copyright notice and this permission notice shall be included in
15 : // all copies or substantial portions of the Software.
16 : //
17 : // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 : // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 : // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 : // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 : // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 : // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 : // THE SOFTWARE.
24 :
25 : #include <iostream>
26 : #include <iomanip>
27 :
28 : #include "neml2/solvers/NewtonWithTrustRegion.h"
29 : #include "neml2/tensors/Scalar.h"
30 : #include "neml2/tensors/functions/where.h"
31 : #include "neml2/tensors/functions/bmv.h"
32 : #include "neml2/tensors/functions/bvv.h"
33 : #include "neml2/tensors/functions/linalg/vector_norm.h"
34 :
35 : namespace neml2
36 : {
37 : register_NEML2_object(NewtonWithTrustRegion);
38 :
39 : OptionSet
40 5 : NewtonWithTrustRegion::expected_options()
41 : {
42 5 : OptionSet options = Newton::expected_options();
43 5 : options.doc() =
44 : "A trust-region Newton solver. The step size and direction are modified by solving a "
45 : "constrained minimization problem using the local quadratic approximation. The default "
46 : "solver parameters are chosen based on a limited set of problems we tested on and are "
47 5 : "expected to be tuned.";
48 :
49 10 : options.set<double>("delta_0") = 1.0;
50 10 : options.set("delta_0").doc() = "Initial trust region radius";
51 :
52 10 : options.set<double>("delta_max") = 10.0;
53 10 : options.set("delta_max").doc() = "Maximum trust region radius";
54 :
55 10 : options.set<double>("reduce_criteria") = 0.25;
56 15 : options.set("reduce_criteria").doc() = "The trust region radius is reduced when the merit "
57 5 : "function reduction is below this threshold";
58 :
59 10 : options.set<double>("expand_criteria") = 0.75;
60 15 : options.set("expand_criteria").doc() = "The trust region radius is increased when the merit "
61 5 : "function reduction is above this threshold";
62 :
63 10 : options.set<double>("reduce_factor") = 0.25;
64 10 : options.set("reduce_factor").doc() = "Factor to apply when reducing the trust region radius";
65 :
66 10 : options.set<double>("expand_factor") = 2.0;
67 10 : options.set("expand_factor").doc() = "Factor to apply when increasing the trust region radius";
68 :
69 10 : options.set<double>("accept_criteria") = 0.1;
70 15 : options.set("accept_criteria").doc() =
71 5 : "Reject the current step when the merit function reduction is below this threshold";
72 :
73 10 : options.set<double>("subproblem_rel_tol") = 1e-6;
74 10 : options.set("subproblem_rel_tol").doc() = "Relative tolerance used for the quadratic sub-problem";
75 :
76 10 : options.set<double>("subproblem_abs_tol") = 1e-8;
77 10 : options.set("subproblem_abs_tol").doc() = "Absolute tolerance used for the quadratic sub-problem";
78 :
79 10 : options.set<unsigned int>("subproblem_max_its") = 10;
80 10 : options.set("subproblem_max_its").doc() =
81 5 : "Maximum number of allowable iterations when solving the quadratic sub-problem";
82 :
83 5 : return options;
84 0 : }
85 :
86 3 : NewtonWithTrustRegion::NewtonWithTrustRegion(const OptionSet & options)
87 : : Newton(options),
88 3 : _subproblem(subproblem_options(options)),
89 3 : _subproblem_solver(subproblem_solver_options(options)),
90 6 : _delta_0(options.get<double>("delta_0")),
91 6 : _delta_max(options.get<double>("delta_max")),
92 6 : _reduce_criteria(options.get<double>("reduce_criteria")),
93 6 : _expand_criteria(options.get<double>("expand_criteria")),
94 6 : _reduce_factor(options.get<double>("reduce_factor")),
95 6 : _expand_factor(options.get<double>("expand_factor")),
96 9 : _accept_criteria(options.get<double>("accept_criteria"))
97 : {
98 3 : }
99 :
100 : OptionSet
101 3 : NewtonWithTrustRegion::subproblem_options(const OptionSet & /*options*/) const
102 : {
103 : // By default the nonlinear system turns off automatic scaling (which is what we want here)
104 3 : return TrustRegionSubProblem::expected_options();
105 : }
106 :
107 : OptionSet
108 3 : NewtonWithTrustRegion::subproblem_solver_options(const OptionSet & options) const
109 : {
110 3 : auto solver_options = Newton::expected_options();
111 12 : solver_options.set<double>("abs_tol") = options.get<double>("subproblem_abs_tol");
112 12 : solver_options.set<double>("rel_tol") = options.get<double>("subproblem_rel_tol");
113 9 : solver_options.set<unsigned int>("max_its") = options.get<unsigned int>("subproblem_max_its");
114 3 : return solver_options;
115 0 : }
116 :
117 : void
118 3 : NewtonWithTrustRegion::prepare(const NonlinearSystem & /*system*/,
119 : const NonlinearSystem::Sol<true> & x)
120 : {
121 3 : _delta = Scalar::full(x.batch_sizes(), _delta_0, x.options());
122 3 : }
123 :
124 : void
125 18 : NewtonWithTrustRegion::update(NonlinearSystem & system,
126 : NonlinearSystem::Sol<true> & x,
127 : const NonlinearSystem::Res<true> & r,
128 : const NonlinearSystem::Jac<true> & J)
129 : {
130 18 : auto p = solve_direction(r, J);
131 :
132 : // Predicted reduction in the merit function
133 18 : auto nr = linalg::vector_norm(r);
134 18 : auto red_b = merit_function_reduction(r, J, p);
135 :
136 : // Actual reduction in the objective function
137 18 : NonlinearSystem::Sol<true> xp(Tensor(x) + Tensor(p));
138 18 : auto rp = system.residual(xp);
139 18 : auto nrp = linalg::vector_norm(rp);
140 18 : auto red_a = 0.5 * pow(nr, 2.0) - 0.5 * pow(nrp, 2.0);
141 :
142 : // Quality of the subproblem solution compared to the quadratic model
143 18 : auto rho = red_a / red_b;
144 :
145 : // Adjust the trust region based on the quality of the subproblem
146 54 : _delta.batch_index_put_({rho < _reduce_criteria},
147 72 : _reduce_factor * _delta.batch_index({rho < _reduce_criteria}));
148 54 : _delta.batch_index_put_({rho > _expand_criteria},
149 90 : at::clamp(_expand_factor * _delta.batch_index({rho > _expand_criteria}),
150 : c10::nullopt,
151 18 : _delta_max));
152 :
153 : // Accept or reject the current step
154 18 : auto accept = (rho >= _accept_criteria).unsqueeze(-1);
155 :
156 : // Do some printing if verbose
157 18 : if (verbose)
158 : {
159 0 : std::cout << " RHO MIN/MAX : " << std::scientific << at::min(rho).item<double>()
160 0 : << "/" << std::scientific << at::max(rho).item<double>() << std::endl;
161 0 : std::cout << " ACCEPTANCE RATE : " << at::sum(accept).item<Size>() << "/"
162 0 : << utils::storage_size(_delta.batch_sizes().concrete()) << std::endl;
163 0 : std::cout << " ADJUSTED DELTA MIN/MAX : " << std::scientific
164 0 : << at::min(_delta).item<double>() << "/" << std::scientific
165 0 : << at::max(_delta).item<double>() << std::endl;
166 : }
167 :
168 18 : x = NonlinearSystem::Sol<true>(neml2::where(accept, Tensor(xp), x.variable_data()));
169 162 : }
170 :
171 : NonlinearSystem::Sol<true>
172 18 : NewtonWithTrustRegion::solve_direction(const NonlinearSystem::Res<true> & r,
173 : const NonlinearSystem::Jac<true> & J)
174 : {
175 : // The full Newton step
176 18 : auto p_newton = Newton::solve_direction(r, J);
177 :
178 : // The trust region step (obtained by solving the bound constrained subproblem)
179 18 : _subproblem.reinit(r, J, _delta);
180 : auto res = _subproblem_solver.solve(_subproblem,
181 18 : NonlinearSystem::Sol<false>(Tensor::zeros_like(_delta)));
182 :
183 : // Do some printing if verbose
184 18 : if (verbose)
185 : {
186 0 : std::cout << " TRUST-REGION ITERATIONS: " << res.iterations << std::endl;
187 0 : std::cout << " ACTIVE CONSTRAINTS : " << at::sum(res.solution > 0).item<Size>() << "/"
188 0 : << utils::storage_size(res.solution.batch_sizes().concrete()) << std::endl;
189 : }
190 :
191 18 : auto s = Scalar(at::clamp(res.solution, 0.0), _delta.batch_sizes());
192 18 : auto p_trust = -_subproblem.preconditioned_direction(s);
193 :
194 : // Now select between the two... Basically take the full Newton step whenever possible
195 : auto newton_inside_trust_region =
196 18 : (linalg::vector_norm(p_newton) <= sqrt(2.0 * _delta)).unsqueeze(-1);
197 :
198 : return NonlinearSystem::Sol<true>(
199 36 : Tensor(at::where(newton_inside_trust_region, p_newton, p_trust), p_newton.batch_sizes()));
200 18 : }
201 :
202 : Scalar
203 18 : NewtonWithTrustRegion::merit_function_reduction(const NonlinearSystem::Res<true> & r,
204 : const NonlinearSystem::Jac<true> & J,
205 : const NonlinearSystem::Sol<true> & p) const
206 : {
207 18 : auto Jp = bmv(J, p);
208 36 : return -bvv(r, Jp) - 0.5 * bvv(Jp, Jp);
209 18 : }
210 :
211 : } // namespace neml2
|