From e46e9192f9a5f80b1ba146120774d21a35e87344 Mon Sep 17 00:00:00 2001
From: Nickolai Belakovski
Date: Thu, 20 Jul 2023 11:19:59 +0900
Subject: [PATCH] Initial commit of Python translation

---
 python/README.md                            |   5 +
 python/pyproject.toml                       |  10 +
 python/src/prima/__init__.py                |   0
 python/src/prima/cobyla/__init__.py         |   0
 python/src/prima/cobyla/cobylb.py           | 560 ++++++++++++++++++
 python/src/prima/cobyla/geometry.py         | 188 ++++++
 python/src/prima/cobyla/trustregion.py      | 447 ++++++++++++++
 python/src/prima/cobyla/update.py           | 164 +++++
 python/src/prima/common/__init__.py         |   0
 python/src/prima/common/consts.py           |  22 +
 python/src/prima/common/evaluate.py         |  36 ++
 python/src/prima/common/linalg.py           |  49 ++
 python/src/prima/common/output.py           |  20 +
 python/src/prima/common/powalg.py           | 109 ++++
 python/src/prima/common/selectx.py          | 179 ++++++
 .../prima/examples/cobyla/cobyla_example.py | 322 ++++++++++
 16 files changed, 2111 insertions(+)
 create mode 100644 python/README.md
 create mode 100644 python/pyproject.toml
 create mode 100644 python/src/prima/__init__.py
 create mode 100644 python/src/prima/cobyla/__init__.py
 create mode 100644 python/src/prima/cobyla/cobylb.py
 create mode 100644 python/src/prima/cobyla/geometry.py
 create mode 100644 python/src/prima/cobyla/trustregion.py
 create mode 100644 python/src/prima/cobyla/update.py
 create mode 100644 python/src/prima/common/__init__.py
 create mode 100644 python/src/prima/common/consts.py
 create mode 100644 python/src/prima/common/evaluate.py
 create mode 100644 python/src/prima/common/linalg.py
 create mode 100644 python/src/prima/common/output.py
 create mode 100644 python/src/prima/common/powalg.py
 create mode 100644 python/src/prima/common/selectx.py
 create mode 100644 python/src/prima/examples/cobyla/cobyla_example.py

diff --git a/python/README.md b/python/README.md
new file mode 100644
index 0000000000..d325b1cc99
--- /dev/null
+++ b/python/README.md
@@ -0,0 +1,5 @@
+To develop, `cd` into the `src` directory and run
+
+```pip install --editable .```
+
+This will install prima locally in an editable fashion. You can then run `examples/cobyla/cobyla_example.py` (from any directory) and explore from there.
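+
+Below is a minimal usage sketch. It is hypothetical — the Python API is still settling, the argument
+list simply mirrors `cobylb` as of this commit (constraints are interpreted as `constr >= 0`), and the
+numeric parameters are illustrative values rather than library defaults:
+
+```python
+import numpy as np
+from prima.cobyla.cobylb import cobylb
+
+def calcfc(x):
+    f = (x[0] - 1.0)**2 + (x[1] - 2.0)**2   # objective to minimize
+    constr = np.array([x[0] + x[1] - 1.0])  # interpreted as constr >= 0
+    return f, constr
+
+x0 = np.array([0.0, 0.0])
+f0, constr0 = calcfc(x0)
+x, f, nf, info = cobylb(calcfc, iprint=0, maxfilt=2000, maxfun=500, ctol=1e-10,
+                        cweight=1e8, eta1=0.1, eta2=0.7, ftarget=-np.inf,
+                        gamma1=0.5, gamma2=2.0, rhobeg=0.5, rhoend=1e-6,
+                        constr=constr0, f=f0, x=x0, cstrv=max(-constr0[0], 0.0))
+print(x, f, nf, info)
+```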
diff --git a/python/pyproject.toml b/python/pyproject.toml
new file mode 100644
index 0000000000..d95d6147de
--- /dev/null
+++ b/python/pyproject.toml
@@ -0,0 +1,10 @@
+[project]
+name = "prima"
+version = "0.0.1"
+dependencies = [
+    "numpy"
+]
+
+[tool.setuptools.packages.find]
+where = ["src"]  # ["."] by default
+exclude = ["prima.examples*"]  # empty by default
diff --git a/python/src/prima/__init__.py b/python/src/prima/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/python/src/prima/cobyla/__init__.py b/python/src/prima/cobyla/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/python/src/prima/cobyla/cobylb.py b/python/src/prima/cobyla/cobylb.py
new file mode 100644
index 0000000000..914d68c581
--- /dev/null
+++ b/python/src/prima/cobyla/cobylb.py
@@ -0,0 +1,560 @@
+import numpy as np
+from prima.common.consts import INFO_DEFAULT, REALMAX, EPS, CONSTRMAX, \
+    MAXTR_REACHED, DAMAGING_ROUNDING, SMALL_TR_RADIUS, \
+    NAN_INF_X, NAN_INF_F, FTARGET_ACHIEVED, MAXFUN_REACHED
+from prima.common.evaluate import evaluate, moderatec, moderatef
+from prima.cobyla.update import updatepole, findpole, updatexfc
+from prima.cobyla.geometry import assess_geo, setdrop_tr, geostep, setdrop_geo
+from prima.common.selectx import savefilt, selectx
+from prima.cobyla.trustregion import trstlp, redrat, trrad
+
+def fcratio(fval, conmat):
+    '''
+    This function calculates the ratio between the "typical change" of f and that of constr.
+    See equations (12)-(13) in Section 3 of the COBYLA paper for the definition of the ratio.
+    '''
+    cmin = np.min(conmat, axis=1)
+    cmax = np.max(conmat, axis=1)
+    if any(cmin < cmax/2):
+        denom = np.min((np.maximum(cmax, 0) - cmin)[cmin < cmax/2])
+        r = (max(fval) - min(fval)) / denom
+    else:
+        r = 0
+
+    return r
+
+def redrho(rho, rhoend):
+    '''
+    This function calculates rho when it needs to be reduced.
+    The scheme is shared by UOBYQA, NEWUOA, BOBYQA, LINCOA. For COBYLA, Powell's code reduces rho by
+    'rho /= 2; if rho <= 1.5 * rhoend: rho = rhoend' as specified in (11) of the COBYLA
+    paper. However, this scheme seems to work better, especially after we introduce delta.
+    '''
+
+    rho_ratio = rho / rhoend
+
+    if rho_ratio > 250:
+        rho /= 10
+    elif rho_ratio <= 16:
+        rho = rhoend
+    else:
+        rho = np.sqrt(rho_ratio) * rhoend  # rho = np.sqrt(rho * rhoend)
+
+    return rho
+
+def checkbreak(maxfun, nf, cstrv, ctol, f, ftarget, x):
+    info = INFO_DEFAULT
+
+    # Although x should not contain nan unless there is a bug, we include the following for security.
+    # x can be inf, as finite + finite can be inf numerically.
+    if any(np.isnan(x)) or any(np.isinf(x)):
+        info = NAN_INF_X
+
+    # Although NAN_INF_F should not happen unless there is a bug, we include the following for security.
+    if np.isnan(f) or np.isposinf(f) or np.isnan(cstrv) or np.isposinf(cstrv):
+        info = NAN_INF_F
+
+    if cstrv <= ctol and f <= ftarget:
+        info = FTARGET_ACHIEVED
+
+    if nf >= maxfun:
+        info = MAXFUN_REACHED
+    return info
+
+def initxfc(calcfc, iprint, maxfun, constr0, ctol, f0, ftarget, rhobeg, x0, nf,
+            conmat, cval, fval):
+    num_constraints = conmat.shape[0]
+    num_vars = x0.size
+    sim = np.eye(num_vars, num_vars+1) * rhobeg
+    sim[:, -1] = x0
+    solver = "COBYLA"
+    subinfo = INFO_DEFAULT
+
+    simi = np.eye(num_vars) / rhobeg
+
+    evaluated = np.zeros(num_vars+1, dtype=bool)
+
+    for k in range(num_vars + 1):
+        x = sim[:, num_vars].copy()
+        # We will evaluate F corresponding to SIM(:, J).
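+        # An illustration of the evaluation order (hypothetical numbers): with num_vars = 2,
+        # rhobeg = 0.5, and x0 = [1, 1], the loop evaluates x0 itself at k == 0 (reusing the given
+        # f0/constr0, since x0 occupies the last column of sim) and then x0 + 0.5*e_j for j = 0, 1,
+        # namely [1.5, 1] and [1, 1.5].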
+        if k == 0:
+            j = num_vars
+            f = moderatef(f0)
+            constr = moderatec(constr0)
+            cstrv = np.max(np.append(-constr, 0))
+        else:
+            j = k - 1
+            x[j] += rhobeg
+            f, constr, cstrv = evaluate(calcfc, x)
+
+        # Print a message about the function/constraint evaluation according to IPRINT.
+        # TODO: Finish implementing fmsg, if we decide that it's worth it
+        # fmsg(solver, iprint, k, f, x, cstrv, constr)
+
+        # Save X, F, CONSTR, CSTRV into the history.
+        # TODO: Implement history (maybe we need it for the iterations?)
+        # savehist(k, x, xhist, f, fhist, cstrv, chist, constr, conhist)
+
+        # Save F, CONSTR, and CSTRV to FVAL, CONMAT, and CVAL respectively.
+        evaluated[j] = True
+        fval[j] = f
+        conmat[:, j] = constr
+        cval[j] = cstrv
+
+        # Check whether to exit.
+        # subinfo = checkexit(maxfun, k, cstrv, ctol, f, ftarget, x)
+        # if subinfo != INFO_DEFAULT:
+        #     info = subinfo
+        #     break
+
+        # Exchange the new vertex of the initial simplex with the optimal vertex if necessary.
+        # This is the ONLY part that is essentially non-parallel.
+        if j < num_vars and fval[j] < fval[-1]:  # j == num_vars is the pole itself; no exchange needed
+            fval[j], fval[-1] = fval[-1], fval[j]
+            cval[j], cval[-1] = cval[-1], cval[j]
+            conmat[:, [j, -1]] = conmat[:, [-1, j]]  # fancy indexing copies, so this swap is safe
+            sim[:, -1] = x
+            sim[j, :j+1] = -rhobeg
+
+    nf = np.count_nonzero(evaluated)
+    if evaluated.all():
+        simi = np.linalg.inv(sim[:, :-1])
+        # Switch the optimal vertex (located by FINDPOLE) to SIM(:, N+1), which Powell called the "pole
+        # position". We call UPDATEPOLE with CPEN = ZERO, which is the initial value of CPEN.
+        conmat, cval, fval, sim, simi, info = updatepole(0, conmat, cval, fval, sim, simi)
+
+    # N.B.: nf is returned since Python integers are immutable and cannot be updated in place.
+    return evaluated, sim, simi, nf
+
+
+def initfilt(conmat, ctol, cweight, cval, fval, sim, evaluated, cfilt, confilt, ffilt, xfilt):
+    '''
+    This function initializes the filter (xfilt, etc.) that will be used when selecting
+    x at the end of the solver.
+    N.B.:
+    1. Why not initialize the filters using XHIST, etc.? Because the history is empty if
+    the user chooses not to output it.
+    2. We decouple initxfc and initfilt so that it is easier to parallelize the former
+    if needed.
+    '''
+    num_constraints = conmat.shape[0]
+    num_vars = sim.shape[0]
+    maxfilt = len(ffilt)
+
+    nfilt = 0
+    for i in range(num_vars+1):
+        if evaluated[i]:
+            x = sim[:, i].copy()  # copy to avoid modifying sim in place
+            if i < num_vars:  # the last vertex (index num_vars) is the pole position itself
+                x += sim[:, -1]
+            nfilt, cfilt, ffilt, xfilt, confilt = savefilt(cval[i], ctol, cweight, fval[i], x, nfilt, cfilt, ffilt, xfilt, conmat[:, i], confilt)
+
+    return nfilt
+
+
+def cobylb(calcfc, iprint, maxfilt, maxfun, ctol, cweight, eta1, eta2, ftarget,
+           gamma1, gamma2, rhobeg, rhoend, constr, f, x,
+           cstrv):
+    num_constraints = len(constr)
+    num_vars = len(x)
+    # maxxhist = xhist.shape[1]
+    # maxfhist = len(fhist)
+    # maxconhist = conhist.shape[1]
+    # maxchist = len(chist)
+    # maxhist = max(maxxhist, maxfhist, maxconhist, maxchist)
+    conmat = np.zeros((num_constraints, num_vars+1)) - REALMAX
+    cval = np.zeros(num_vars+1) + REALMAX
+    fval = np.zeros(num_vars+1) + REALMAX
+    nf = 0
+
+    subinfo = 0
+    evaluated, sim, simi, nf = initxfc(calcfc, iprint, maxfun, constr, ctol, f, ftarget, rhobeg, x, nf,
+                                       conmat, cval, fval)
+    # TODO: Need to initialize history with f and x and constr
+
+    # Initialize the filter, including xfilt, ffilt, confilt, cfilt, and nfilt.
+    # N.B.: The filter is used only when selecting which iterate to return. It does not
+    # interfere with the iterations. COBYLA is NOT a filter method but a trust-region method
+    # based on an L-infinity merit function.
+    # Powell's implementation does not use a filter to select the iterate, possibly returning
+    # a suboptimal iterate.
+    maxfilt = min(max(maxfilt, 1), maxfun)
+    cfilt = np.zeros(maxfilt)
+    confilt = np.zeros((len(constr), maxfilt))
+    ffilt = np.zeros(maxfilt)
+    xfilt = np.zeros((len(x), maxfilt))
+    nfilt = initfilt(conmat, ctol, cweight, cval, fval, sim, evaluated, cfilt, confilt, ffilt, xfilt)
+
+    # Check whether to return due to abnormal cases that may occur during the initialization.
+    if subinfo != INFO_DEFAULT:
+        info = subinfo
+        # Return the best calculated values of the variables.
+        # N.B.: Selectx and findpole choose X by different standards; one cannot replace the other.
+        kopt = selectx(ffilt[:nfilt], cfilt[:nfilt], cweight, ctol)
+        x = xfilt[:, kopt]
+        f = ffilt[kopt]
+        constr = confilt[:, kopt]
+        cstrv = cfilt[kopt]
+        # Arrange chist, conhist, fhist, and xhist so that they are in chronological order.
+        # TODO: Implement me
+        # rangehist(nf, xhist, fhist, chist, conhist)
+        # Print a return message according to IPRINT.
+        # TODO: Implement me
+        # retmsg("COBYLA", info, iprint, nf, f, x, cstrv, constr)
+        return x, f, nf, info
+
+    # Set some more initial values.
+    rho = rhobeg
+    delta = rhobeg
+    cpen = 0
+    prerec = -REALMAX
+    preref = -REALMAX
+    prerem = -REALMAX
+    actrem = -REALMAX
+    ratio = -1
+    jdrop_tr = None
+    jdrop_geo = None
+
+    # maxtr is the maximal number of trust-region iterations. Normally each trust-region
+    # iteration takes 1 or 2 function evaluations except for the following cases:
+    # 1. The update of cpen alters the optimal vertex.
+    # 2. The trust-region step is short or fails to reduce either the
+    # linearized objective or the linearized constraint violation but
+    # the geometry step is not invoked.
+    # The following maxtr is unlikely to be reached.
+    maxtr = 2 * maxfun  # In Fortran this is max(maxfun, 2*maxfun) to guard against integer overflow; Python integers do not overflow.
+    info = MAXTR_REACHED
+
+    # A[:, :num_constraints] contains the approximate gradient for the constraints, and
+    # A[:, num_constraints] is minus the approximate gradient for the objective function.
+    A = np.zeros((num_vars, num_constraints+1))
+
+    # N.B.: factor_alpha/beta/gamma/delta are four factors that COBYLB uses when
+    # managing the simplex. Note the following:
+    # 1. factor_alpha < factor_gamma < 1 < factor_delta <= factor_beta
+    # 2. factor_delta has nothing to do with delta, which is the trust-region radius.
+    # 3. factor_gamma has nothing to do with gamma1 and gamma2, which are the contracting/expanding
+    # factors for updating the trust-region radius delta.
+    factor_alpha = 0.25  # the factor alpha in the COBYLA paper
+    factor_beta = 2.1    # the factor beta in the COBYLA paper
+    factor_delta = 1.1   # the factor delta in the COBYLA paper
+    factor_gamma = 0.5   # the factor gamma in the COBYLA paper
+
+    # Begin the iterative procedure.
+    # After solving a trust-region subproblem, we use 3 boolean variables to control the workflow.
+    # shortd - Is the trust-region trial step too short to invoke a function evaluation?
+    # improve_geo - Will we improve the model after the trust-region iteration? If yes, a geometry
+    # step will be taken, corresponding to the "Branch (Delta)" in the COBYLA paper.
+    # reduce_rho - Will we reduce rho after the trust-region iteration?
+    # COBYLA never sets improve_geo and reduce_rho to True simultaneously.
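+    # The merit function below is phi(x) = f(x) + cpen * cstrv(x), with cstrv(x) the L-infinity
+    # constraint violation. A hypothetical illustration: if f(x) = 3, the constraint values are
+    # [-0.2, 0.1] so that cstrv(x) = 0.2, and cpen = 10, then phi(x) = 3 + 10*0.2 = 5; prerem and
+    # actrem computed in the loop are the predicted and actual reductions of phi.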
+
+    for tr in range(maxtr):
+        # Before the trust-region step, updatepole has been called either implicitly by initxfc/updatexfc
+        # or explicitly after cpen is updated, so that sim[:, num_vars] is the optimal vertex.
+
+        # Does the interpolation set have acceptable geometry? It affects improve_geo and reduce_rho.
+        adequate_geo = assess_geo(delta, factor_alpha, factor_beta, sim, simi)
+
+        # Calculate the linear approximations to the objective and constraint functions, placing
+        # minus the objective function gradient after the constraint gradients in the array A.
+        # N.B.: TRSTLP accesses A mostly by columns, so it is more reasonable to save A instead of A.T.
+        A[:, :num_constraints] = ((conmat[:, :num_vars] - np.tile(conmat[:, num_vars], (num_vars, 1)).T)@simi).T
+        A[:, num_constraints] = (fval[num_vars] - fval[:num_vars])@simi
+
+        # Theoretically, but not numerically, the last entry of b does not affect the result of TRSTLP.
+        # We set it to -fval[num_vars] following Powell's code.
+        b = np.hstack((-conmat[:, num_vars], -fval[num_vars]))
+        # Calculate the trust-region trial step d. Note that d does NOT depend on cpen.
+        d = trstlp(A, b, delta)
+        dnorm = min(delta, np.linalg.norm(d))
+
+        # Is the trust-region trial step short? N.B.: we compare dnorm with rho, not delta.
+        # Powell's code essentially defines shortd by shortd = (dnorm < 0.5 * rho). In our tests,
+        # 1/10 seems to work better than 1/2 or 1/4, especially for linearly constrained
+        # problems. Note that LINCOA has a slightly more sophisticated way of defining shortd,
+        # taking into account whether d causes a change to the active set. Should we try the same here?
+        shortd = (dnorm < 0.1 * rho)
+
+        # Predict the change to f (preref) and to the constraint violation (prerec) due to d.
+        # We have the following in precise arithmetic. They may fail to hold due to rounding errors.
+        # 1. b[:num_constraints] = -conmat[:, num_vars], and hence max(max(b[:num_constraints] - d@A[:, :num_constraints]), 0)
+        # is the L-infinity violation of the linearized constraints corresponding to d. When d=0, the
+        # violation is max(max(b[:num_constraints]), 0) = cval[num_vars]. Prerec is the reduction of this
+        # violation achieved by d, which is nonnegative in theory; prerec = 0 iff b[:num_constraints] <= 0,
+        # i.e. the trust-region center satisfies the linearized constraints.
+        # 2. Preref may be negative or 0, but it is positive when prerec = 0 and shortd is False.
+        # 3. Due to 2, in theory, max(prerec, preref) > 0 if shortd is False.
+        # 4. In the code, max(prerec, preref) is the counterpart of qred in UOBYQA/NEWUOA/BOBYQA/LINCOA.
+        prerec = cval[num_vars] - max(np.append(b[:num_constraints] - d@A[:, :num_constraints], 0))
+        preref = np.dot(d, A[:, num_constraints])  # Can be negative
+
+        if shortd or max(prerec, preref) <= 0:
+            # Reduce delta if d is short or if d fails to render max(prerec, preref) > 0;
+            # the latter can only happen due to rounding errors. This seems quite important
+            # for performance.
+            delta *= 0.1
+            if delta <= 1.5 * rho:
+                delta = rho  # set delta to rho when it is close to or below
+        else:
+            # Increase cpen if necessary to ensure prerem > 0. Branch back if this change alters the
+            # optimal vertex. See the discussions around equation (9) of the COBYLA paper.
+            # This is the first (out of two) places where cpen is updated. It can change cpen only when
+            # prerec > 0 > preref, in which case prerem is guaranteed positive after the update.
+            # If prerec = 0 or preref > 0, then prerem is currently positive, so cpen needs no update.
+            # However, as in Powell's implementation, if prerec > 0 = preref = cpen, then cpen will
+            # remain zero, leaving prerem = 0. If cpen = 0 and prerec > 0 > preref, then cpen will
+            # become positive; if cpen = 0, prerec > 0 and preref > 0, then cpen will remain 0.
+            if prerec > 0 and preref < 0:
+                # In Powell's code, cpen is increased to 2*barmu if and only if it is currently less
+                # than 1.5*barmu, a very "Powellful" scheme. In our implementation, however, we set cpen
+                # directly to the maximum between its current value and 2*barmu while handling possible
+                # overflow. This simplifies the scheme without worsening the performance of COBYLA.
+                barmu = -preref / prerec  # preref + barmu * prerec = 0
+                cpen = max(cpen, min(2*barmu, REALMAX))  # This is the 1st (out of 2) updates of cpen.
+                if findpole(cpen, cval, fval) < num_vars:  # findpole returns a 0-based index; the pole sits at index num_vars
+                    conmat, cval, fval, sim, simi, subinfo = updatepole(cpen, conmat, cval, fval, sim, simi)
+                    if subinfo == DAMAGING_ROUNDING:
+                        info = subinfo
+                        break  # Better action to take? Geometry step, or simply continue?
+                    continue
+                    # N.B.: The continue can occur at most num_vars times before a new function evaluation
+                    # takes place. This is because the update of cpen does not decrease cpen, and hence it
+                    # can make vertex j (j < num_vars) become the new optimal vertex only if cval[j] < cval[num_vars],
+                    # which can happen at most num_vars times. See the paragraph below (9) in the COBYLA paper.
+
+            x = sim[:, num_vars] + d
+            # Evaluate the objective and constraints at X, taking care of possible inf/nan values.
+            f, constr, cstrv = evaluate(calcfc, x)
+            nf += 1
+
+            # Print a message about the function/constraint evaluation according to iprint.
+            # TODO: Implement me
+            # fmsg(solver, iprint, nf, f, x, cstrv, constr)
+            # Save X, F, CONSTR, CSTRV into the history.
+            # TODO: Implement me
+            # savehist(nf, x, xhist, f, fhist, cstrv, chist, constr, conhist)
+            # Save X, F, CONSTR, CSTRV into the filter (savefilt is already available; see initfilt).
+            nfilt, cfilt, ffilt, xfilt, confilt = savefilt(cstrv, ctol, cweight, f, x, nfilt, cfilt, ffilt, xfilt, constr, confilt)
+
+            # Evaluate prerem and actrem, which are the predicted and actual reductions in the merit
+            # function, respectively.
+            prerem = preref + cpen * prerec  # Theoretically nonnegative; is 0 if cpen = 0 = preref
+            actrem = (fval[num_vars] + cpen * cval[num_vars]) - (f + cpen * cstrv)
+            if cpen <= 0 and abs(f - fval[num_vars]) <= 0:
+                # cpen <= 0 indeed means cpen == 0, while abs(f - fval[num_vars]) <= 0 indeed means f == fval[num_vars]
+                prerem = prerec
+                actrem = cval[num_vars] - cstrv
+
+            # In theory, PREREM >= 0, but this can fail due to rounding errors.
+            # assert prerem >= 0, 'PREREM >= 0, COBYLA'
+
+            # Calculate the reduction ratio by redrat, which handles inf/nan carefully.
+            ratio = redrat(actrem, prerem, eta1)
+
+            # Update delta. After this, delta < dnorm may hold.
+            # N.B.:
+            # 1. Powell's code uses rho as the trust-region radius and updates it as follows:
+            # Reduce rho to gamma1*rho if adequate_geo is True and either shortd is True or ratio < eta1,
+            # and then revise rho to rhoend if its new value is not more than 1.5*rhoend; rho remains
+            # unchanged in all other cases; in particular, rho is never increased.
+            # 2. Our implementation uses delta as the trust-region radius, while using rho as a lower
+            # bound for delta. Delta is updated in a way that is typical for trust-region methods, and
+            # it is revised to rho if its new value is not more than 1.5*rho.
+            # Rho reflects the current resolution of the algorithm; its update is essentially the
+            # same as the update of rho in Powell's code (see the definition of reduce_rho below).
+            # Our implementation aligns with UOBYQA/NEWUOA/BOBYQA/LINCOA and improves the performance of COBYLA.
+            # 3. The same as Powell's code, we do not reduce rho unless adequate_geo is True. This is
+            # also how Powell updated rho in UOBYQA/NEWUOA/BOBYQA/LINCOA. What if we also used
+            # adequate_geo == True as a prerequisite for reducing delta? The argument would be that the
+            # bad (small) value of ratio may be because of a bad geometry (and hence a bad model) rather
+            # than an improperly large delta, and it might be good to try improving the geometry first
+            # without reducing delta. However, according to a test of 230206, it does not improve the
+            # performance if we skip the update of delta when adequate_geo is False and ratio < 0.1.
+            # Therefore we choose to update delta without checking adequate_geo.
+            delta = trrad(delta, dnorm, eta1, eta2, gamma1, gamma2, ratio)
+            if delta <= 1.5*rho:
+                delta = rho  # Set delta to rho when it is close to or below.
+
+            # Is the newly generated X better than the current best point?
+            ximproved = actrem > 0  # If actrem is NaN, then ximproved should and will be False.
+
+            # Set jdrop_tr to the index of the vertex to be replaced with X. jdrop_tr = None means there
+            # is no good point to replace, and X will not be included into the simplex; in this case,
+            # the geometry of the simplex likely needs improvement, which will be handled below.
+            jdrop_tr = setdrop_tr(ximproved, d, sim, simi)
+
+            # Update sim, simi, fval, conmat, and cval so that sim[:, jdrop_tr] is replaced with d.
+            # Updatexfc does nothing if jdrop_tr is None, as the algorithm decides to discard X.
+            sim, simi, fval, conmat, cval, subinfo = updatexfc(jdrop_tr, constr, cpen, cstrv, d, f, conmat, cval, fval, sim, simi)
+            # Check whether to break due to damaging rounding in updatepole (called by updatexfc).
+            if subinfo == DAMAGING_ROUNDING:
+                info = subinfo
+                break
+
+            # Check whether to break due to maxfun, ftarget, etc.
+            subinfo = checkbreak(maxfun, nf, cstrv, ctol, f, ftarget, x)
+            if subinfo != INFO_DEFAULT:
+                info = subinfo
+                break
+        # End of if shortd or max(prerec, preref) <= 0. The normal trust-region calculation ends.
+
+        # Before the next trust-region iteration, we possibly improve the geometry of the simplex or
+        # reduce rho according to improve_geo and reduce_rho. Now we decide these indicators.
+        # N.B.: We must ensure that the algorithm does not set improve_geo to True at infinitely many
+        # consecutive iterations without moving sim[:, num_vars] or reducing rho. Otherwise, the algorithm
+        # will get stuck in repetitive invocations of geostep. This is ensured by the following facts:
+        # 1. If an iteration sets improve_geo to True, it must also reduce delta or set delta to rho.
+        # 2. If sim[:, num_vars] and rho remain unchanged, then adequate_geo will become True after at
+        # most num_vars invocations of geostep.
+
+        # bad_trstep: Is the last trust-region step bad?
+        bad_trstep = shortd or (max(prerec, preref) <= 0) or ratio <= 0 or jdrop_tr is None
+        # improve_geo: Should we take a geometry step to improve the geometry of the interpolation set?
+        improve_geo = bad_trstep and not adequate_geo
+        # reduce_rho: Should we enhance the resolution by reducing rho?
+        reduce_rho = bad_trstep and adequate_geo and max(delta, dnorm) <= rho
+
+        # COBYLA never sets improve_geo and reduce_rho to True simultaneously.
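+        # An illustration with hypothetical values: if the step was short (shortd = True) and the
+        # simplex is distorted (adequate_geo = False), then bad_trstep = True, improve_geo = True,
+        # and reduce_rho = False, so a geometry step follows; with the same bad step but
+        # adequate_geo = True and max(delta, dnorm) <= rho, we get improve_geo = False and
+        # reduce_rho = True instead, so the resolution is enhanced.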
+        # assert not (improve_geo and reduce_rho), 'IMPROVE_GEO and REDUCE_RHO are not both TRUE, COBYLA'
+
+        # If shortd is True or max(prerec, preref) <= 0, then either improve_geo or reduce_rho
+        # is True unless adequate_geo is True and max(delta, dnorm) > rho.
+        # assert (not shortd and max(prerec, preref) > 0) or (improve_geo or reduce_rho or adequate_geo and max(delta, dnorm) > rho)
+
+        # Comments on bad_trstep:
+        # 1. Powell's definition of bad_trstep is as follows. The one used above seems to work better,
+        # especially for linearly constrained problems, due to the factor 1/10 (= eta1).
+        # bad_trstep = shortd or actrem <= 0 or actrem < prerem/10 or jdrop_tr is None
+        # Besides, Powell did not check max(prerec, preref) > 0 in bad_trstep, which is reasonable to do
+        # but has little impact on the performance.
+        # 2. NEWUOA/BOBYQA/LINCOA would define bad_trstep, improve_geo, and reduce_rho as follows. Two
+        # different thresholds are used in bad_trstep. It outperforms Powell's version.
+        # bad_trstep = shortd or max(prerec, preref) <= 0 or ratio <= eta1 or jdrop_tr is None
+        # improve_geo = bad_trstep and not adequate_geo
+        # reduce_rho = bad_trstep and adequate_geo and max(delta, dnorm) <= rho
+        # 3. Theoretically, jdrop_tr is not None when actrem > 0 (guaranteed by ratio > 0). However, in Powell's
+        # implementation, jdrop_tr may be 0 even when ratio > 0 due to NaN. The modernized code has rectified
+        # this in the function setdrop_tr. After this rectification, we can indeed simplify the definition
+        # of bad_trstep by removing the condition on jdrop_tr. We retain it for robustness.
+
+        # Comments on reduce_rho:
+        # When shortd is True, UOBYQA/NEWUOA/BOBYQA/LINCOA all set reduce_rho to True if the recent
+        # models are sufficiently accurate according to certain criteria. See the paragraph around (37)
+        # in the UOBYQA paper and the discussions about Box 14 in the NEWUOA paper. This strategy is
+        # crucial for the performance of the solvers. However, as of 20221111, we have not managed to
+        # make it work in COBYLA. As in NEWUOA, we recorded the errors of the recent models, and set
+        # reduce_rho to True if they are small (e.g. all(abs(moderrsav) <= 0.1 * max(abs(A))*rho) or
+        # all(abs(moderrsav) <= rho**2)) when shortd is True. It made little impact on the performance.
+
+        # Since COBYLA never sets improve_geo and reduce_rho to True simultaneously, the following
+        # two blocks are exchangeable: "if improve_geo:" and "if reduce_rho:".
+
+        # Improve the geometry of the simplex by removing a point and adding a new one.
+        # If the current interpolation set has acceptable geometry, then we skip the geometry step.
+        # The code has a small difference from Powell's original code here: if the current geometry
+        # is acceptable, then we will continue with a new trust-region iteration; however, at the
+        # beginning of the iteration, cpen may be updated, which may alter the pole point sim[:, num_vars]
+        # by updatepole; the quality of the interpolation set depends on sim[:, num_vars], meaning
+        # that the same interpolation set may have good or bad geometry with respect to different
+        # "poles"; if the geometry turns out bad with the new pole, the original COBYLA code will
+        # take a geometry step, but our code will NOT do it but continue to take a trust-region
+        # step. The argument is this: even if the geometry step is not skipped in the first place, the
+        # geometry may turn out bad again after the pole is altered due to an update to cpen; should
+        # we take another geometry step in that case?
+        # If no, why should we do it here? Indeed, this
+        # distinction makes no practical difference for the CUTEst problems with at most 100 variables
+        # and 5000 constraints, while the algorithm framework is simplified.
+        if improve_geo and not assess_geo(delta, factor_alpha, factor_beta, sim, simi):
+            # Before the geometry step, updatepole has been called either implicitly by updatexfc or
+            # explicitly after cpen is updated, so that sim[:, num_vars] is the optimal vertex.
+
+            # Pick a vertex to drop from the simplex. It will be replaced with sim[:, num_vars] + d to
+            # improve the acceptability of the simplex. See equations (15) and (16) of the COBYLA paper.
+            # N.B.: COBYLA never sets jdrop_geo = num_vars.
+            jdrop_geo = setdrop_geo(delta, factor_alpha, factor_beta, sim, simi)
+
+            # The following jdrop_geo comes from UOBYQA/NEWUOA/BOBYQA/LINCOA. It performs poorly!
+            # jdrop_geo = np.argmax(np.sum(sim[:, :num_vars]**2, axis=0), axis=0)  # TODO: Check axes, though not urgent since this is commented out
+
+            # jdrop_geo is a valid index unless sim and simi contain NaN, which should not happen
+            # at this point unless there is a bug. Nevertheless, for robustness, we break when
+            # setdrop_geo returns None (if we continued, None would be used as an index of arrays
+            # and raise an error).
+            if jdrop_geo is None:
+                info = DAMAGING_ROUNDING
+                break
+
+            # Calculate the geometry step d.
+            # In NEWUOA, geostep takes delbar = max(min(np.sqrt(max(distsq))/10, delta/2), rho)
+            # rather than delta. This should not be done here, because d should improve the geometry of
+            # the simplex when sim[:, jdrop] is replaced with d; the quality of the geometry is defined
+            # by delta instead of delbar as in (14) of the COBYLA paper. See geostep for more detail.
+            d = geostep(jdrop_geo, cpen, conmat, cval, delta, fval, factor_gamma, simi)
+
+            x = sim[:, num_vars] + d
+            # Evaluate the objective and constraints at X, taking care of possible inf/nan values.
+            f, constr, cstrv = evaluate(calcfc, x)
+            nf += 1
+
+            # Print a message about the function/constraint evaluation according to iprint.
+            # TODO: Implement me
+            # fmsg(solver, iprint, nf, f, x, cstrv, constr)
+            # Save X, F, CONSTR, CSTRV into the history.
+            # TODO: Implement me
+            # savehist(nf, x, xhist, f, fhist, cstrv, chist, constr, conhist)
+            # Save X, F, CONSTR, CSTRV into the filter (savefilt is already available; see initfilt).
+            nfilt, cfilt, ffilt, xfilt, confilt = savefilt(cstrv, ctol, cweight, f, x, nfilt, cfilt, ffilt, xfilt, constr, confilt)
+
+            sim, simi, fval, conmat, cval, subinfo = updatexfc(jdrop_geo, constr, cpen, cstrv, d, f, conmat, cval, fval, sim, simi)
+            if subinfo == DAMAGING_ROUNDING:
+                info = subinfo
+                break
+
+            # Check whether to break due to maxfun, ftarget, etc.
+            subinfo = checkbreak(maxfun, nf, cstrv, ctol, f, ftarget, x)
+            if subinfo != INFO_DEFAULT:
+                info = subinfo
+                break
+        # End of if improve_geo. The procedure of improving the geometry ends.
+
+        # The calculations with the current rho are complete. Enhance the resolution of the algorithm
+        # by reducing rho; update delta and cpen at the same time.
+        if reduce_rho:
+            if rho <= rhoend:
+                info = SMALL_TR_RADIUS
+                break
+            delta = rho/2
+            rho = redrho(rho, rhoend)
+            delta = max(delta, rho)
+            cpen = min(cpen, fcratio(fval, conmat))  # This is the 2nd (out of 2) update of cpen.
+            # It may become 0.
+            # Print a message about the reduction of rho according to iprint.
+            # TODO: implement me!
+            # call rhomsg(solver, iprint, nf, fval(n + 1), rho, sim(:, n + 1), cval(n + 1), conmat(:, n + 1), cpen)
+            conmat, cval, fval, sim, simi, subinfo = updatepole(cpen, conmat, cval, fval, sim, simi)
+            # Check whether to break due to damaging rounding detected in updatepole.
+            if subinfo == DAMAGING_ROUNDING:
+                info = subinfo
+                break  # Better action to take? Geometry step, or simply continue?
+    # end of for loop
+
+    # Return the best calculated values of the variables.
+    # N.B.: Selectx and findpole choose X by different standards; one cannot replace the other.
+    kopt = selectx(ffilt[:nfilt], cfilt[:nfilt], max(cpen, cweight), ctol)
+    x = xfilt[:, kopt]
+    f = ffilt[kopt]
+    constr = confilt[:, kopt]
+    cstrv = cfilt[kopt]
+
+    # Arrange CHIST, CONHIST, FHIST, and XHIST so that they are in the chronological order.
+    # TODO: Implement me
+    # call rangehist(nf, xhist, fhist, chist, conhist)
+
+    # Print a return message according to IPRINT.
+    # TODO: Implement me
+    # call retmsg(solver, info, iprint, nf, f, x, cstrv, constr)
+    return x, f, nf, info
+
+
diff --git a/python/src/prima/cobyla/geometry.py b/python/src/prima/cobyla/geometry.py
new file mode 100644
index 0000000000..b230e45e58
--- /dev/null
+++ b/python/src/prima/cobyla/geometry.py
@@ -0,0 +1,188 @@
+import numpy as np
+
+def setdrop_geo(delta, factor_alpha, factor_beta, sim, simi):
+    '''
+    This function finds (the index of) a current interpolation point to be replaced with a
+    geometry-improving point. See (15)-(16) of the COBYLA paper.
+    N.B.: COBYLA never sets jdrop to num_vars.
+    '''
+
+    num_vars = sim.shape[0]
+
+    # Calculate the values of sigma and eta.
+    # vsig[j] for j = 0...num_vars-1 is the Euclidean distance from vertex J to the opposite face of the simplex.
+    vsig = 1 / np.sqrt(np.sum(simi**2, axis=1))
+    veta = np.sqrt(np.sum(sim[:, :num_vars]**2, axis=0))
+
+    # Decide which vertex to drop from the simplex. It will be replaced with a new point to improve the
+    # acceptability of the simplex. See equations (15) and (16) of the COBYLA paper.
+    if any(veta > factor_beta * delta):
+        jdrop = np.where(veta == max(veta[~np.isnan(veta)]))[0][0]
+    elif any(vsig < factor_alpha * delta):
+        jdrop = np.where(vsig == min(vsig[~np.isnan(vsig)]))[0][0]
+    else:
+        # We arrive here if vsig and veta are all nan, which can happen due to nan in sim and simi,
+        # which should not happen unless there is a bug.
+        jdrop = None
+
+    # Zaikun 230202: What if we consider veta and vsig together? The following attempts do not work well.
+    # jdrop = max(np.sum(sim[:, :num_vars]**2, axis=0)*np.sum(simi**2, axis=1))  # Condition number
+    # jdrop = max(np.sum(sim[:, :num_vars]**2, axis=0)**2 * np.sum(simi**2, axis=1))  # Condition number times distance
+    return jdrop
+
+def geostep(jdrop, cpen, conmat, cval, delta, fval, factor_gamma, simi):
+    '''
+    This function calculates a geometry step so that the geometry of the interpolation set is improved
+    when sim[:, jdrop] is replaced with sim[:, num_vars] + d.
+    '''
+    num_constraints = conmat.shape[0]
+    num_vars = simi.shape[0]
+
+    # simi[jdrop, :] is a vector perpendicular to the face of the simplex opposite to vertex
+    # jdrop. Thus vsigj * simi[jdrop, :] is the unit vector in this direction.
+    vsigj = 1 / np.sqrt(np.sum(simi[jdrop, :]**2))
+
+    # Set d to the vector in the above-mentioned direction and with length factor_gamma * delta.
+    # Since factor_alpha < factor_gamma < factor_beta, d improves the geometry of the simplex as per (14) of
+    # the COBYLA paper. This also explains why this subroutine does not replace delta with
+    # delbar = max(min(np.sqrt(max(distsq))/10, delta/2), rho) as in NEWUOA.
+    d = factor_gamma * delta * (vsigj * simi[jdrop, :])
+
+    # Calculate the coefficients of the linear approximations to the objective and constraint functions,
+    # placing minus the objective function gradient after the constraint gradients in the array A.
+    A = np.zeros((num_vars, num_constraints + 1))
+    A[:, :num_constraints] = ((conmat[:, :num_vars] - np.tile(conmat[:, num_vars], (num_vars, 1)).T)@simi).T
+    A[:, num_constraints] = (fval[num_vars] - fval[:num_vars])@simi
+    cvmaxp = max(0, max(-d@A[:, :num_constraints] - conmat[:, num_vars]))
+    cvmaxn = max(0, max(d@A[:, :num_constraints] - conmat[:, num_vars]))
+    if 2 * np.dot(d, A[:, num_constraints]) < cpen * (cvmaxp - cvmaxn):
+        d *= -1
+    return d
+
+def setdrop_tr(ximproved, d, sim, simi):
+    '''
+    This function finds (the index of) a current interpolation point to be replaced with the
+    trust-region trial point. See (19)-(22) of the COBYLA paper.
+    N.B.:
+    1. If ximproved == True, then jdrop is not None, so that d is included into the simplex. Otherwise, it is a bug.
+    2. COBYLA never sets jdrop = num_vars.
+    TODO: Check whether it improves the performance if jdrop = num_vars is allowed when ximproved is True.
+    Note that updatexfc should be revised accordingly.
+    '''
+
+    num_vars = sim.shape[0]
+
+    # -------------------------------------------------------------------------------------------------- #
+    # The following code is Powell's scheme for defining JDROP.
+    # -------------------------------------------------------------------------------------------------- #
+    # ! JDROP = 0 by default. It cannot be removed, as JDROP may not be set below in some cases (e.g.,
+    # ! when XIMPROVED == FALSE, MAXVAL(ABS(SIMID)) <= 1, and MAXVAL(VETA) <= EDGMAX).
+    # jdrop = 0
+    #
+    # ! SIMID(J) is the value of the J-th Lagrange function at D. It is the counterpart of VLAG in UOBYQA
+    # ! and DEN in NEWUOA/BOBYQA/LINCOA, but it excludes the value of the (N+1)-th Lagrange function.
+    # simid = matprod(simi, d)
+    # if (any(abs(simid) > 1) .or. (ximproved .and. any(.not. is_nan(simid)))) then
+    #     jdrop = int(maxloc(abs(simid), mask=(.not. is_nan(simid)), dim=1), kind(jdrop))
+    #     !!MATLAB: [~, jdrop] = max(simid, [], 'omitnan');
+    # end if
+    #
+    # ! VETA(J) is the distance from the J-th vertex of the simplex to the best vertex, taking the trial
+    # ! point SIM(:, N+1) + D into account.
+    # if (ximproved) then
+    #     veta = sqrt(sum((sim(:, 1:n) - spread(d, dim=2, ncopies=n))**2, dim=1))
+    #     !!MATLAB: veta = sqrt(sum((sim(:, 1:n) - d).^2)); % d should be a column! Implicit expansion
+    # else
+    #     veta = sqrt(sum(sim(:, 1:n)**2, dim=1))
+    # end if
+    #
+    # ! VSIG(J) (J=1, .., N) is the Euclidean distance from vertex J to the opposite face of the simplex.
+    # vsig = ONE / sqrt(sum(simi**2, dim=2))
+    # sigbar = abs(simid) * vsig
+    #
+    # ! The following JDROP will overwrite the previous one if its premise holds.
+    # mask = (veta > factor_delta * delta .and. (sigbar >= factor_alpha * delta .or. sigbar >= vsig))
+    # if (any(mask)) then
+    #     jdrop = int(maxloc(veta, mask=mask, dim=1), kind(jdrop))
+    #     !!MATLAB: etamax = max(veta(mask)); jdrop = find(mask & ~(veta < etamax), 1, 'first');
+    # end if
+    #
+    # ! Powell's code does not include the following instructions. With Powell's code, if SIMID consists
+    # ! of only NaN, then JDROP can be 0 even when XIMPROVED == TRUE (i.e., D reduces the merit function).
+    # ! With the following code, JDROP cannot be 0 when XIMPROVED == TRUE, unless VETA is all NaN, which
+    # ! should not happen if X0 does not contain NaN, the trust-region/geometry steps never contain NaN,
+    # ! and we exit once encountering an iterate containing Inf (due to overflow).
+    # if (ximproved .and. jdrop <= 0) then  ! Write JDROP <= 0 instead of JDROP == 0 for robustness.
+    #     jdrop = int(maxloc(veta, mask=(.not. is_nan(veta)), dim=1), kind(jdrop))
+    #     !!MATLAB: [~, jdrop] = max(veta, [], 'omitnan');
+    # end if
+    # -------------------------------------------------------------------------------------------------- #
+    # Powell's scheme ends here.
+    # -------------------------------------------------------------------------------------------------- #
+
+    # The following definition of jdrop is inspired by setdrop_tr in UOBYQA/NEWUOA/BOBYQA/LINCOA.
+    # It is simpler and works better than Powell's scheme. Note that we allow jdrop to be num_vars if
+    # ximproved is True, whereas Powell's code does not.
+    # See also (4.1) of Scheinberg-Toint-2010: Self-Correcting Geometry in Model-Based Algorithms for
+    # Derivative-Free Unconstrained Optimization, which refers to the strategy here as the "combined
+    # distance/poisedness criteria".
+
+    # distsq[j] is the square of the distance from the jth vertex of the simplex to the "best" point so
+    # far, taking the trial point sim[:, num_vars] + d into account.
+    distsq = np.zeros(sim.shape[1])
+    if ximproved:
+        distsq[:num_vars] = np.sum((sim[:, :num_vars] - np.tile(d, (num_vars, 1)).T)**2, axis=0)
+        distsq[num_vars] = np.sum(d**2)
+    else:
+        distsq[:num_vars] = np.sum(sim[:, :num_vars]**2, axis=0)
+        distsq[num_vars] = 0
+
+    weight = distsq
+
+    # Other possible definitions of weight. They work almost the same as the one above.
+    # weight = max(1, max(25 * distsq / delta**2))
+    # weight = max(1, max(10 * distsq / delta**2))
+    # weight = max(1, max(1e2 * distsq / delta**2))
+    # weight = max(1, max(distsq / max(rho, delta/10)**2))
+    # weight = max(1, max(distsq / rho**2))
+
+    # If 0 <= j < num_vars, simid[j] is the value of the jth Lagrange function at d; the value of the
+    # (num_vars+1)th Lagrange function is 1 - sum(simid). [simid, 1 - sum(simid)] is the counterpart of
+    # vlag in UOBYQA and den in NEWUOA/BOBYQA/LINCOA.
+    simid = simi@d
+    score = weight * abs(np.array([*simid, 1 - np.sum(simid)]))
+
+    # If ximproved is False (the new d does not render a "better" x), we set score[num_vars] = -1 to avoid
+    # jdrop = num_vars. This is not really needed if weight is defined to be distsq raised to some power,
+    # in which case score[num_vars] = 0. We keep the code for robustness (in case the definition of weight
+    # changes later).
+    if not ximproved:
+        score[num_vars] = -1
+
+    if any(score > 0):
+        jdrop = np.where(score == max(score[~np.isnan(score)]))[0][0]
+    elif ximproved:
+        jdrop = np.argmax(distsq)
+    else:
+        jdrop = None
+
+    return jdrop
+
+def assess_geo(delta, factor_alpha, factor_beta, sim, simi):
+    # This function checks if an interpolation set has acceptable geometry as in
+    # (14) of the COBYLA paper.
+
+    # Calculate the values of sigma and eta.
+    # veta[j] (0 <= j < n) is the distance between vertex j and the best vertex of the simplex.
+    # vsig[j] (0 <= j < n) is the distance from vertex j to its opposite face of the simplex.
+    # Thus vsig <= veta.
+    # N.B.: What about the distance from vertex N+1 to its opposite face?
+    # Consider the simplex {V_{N+1}, V_{N+1} + L*e_1, ..., V_{N+1} + L*e_N}, where V_{N+1} is vertex N+1,
+    # namely the current "best" point, [e_1, ..., e_N] is an orthogonal matrix, and L is a
+    # constant in the order of delta. This simplex is optimal in the sense that the interpolation
+    # system has the minimal condition number, i.e. 1. For this simplex, the distance from
+    # V_{N+1} to its opposite face is L/sqrt(n).
+    vsig = 1/np.sqrt(np.sum(simi**2, axis=1))  # TODO: Check axis
+    veta = np.sqrt(np.sum(sim[:, :-1]**2, axis=0))
+    adequate_geo = all(vsig >= factor_alpha * delta) and all(veta <= factor_beta * delta)
+    return adequate_geo
\ No newline at end of file
diff --git a/python/src/prima/cobyla/trustregion.py b/python/src/prima/cobyla/trustregion.py
new file mode 100644
index 0000000000..676b0e0a39
--- /dev/null
+++ b/python/src/prima/cobyla/trustregion.py
@@ -0,0 +1,447 @@
+'''
+This module provides subroutines concerning the trust-region calculations of COBYLA.
+
+Coded by Zaikun ZHANG (www.zhangzk.net) based on Powell's code and the COBYLA paper.
+
+Dedicated to the late Professor M. J. D. Powell FRS (1936--2015).
+
+Python implementation by Nickolai Belakovski
+'''
+
+import numpy as np
+import numpy.typing as npt
+from prima.common.consts import REALMIN, REALMAX, EPS
+from prima.common.powalg import qradd_Rdiag, qrexc_Rdiag
+from prima.common.linalg import isminor
+
+def trrad(delta_in, dnorm, eta1, eta2, gamma1, gamma2, ratio):
+    # This function updates the trust-region radius according to ratio and dnorm.
+
+    if ratio <= eta1:
+        delta = gamma1 * dnorm  # Powell's UOBYQA/NEWUOA
+        # delta = gamma1 * delta_in  # Powell's COBYLA/LINCOA
+        # delta = min(gamma1 * delta_in, dnorm)  # Powell's BOBYQA
+    elif ratio <= eta2:
+        delta = max(gamma1 * delta_in, dnorm)  # Powell's UOBYQA/NEWUOA/BOBYQA/LINCOA
+    else:
+        delta = max(gamma1 * delta_in, gamma2 * dnorm)  # Powell's NEWUOA/BOBYQA
+        # delta = max(delta_in, gamma2 * dnorm)  # Modified version. Works well for UOBYQA.
+        # For noise-free CUTEst problems of <= 100 variables, Powell's version works slightly better
+        # than the modified one.
+        # delta = max(delta_in, 1.25*dnorm, dnorm + rho)  # Powell's UOBYQA
+        # delta = min(max(gamma1 * delta_in, gamma2 * dnorm), gamma3 * delta_in)  # Powell's LINCOA, gamma3 = np.sqrt(2)
+
+    # For noisy problems, the following may work better.
+    # if ratio <= eta1:
+    #     delta = gamma1 * dnorm
+    # elif ratio <= eta2:  # Ensure DELTA >= DELTA_IN
+    #     delta = delta_in
+    # else:  # Ensure DELTA > DELTA_IN with a constant factor
+    #     delta = max(delta_in * (1 + gamma2) / 2, gamma2 * dnorm)
+    return delta
+
+def redrat(ared, pred, rshrink):
+    # This function evaluates the reduction ratio of a trust-region step, handling inf/nan properly.
+    if np.isnan(ared):
+        # This should not happen in unconstrained problems due to the moderated extreme barrier.
+        ratio = -REALMAX
+    elif np.isnan(pred) or pred <= 0:
+        # The trust-region subproblem solver fails in this rare case. Instead of terminating as Powell's
+        # original code does, we set ratio as follows so that the solver may continue to progress.
+        if ared > 0:
+            # The trial point will be accepted, but the trust-region radius will be shrunk if rshrink > 0.
+            ratio = rshrink/2
+        else:
+            # Set the ratio to a large negative number to signify a bad trust-region step, so that the
+            # solver will check whether to take a geometry step or reduce rho.
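+            # (Illustration with hypothetical numbers: ared = -0.2 and pred = NaN land here
+            # and yield ratio = -REALMAX.)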
+            ratio = -REALMAX
+    elif np.isposinf(pred) and np.isposinf(ared):
+        ratio = 1  # ared/pred = NaN if calculated directly
+    elif np.isposinf(pred) and np.isneginf(ared):
+        ratio = -REALMAX  # ared/pred = NaN if calculated directly
+    else:
+        ratio = ared/pred
+    return ratio
+
+def trstlp(A_in, b_in, delta):
+    '''
+    This function calculates an n-component vector d by the following two stages. In the first
+    stage, d is set to the shortest vector that minimizes the greatest violation of the constraints
+        np.dot(A[:n, k], d) >= b[k], k = 0, 1, 2, ..., m-1
+    subject to the Euclidean length of d being at most delta. If its length is strictly less than
+    delta, then the second stage uses the resultant freedom in d to minimize the objective function
+        np.dot(-A[:n, m], d)
+    subject to no increase in any greatest constraint violation. This notation allows the gradient of
+    the objective function to be regarded as the gradient of a constraint. Therefore the two stages
+    are distinguished by mcon == m and mcon > m respectively.
+
+    It is possible but rare that a degeneracy may prevent d from attaining the target length delta.
+
+    cviol is the largest constraint violation of the current d: max(max(b[:m] - d@A[:, :m]), 0).
+    icon is the index of a most violated constraint if cviol is positive.
+
+    nact is the number of constraints in the active set and iact[0], ..., iact[nact-1] are their indices,
+    while the remainder of iact contains a permutation of the remaining constraint indices.
+    N.B.: nact <= min(num_constraints, num_vars). Obviously nact <= num_constraints. In addition, the constraints
+    in iact[0, ..., nact-1] have linearly independent gradients (see the comments above the instructions
+    that delete a constraint from the active set to make room for the new active constraint with index iact[icon]);
+    it can also be seen from the update of nact: starting from 0, nact is incremented only if nact < n.
+
+    Further, Z is an orthogonal matrix whose first nact columns can be regarded as the result of
+    Gram-Schmidt applied to the active constraint gradients. For j = 0, 1, ..., nact-1, the number
+    zdota[j] is the scalar product of the jth column of Z with the gradient of the jth active
+    constraint. d is the current vector of variables and here the residuals of the active constraints
+    should be zero. Further, the active constraints have nonnegative Lagrange multipliers that are
+    held at the beginning of vmultc. The remainder of this vector holds the residuals of the inactive
+    constraints at d, the ordering of the components of vmultc being in agreement with the permutation
+    of the indices of the constraints that is in iact. All these residuals are nonnegative, which is
+    achieved by the shift cviol that makes the least residual zero.
+
+    N.B.:
+    1. The algorithm was NOT documented in the COBYLA paper. A note should be written to introduce it!
+    2. As a major part of the algorithm (see trstlp_sub), the code maintains and updates the QR
+    factorization of A[iact[:nact]], i.e. the gradients of all the active (linear) constraints. The
+    matrix Z is indeed Q, and the vector zdota is the diagonal of R. The factorization is updated by
+    Givens rotations when an index is added in or removed from iact.
+    3. There are probably better algorithms available for this trust-region linear programming problem.
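+    4. An illustrative example (hypothetical data): with n = 2, m = 1, A[:, 0] = [1, 0], b[0] = 1,
+    and delta = 0.5, the single linearized constraint asks for d[0] >= 1, which is unreachable within
+    the trust region; stage 1 then returns d = [0.5, 0], the step of length at most delta that
+    minimizes the violation max(1 - d[0], 0).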
+    '''
+
+    A = A_in.copy()
+    b = b_in.copy()
+    vmultc = np.zeros(len(b))
+    iact = np.zeros(len(b), dtype=int)  # iact holds 0-based constraint indices
+    nact = 0
+    d = np.zeros(A_in.shape[0])
+    z = np.zeros((len(d), len(d)))
+    num_constraints = A.shape[1] - 1
+
+    # Scale the problem if A contains large values. Otherwise floating point exceptions may occur.
+    # Note that the trust-region step is scale invariant.
+    for i in range(num_constraints+1):  # Note that A.shape[1] == len(b) == num_constraints+1 != num_constraints
+        if (maxval := max(abs(A_in[:, i]))) > 1e12:
+            modscal = max(2*REALMIN, 1/maxval)
+            A[:, i] *= modscal
+            b[i] *= modscal
+
+    # Stage 1: minimize the L-infinity constraint violation of the linearized constraints.
+    iact_stage1, nact, d, vmultc_stage1, z = trstlp_sub(iact[:num_constraints], nact, 1, A[:, :num_constraints], b[:num_constraints], delta, d, vmultc[:num_constraints], z)
+    # N.B.: Unlike in Fortran, slicing creates copies, so we write the stage-1 results back explicitly.
+    iact[:num_constraints] = iact_stage1
+    vmultc[:num_constraints] = vmultc_stage1
+
+    # Stage 2: minimize the linearized objective without increasing the L-infinity constraint violation.
+    iact, nact, d, vmultc, z = trstlp_sub(iact, nact, 2, A, b, delta, d, vmultc, z)
+    # TODO: Double-check the handling of A_in vs A against the Fortran code.
+    return d
+
+def trstlp_sub(iact: npt.NDArray, nact: int, stage, A, b, delta, d, vmultc, z):
+    '''
+    This subroutine does the real calculations for trstlp, both stage 1 and stage 2.
+    Major differences between stage 1 and stage 2:
+    1. Initialization. Stage 2 inherits the values of some variables from stage 1, so they are
+    initialized in stage 1 but not in stage 2.
+    2. cviol. cviol is updated at each iteration in stage 1, while it remains constant in stage 2.
+    3. sdirn. See the definition of sdirn in the code for details.
+    4. optnew. The two stages have different objectives, so optnew is updated differently.
+    5. step. step <= cviol in stage 1.
+    '''
+    # zdasav = np.zeros(z.shape[1])
+    vmultd = np.zeros(len(vmultc))
+
+    num_vars = A.shape[0]
+    mcon = A.shape[1]
+    zdota = np.zeros(z.shape[1])
+
+    # Initialize according to stage.
+    if stage == 1:
+        iact = np.arange(mcon)  # 0-based indices, unlike the 1-based Fortran original
+        nact = 0
+        d = np.zeros(num_vars)
+        cviol = max(np.append(b, 0))
+        vmultc = cviol - b
+        z = np.eye(num_vars)
+        if mcon == 0 or cviol <= 0:
+            # Check whether a quick return is possible. Make sure the in-outputs have been initialized.
+            return iact, nact, d, vmultc, z
+
+        if all(np.isnan(b)):
+            return iact, nact, d, vmultc, z
+        else:
+            icon = np.where(b == max(b[~np.isnan(b)]))[0][0]
+        num_constraints = mcon
+        sdirn = np.zeros(len(d))
+    else:
+        if np.dot(d, d) >= delta**2:
+            # Check whether a quick return is possible.
+            return iact, nact, d, vmultc, z
+
+        iact[mcon-1] = mcon - 1  # 0-based index of the objective row (the Fortran code sets iact(mcon) = mcon)
+        vmultc[mcon-1] = 0
+        num_constraints = mcon - 1
+        icon = mcon - 1
+
+        # In Powell's code, stage 2 uses the zdota and cviol calculated by stage 1. Here we recalculate
+        # them so that they need not be passed from stage 1 to 2, and hence the coupling is reduced.
+        cviol = max(max(b[:num_constraints] - d@A[:, :num_constraints]), 0)
+        zdota[:nact] = [np.dot(z[:, k], A[:, iact[k]]) for k in range(nact)]
+
+    # More initialization.
+    optold = REALMAX
+    nactold = nact
+    nfail = 0
+
+    # Zaikun 20211011: vmultd is computed from scratch at each iteration, but vmultc is inherited.
+
+    # Powell's code can encounter infinite cycling, which did happen when testing the following CUTEst
+    # problems: DANWOODLS, GAUSS1LS, GAUSS2LS, GAUSS3LS, KOEBHELB, TAX13322, TAXR13322. Indeed, in all
+    # these cases, Inf/NaN appear in d due to extremely large values in A (up to 10^219).
+    # To resolve this, we set the maximal number of iterations to maxiter, and terminate if Inf/NaN occurs in d.
+    maxiter = min(10000, 100*max(num_constraints, num_vars))
+    for iter in range(maxiter):
+        if stage == 1:
+            optnew = cviol
+        else:
+            optnew = -np.dot(d, A[:, mcon-1])
+
+        # End the current stage of the calculation if 3 consecutive iterations have either failed to
+        # reduce the best calculated value of the objective function or to increase the number of active
+        # constraints since the best value was calculated. This strategy prevents cycling, but there is
+        # a remote possibility that it will cause premature termination.
+        if optnew < optold or nact > nactold:
+            nactold = nact
+            nfail = 0
+        else:
+            nfail += 1
+        optold = min(optold, optnew)
+        if nfail == 3:
+            break
+
+        # If icon exceeds nact, then we add the constraint with index iact[icon] to the active set.
+        # Apply Givens rotations so that the last num_vars-nact-1 columns of Z are orthogonal to the gradient
+        # of the new constraint, a scalar product being set to 0 if its nonzero value could be due to
+        # computer rounding errors, which is tested by ISMINOR.
+        if icon > nact:
+            # zdasav[:nact] = zdota
+            nactsav = nact
+            z, zdota, nact = qradd_Rdiag(A[:, iact[icon]], z, zdota, nact)  # May update nact to nact+1
+            # Indeed it suffices to pass zdota[:min(num_vars, nact+1)] to qradd as follows:
+            # qradd(A[:, iact[icon]], z, zdota[:min(num_vars, nact+1)], nact)
+
+            if nact == nactsav + 1:
+                # N.B.: It is possible to index arrays using [nact, icon] when nact == icon.
+                # Zaikun 20211012: Why should vmultc[nact] = 0?
+                if nact != icon:
+                    vmultc[[icon, nact]] = vmultc[nact], 0
+                    iact[[icon, nact]] = iact[[nact, icon]]
+                else:
+                    vmultc[nact] = 0
+            else:
+                # Zaikun 20211011:
+                # 1. vmultd is calculated from scratch for the first time (out of 2) in one iteration.
+                # 2. Note that iact has not been updated to replace iact[nact] with iact[icon]. Thus
+                # A[:, iact[:nact]] is the UNUPDATED version before qradd (note Z[:, :nact] remains the
+                # same before and after qradd). Therefore if we supply zdota to lsqr (as Rdiag) as
+                # Powell did, we should use the UNUPDATED version, namely zdasav.
+                # vmultd[:nact] = lsqr(A[:, iact[:nact]], A[:, iact[icon]], z[:, :nact], zdasav[:nact])
+                vmultd[:nact] = np.linalg.lstsq(A[:, iact[:nact]], A[:, iact[icon]], rcond=None)[0]
+                if not any(np.logical_and(vmultd[:nact] > 0, iact[:nact] < num_constraints)):
+                    # N.B.: This can be triggered by nact == 0 (among other possibilities)! This is
+                    # important, because nact will be used as an index in the sequel.
+                    break
+                # vmultd[nact:mcon] is not used below; in the Fortran code it must be initialized so that
+                # the where construct does not access undefined values (restricting the where construct to
+                # 1:nact would be an alternative). We keep the initialization for parity.
+                vmultd[nact:mcon] = -1  # len(vmultd) == mcon
+
+                # Revise the Lagrange multipliers. The revision is not applicable to vmultc[nact:num_constraints].
+                fracmult = [vmultc[i]/vmultd[i] if vmultd[i] > 0 and iact[i] < num_constraints else REALMAX for i in range(nact)]
+                # Only the places with vmultd > 0 and iact < num_constraints are relevant below, if any.
+                frac = min(fracmult)  # fracmult has length nact here
+                vmultc[:nact] = np.maximum(0, vmultc[:nact] - frac*vmultd[:nact])
+
+                # Zaikun 20210811: Powell's code includes the following, which is IMPOSSIBLE TO REACH.
+                # !if (icon < nact) then
+                # !    do k = icon, nact-1
+                # !        hypt = sqrt(zdota(k+1)**2+inprod(z(:, k), A(:, iact(k+1)))**2)
+                # !        grot = planerot([zdota(k+1), inprod(z(:, k), A(:, iact(k+1)))])
+                # !        z(:, [k, k+1]) = matprod(z(:, [k+1, k]), transpose(grot))
+                # !        zdota([k, k+1]) = [hypt, (zdota(k+1) / hypt) * zdota(k)]
+                # !    end do
+                # !    iact(icon:nact) = [iact(icon+1:nact), iact(icon)]
+                # !    vmultc(icon:nact) = [vmultc(icon+1:nact), vmultc(icon)]
+                # !end if
+
+                # Reorder the active constraints so that the one to be replaced is at the end of the list.
+                # Exit if the new value of zdota[nact] is not acceptable. Powell's condition for the
+                # following if: not abs(zdota[nact]) > 0. Note that it is different from
+                # 'abs(zdota[nact]) <= 0', as zdota[nact] can be NaN: Python evaluates both comparisons
+                # to False for NaN, whereas Fortran evaluates .NOT. ABS(NaN) > 0 to true; hence we test
+                # for NaN explicitly.
+                # N.B.: We cannot arrive here with nact == 0, which should have triggered a break above.
+                if np.isnan(zdota[nact]) or abs(zdota[nact]) <= EPS**2:
+                    break
+                vmultc[[icon, nact]] = 0, frac  # vmultc[[icon, nact]] is valid as icon > nact
+                iact[[icon, nact]] = iact[[nact, icon]]
+            # end if nact == nactsav + 1
+
+            # In stage 2, ensure that the objective continues to be treated as the last active constraint.
+            # Zaikun 20211011, 20211111: Is it guaranteed for stage 2 that iact[nact-1] = mcon when
+            # iact[nact] != mcon??? If not, then how does the following procedure ensure that mcon is
+            # the last of iact[:nact]?
+            if stage == 2 and iact[nact] != mcon:
+                assert nact > 1, "nact > 1 is required"
+                z, zdota[:nact] = qrexc_Rdiag(A[:, iact[:nact]], z, zdota[:nact], nact - 1)
+                # Indeed, it suffices to pass Z[:, :nact] to qrexc as follows:
+                # z[:, :nact], zdota[:nact] = qrexc(A[:, iact[:nact]], z[:, :nact], zdota[:nact], nact - 1)
+                iact[[nact-1, nact]] = iact[[nact, nact-1]]
+                vmultc[[nact-1, nact]] = vmultc[[nact, nact-1]]
+            # Zaikun 20211117: It turns out that the last few lines do not guarantee iact[nact] == num_vars in
+            # stage 2; the following test cannot be passed. IS THIS A BUG?!
+            # assert iact[nact] == mcon or stage == 1, 'iact[nact] must == mcon in stage 2'
+
+            # Powell's code does not have the following. It avoids subsequent floating point exceptions.
+            if np.isnan(zdota[nact]) or abs(zdota[nact]) <= EPS**2:
+                break
+
+            # Set sdirn to the direction of the next change to the current vector of variables.
+            # Usually during stage 1 the vector sdirn gives a search direction that reduces all the
+            # active constraint violations by one simultaneously.
+            if stage == 1:
+                sdirn -= ((np.dot(sdirn, A[:, iact[nact]]) - 1)/zdota[nact])*z[:, nact]
+            else:
+                sdirn = 1/zdota[nact]*z[:, nact]
+        else:  # icon <= nact
+            # Delete the constraint with the index iact[icon] from the active set, which is done by
+            # reordering iact[icon:nact] into [iact[icon+1:nact], iact[icon]] and then reducing nact to
+            # nact - 1. In theory, icon > 0.
+            assert icon > 0, "icon > 0 is required"
+            z, zdota[:nact] = qrexc_Rdiag(A[:, iact[:nact]], z, zdota[:nact], icon)  # qrexc does nothing if icon == nact
+            # Indeed, it suffices to pass Z[:, :nact] to qrexc as follows:
+            # z[:, :nact], zdota[:nact] = qrexc(A[:, iact[:nact]], z[:, :nact], zdota[:nact], icon)
+            # N.B.: a tuple cannot be assigned to a numpy slice, hence np.append below.
+            iact[icon:nact+1] = np.append(iact[icon+1:nact+1], iact[icon])
+            vmultc[icon:nact+1] = np.append(vmultc[icon+1:nact+1], vmultc[icon])
+            nact -= 1
+
+            # Powell's code does not have the following. It avoids subsequent exceptions.
+            # Zaikun 20221212: In theory, nact > 0 in stage 2, as the objective function should always
+            # be considered as an "active constraint" --- more precisely, iact[nact] = mcon. However,
+            # looking at the code, I cannot see why in stage 2 nact must be positive after the reduction
+            # above. It did happen in stage 1 that nact became 0 after the reduction --- this is
+            # extremely rare, and it was never observed until 20221212, after almost one year of
+            # random tests. Maybe nact is theoretically positive even in stage 1?
+            assert stage == 1 or nact > 0, "nact > 0 is required in stage 2"
+            if stage == 2 and nact <= 0:
+                break  # If this case ever occurs, we have to break, as nact is used as an index below.
+            if nact > 0:
+                if np.isnan(zdota[nact]) or abs(zdota[nact]) <= EPS**2:
+                    break
+
+            # Set sdirn to the direction of the next change to the current vector of variables.
+            if stage == 1:
+                sdirn -= np.dot(sdirn, z[:, nact+1]) * z[:, nact+1]
+                # sdirn is orthogonal to z[:, nact+1]
+            else:
+                sdirn = 1/zdota[nact] * z[:, nact]
+        # end if icon > nact
+
+        # Calculate the step to the trust region boundary or take the step that reduces cviol to 0.
+        # ----------------------------------------------------------------------------------------- #
+        # The following calculation of step is adopted from NEWUOA/BOBYQA/LINCOA. It seems to improve
+        # the performance of COBYLA. We also found that removing the precaution about underflows is
+        # beneficial to the overall performance of COBYLA --- the underflows are harmless anyway.
+        dd = delta**2 - np.dot(d, d)
+        ss = np.dot(sdirn, sdirn)
+        sd = np.dot(sdirn, d)
+        if dd <= 0 or ss <= EPS * delta**2 or np.isnan(sd):
+            break
+        # sqrtd: square root of a discriminant. The max avoids sqrtd < abs(sd) due to underflow.
+        sqrtd = max(np.sqrt(ss*dd + sd**2), abs(sd), np.sqrt(ss * dd))
+        if sd > 0:
+            step = dd / (sqrtd + sd)
+        else:
+            step = (sqrtd - sd) / ss
+        # step < 0 should not happen. Step can be 0 or NaN when, e.g., sd or ss becomes inf.
+        if step <= 0 or not np.isfinite(step):
+            break
+
+        # Powell's approach and comments are as follows.
+        # -------------------------------------------------- #
+        # The two statements below that include the factor eps prevent
+        # some harmless underflows that occurred in a test calculation
+        # (Zaikun: here, eps is the machine epsilon; Powell's original
+        # code used 1.0e-6, and Powell's code was written in single
+        # precision). Further, we skip the step if it could be 0 within
+        # a reasonable tolerance for computer rounding errors.
+
+        # !dd = delta**2 - sum(d**2, mask=(abs(d) >= EPS * delta))
+        # !ss = inprod(sdirn, sdirn)
+        # !if (dd <= 0) then
+        # !    exit
+        # !end if
+        # !sd = inprod(sdirn, d)
+        # !if (abs(sd) >= EPS * sqrt(ss * dd)) then
+        # !    step = dd / (sqrt(ss * dd + sd**2) + sd)
+        # !else
+        # !    step = dd / (sqrt(ss * dd) + sd)
+        # !end if
+        # -------------------------------------------------- #
+
+        if stage == 1:
+            if isminor(cviol, step):
+                break
+            step = min(step, cviol)
+
+        # Set dnew to the new variables if step is the steplength, and reduce cviol to the
+        # corresponding maximum residual if stage 1 is being done.
+        dnew = d + step * sdirn
+        if stage == 1:
+            cviol = max(max(b[iact[:nact]] - dnew@A[:, iact[:nact]]), 0)
+            # N.B.: cviol will be used when calculating vmultd[nact:mcon].
+
+        # Zaikun 20211011:
+        # 1. vmultd is computed from scratch for the second (out of 2) time in one iteration.
+        # 2. vmultd[:nact] and vmultd[nact:mcon] are calculated separately with no coupling.
+        # 3. vmultd will be calculated from scratch again in the next iteration.
+        # Set vmultd to the vmultc vector that would occur if d became dnew. A device is included to
+        # force vmultd[k] = 0 if deviations from this value can be attributed to computer rounding
+        # errors. First calculate the new Lagrange multipliers.
+        # vmultd[:nact] = lsqr(A[:, iact[:nact]], dnew, z[:, :nact], zdota[:nact])
+        vmultd[:nact] = np.linalg.lstsq(A[:, iact[:nact]], dnew, rcond=None)[0]
+        if stage == 2:
+            vmultd[nact-1] = max(0, vmultd[nact-1])  # This seems never activated.
+        # Complete vmultd by finding the new constraint residuals. (Powell wrote "Complete vmultc ...")
+        cvshift = dnew@A[:, iact] - b[iact] + cviol  # Only cvshift[nact:mcon] is needed.
+        # N.B.: dnew@A[:, iact] is a vector (Fortran's matprod of a vector and a matrix), so cvshift
+        # is a vector as intended.
+        cvsabs = abs(dnew)@abs(A[:, iact]) + abs(b[iact]) + cviol
+        cvshift[isminor(cvshift, cvsabs)] = 0
+        vmultd[nact:mcon] = cvshift[nact:mcon]
+
+        # Calculate the fraction of the step from d to dnew that will be taken.
+        fracmult = np.array([vmultc[i]/(vmultc[i] - vmultd[i]) if vmultd[i] < 0 else REALMAX for i in range(mcon)])
+        # Only the places with vmultd < 0 are relevant below, if any.
+        frac = min([1, *fracmult])
+        # icon = -1 signals that no ratio is below 1 (Powell's code uses icon == 0 for this); the
+        # check at the end of the loop body then exits.
+        icon = np.where(fracmult == frac)[0][0] if frac < 1 else -1
+
+        # Update d, vmultc, and cviol.
+        dold = d
+        d = (1 - frac)*d + frac * dnew
+        # Break in the case of inf/nan in d.
+        if not np.isfinite(d).all():
+            d = dold  # Should we restore also iact, nact, vmultc, and z?
+            break
+
+        vmultc = np.maximum(0, (1 - frac)*vmultc + frac*vmultd)  # elementwise, as Fortran's MAX
+        if stage == 1:
+            # cviol = (1 - frac) * cvold + frac * cviol  # Powell's version
+            # In theory, cviol = max(max(b[:num_constraints] - d@A[:, :num_constraints]), 0), yet the
+            # cviol updated as above can be quite different from this value if A has huge entries (e.g., > 1e20).
+            cviol = max(max(b[:num_constraints] - d@A[:, :num_constraints]), 0)
+
+        if icon < 0 or icon >= mcon:
+            # icon == -1 means no constraint attained the minimal ratio; this cannot happen unless
+            # fracmult contains only NaN, which should not happen; icon >= mcon should never occur.
+            break
+    return iact, nact, d, vmultc, z
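+
+
+# The following self-check is not part of Powell's algorithm or the Fortran sources; it is a
+# minimal sketch with made-up data of the step-length formula used in the loop above: the
+# positive root t of ||d + t*sdirn|| = delta equals dd/(sqrtd + sd) and also (sqrtd - sd)/ss;
+# the branch on the sign of sd merely picks the form that avoids cancellation.
+if __name__ == "__main__":
+    rng = np.random.default_rng(0)
+    d = rng.standard_normal(3)
+    sdirn = rng.standard_normal(3)
+    delta = 2 * np.linalg.norm(d)  # ensures dd = delta**2 - d@d > 0
+    dd = delta**2 - np.dot(d, d)
+    ss = np.dot(sdirn, sdirn)
+    sd = np.dot(sdirn, d)
+    sqrtd = np.sqrt(ss*dd + sd**2)
+    step = dd/(sqrtd + sd) if sd > 0 else (sqrtd - sd)/ss
+    # The step lands exactly on the trust-region boundary.
+    assert np.isclose(np.linalg.norm(d + step*sdirn), delta)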
diff --git a/python/src/prima/cobyla/update.py b/python/src/prima/cobyla/update.py
new file mode 100644
index 0000000000..8a3273853b
--- /dev/null
+++ b/python/src/prima/cobyla/update.py
@@ -0,0 +1,164 @@
+from prima.common.consts import DAMAGING_ROUNDING, INFO_DEFAULT
+import numpy as np
+
+
+def updatexfc(jdrop, constr, cpen, cstrv, d, f, conmat, cval, fval, sim, simi):
+    # This function revises the simplex by updating the elements of sim, simi, fval, conmat, and cval.
+
+    itol = 1
+
+    num_constraints = len(constr)
+    num_vars = sim.shape[0]
+
+    # Do nothing when jdrop is None. This can only happen after a trust-region step.
+    if jdrop is None:  # jdrop is None is impossible if the input is correct.
+        return conmat, cval, fval, sim, simi, INFO_DEFAULT
+
+    # Actual copies, so that the restoration below is not a no-op on the mutated arrays.
+    sim_old = sim.copy()
+    simi_old = simi.copy()
+
+    if jdrop < num_vars:
+        sim[:, jdrop] = d
+        simi_jdrop = simi[jdrop, :] / np.dot(simi[jdrop, :], d)
+        simi -= np.outer(simi@d, simi_jdrop)
+        simi[jdrop, :] = simi_jdrop
+    else:  # jdrop == num_vars
+        sim[:, num_vars] += d
+        sim[:, :num_vars] -= np.tile(d, (num_vars, 1)).T
+        simid = simi@d
+        sum_simi = np.sum(simi, axis=0)
+        simi += np.outer(simid, sum_simi / (1 - np.sum(simid)))
+
+    erri = np.max(np.abs(simi@sim[:, :num_vars] - np.eye(num_vars)))  # Fortran: maxval(abs(...))
+    if erri > 0.1 * itol or np.isnan(erri):
+        simi_test = np.linalg.inv(sim[:, :num_vars])
+        erri_test = np.max(np.abs(simi_test@sim[:, :num_vars] - np.eye(num_vars)))
+        if erri_test < erri or (np.isnan(erri) and not np.isnan(erri_test)):
+            simi = simi_test
+            erri = erri_test
+
+    if erri <= itol:
+        fval[jdrop] = f
+        conmat[:, jdrop] = constr
+        cval[jdrop] = cstrv
+        # Switch the best vertex to the pole position sim[:, num_vars] if it is not there already.
+        conmat, cval, fval, sim, simi, info = updatepole(cpen, conmat, cval, fval, sim, simi)
+    else:
+        info = DAMAGING_ROUNDING
+        sim = sim_old
+        simi = simi_old
+
+    return conmat, cval, fval, sim, simi, info  # same order as the early return above
+
+
+def findpole(cpen, cval, fval):
+    # This subroutine identifies the best vertex of the current simplex with respect to the merit
+    # function PHI = F + CPEN * CSTRV.
+    n = len(fval) - 1  # used for debugging which I have not implemented yet
+
+    # Identify the optimal vertex of the current simplex.
+    jopt = len(fval) - 1
+    phi = fval + cpen * cval
+    phimin = min(phi)
+    joptcandidate = np.argmin(phi)
+    if phi[joptcandidate] < phi[jopt]:
+        jopt = joptcandidate
+    if cpen <= 0 and any(np.logical_and(cval < cval[jopt], phi <= phimin)):
+        # jopt is the index of the minimum value of cval
+        # on the set of cval values where phi <= phimin.
+        jopt = np.where(cval == min(cval[phi <= phimin]))[0][0]
+    return jopt
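+
+
+# Illustration, not taken from the Fortran sources (the data is invented): with cpen = 1 the
+# merit values are phi = fval + cpen*cval = [1.5, 2.0, 3.2], so findpole moves jopt from the
+# default pole position (the last vertex) to vertex 0.
+if __name__ == "__main__":
+    _fval = np.array([1.0, 2.0, 3.0])
+    _cval = np.array([0.5, 0.0, 0.2])
+    assert findpole(cpen=1.0, cval=_cval, fval=_fval) == 0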
+
+
+def updatepole(cpen, conmat, cval, fval, sim, simi):
+    #--------------------------------------------------------------------------------------------------#
+    # This subroutine identifies the best vertex of the current simplex with respect to the merit
+    # function PHI = F + CPEN * CSTRV, and then switches this vertex to SIM(:, N + 1), which Powell
+    # called the "pole position" in his comments. CONMAT, CVAL, FVAL, and SIMI are updated accordingly.
+    #
+    # N.B. 1: In precise arithmetic, the following two procedures produce the same results:
+    # 1) apply UPDATEPOLE to SIM twice, first with CPEN = CPEN1 and then with CPEN = CPEN2;
+    # 2) apply UPDATEPOLE to SIM with CPEN = CPEN2.
+    # In finite-precision arithmetic, however, they may produce different results unless CPEN1 = CPEN2.
+    #
+    # N.B. 2: When JOPT == N+1, the best vertex is already at the pole position, so there is nothing to
+    # switch. However, as in Powell's code, the code below will check whether SIMI is good enough to
+    # work as the inverse of SIM(:, 1:N) or not. If not, Powell's code would invoke an error return of
+    # COBYLB; our implementation, however, will try calculating SIMI from scratch; if the recalculated
+    # SIMI is still of poor quality, then UPDATEPOLE will return with INFO = DAMAGING_ROUNDING,
+    # informing COBYLB that SIMI is poor due to damaging rounding errors.
+    #
+    # N.B. 3: UPDATEPOLE should be called when and only when FINDPOLE can potentially return a value
+    # other than N+1. The value of FINDPOLE is determined by CPEN, CVAL, and FVAL, the latter two being
+    # decided by SIM. Thus UPDATEPOLE should be called after CPEN or SIM changes. COBYLA updates CPEN at
+    # only two places: the beginning of each trust-region iteration, and when REDRHO is called;
+    # SIM is updated only by UPDATEXFC, which itself calls UPDATEPOLE internally. Therefore, we only
+    # need to call UPDATEPOLE after updating CPEN at the beginning of each trust-region iteration and
+    # after each invocation of REDRHO.
+    #--------------------------------------------------------------------------------------------------#
+    # List of local arrays (including function-output arrays; likely to be stored on the stack):
+    # REAL(RP) :: CONMAT_OLD(M, N+1), CVAL_OLD(N+1), FVAL_OLD(N+1), SIM_JDROP(N), SIM_OLD(N, N+1), &
+    #     & SIMI_OLD(N, N), SIMI_TEST(N, N)
+    # Size of local arrays: REAL(RP)*((M+N+2)*(N+1) + N + 2*N^2) (TO BE REDUCED TO 2*N + N^2 by removing
+    # *_OLD. Should we allocate SIMI_TEST? Yes, it is needed quite rarely!)
+    #--------------------------------------------------------------------------------------------------#
+    num_constraints = conmat.shape[0]
+    num_vars = sim.shape[0]
+    info = INFO_DEFAULT
+    itol = 1  # Why is this a separate variable? It's not even an input.
+
+    # Identify the optimal vertex of the current simplex.
+    jopt = findpole(cpen, cval, fval)
+
+    # Switch the best vertex to the pole position sim[:, -1] if it is not there already. Then update
+    # CONMAT etc. Before the update, save a copy of CONMAT etc. If the update is unsuccessful due to
+    # damaging rounding errors, we restore them for COBYLA to extract X/F/C from the undamaged data.
+    # N.B.: these must be actual copies, or the in-place updates below would clobber them.
+    fval_old = fval.copy()
+    conmat_old = conmat.copy()
+    cval_old = cval.copy()
+    sim_old = sim.copy()
+    simi_old = simi.copy()
+    if 0 <= jopt < num_vars:
+        # Unless there is a bug in findpole, it is guaranteed that jopt >= 0.
+        # When jopt == num_vars, there is nothing to switch, and simi[jopt, :]
+        # would be illegal.
+        fval[[jopt, -1]] = fval[[-1, jopt]]
+        conmat[:, [jopt, -1]] = conmat[:, [-1, jopt]]  # Exchange CONMAT[:, JOPT] AND CONMAT[:, -1]
+        cval[[jopt, -1]] = cval[[-1, jopt]]
+        sim[:, -1] += sim[:, jopt]
+        sim_jopt = sim[:, jopt].copy()
+        sim[:, jopt] = 0
+        sim[:, :num_vars] -= np.tile(sim_jopt, (num_vars, 1)).T
+        # The above update is equivalent to multiplying sim[:, :num_vars] from the right
+        # by a matrix whose jopt-th row is [-1, -1, ... -1],
+        # while all other rows are the same as those of the identity matrix. It
+        # is easy to check that the inverse of this matrix is itself. Therefore
+        # simi should be updated by a multiplication with this matrix (i.e. its inverse)
+        # from the left side, as is done in the following line. The jopt-th row of
+        # the updated simi is minus the sum of all rows of the original simi, whereas
+        # all the other rows remain unchanged.
+        simi[jopt, :] = -np.sum(simi, axis=0)
+        # TODO: I don't quite understand the above comment, I'd like to check it
+        # against the fortran code.
+
+    # Check whether SIMI is a poor approximation to the inverse of sim[:, :num_vars].
+    # Calculate simi from scratch if the current one is damaged by rounding errors.
+    erri = np.max(np.abs(simi@sim[:, :num_vars] - np.eye(num_vars)))
+    if erri > 0.1 * itol or np.isnan(erri):
+        simi_test = np.linalg.inv(sim[:, :num_vars])
+        erri_test = np.max(np.abs(simi_test@sim[:, :num_vars] - np.eye(num_vars)))
+        if erri_test < erri or (np.isnan(erri) and not np.isnan(erri_test)):
+            simi = simi_test
+            erri = erri_test
+
+    # If the recalculated simi is still damaged, then restore the data to the
+    # version before the update.
+    if erri > itol or np.isnan(erri):
+        info = DAMAGING_ROUNDING
+        fval = fval_old
+        conmat = conmat_old
+        cval = cval_old
+        sim = sim_old
+        simi = simi_old
+
+    return conmat, cval, fval, sim, simi, info
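+
+
+# Hypothetical sanity check (invented data, run only as a script; not part of the translation):
+# vertex 0 has the best merit value, so updatepole swaps it into the pole position sim[:, -1]
+# while keeping simi a valid inverse of sim[:, :num_vars].
+if __name__ == "__main__":
+    sim = np.eye(2, 3)  # vertices e1 and e2, plus the pole x0 = 0 in the last column
+    simi = np.eye(2)
+    fval = np.array([1.0, 2.0, 3.0])
+    cval = np.zeros(3)
+    conmat = np.zeros((1, 3))
+    conmat, cval, fval, sim, simi, info = updatepole(1.0, conmat, cval, fval, sim, simi)
+    assert info == INFO_DEFAULT and fval[-1] == 1.0
+    assert np.allclose(simi @ sim[:, :2], np.eye(2))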
diff --git a/python/src/prima/common/consts.py b/python/src/prima/common/consts.py
new file mode 100644
index 0000000000..fde68b38aa
--- /dev/null
+++ b/python/src/prima/common/consts.py
@@ -0,0 +1,22 @@
+import numpy as np
+
+REALMIN = np.finfo(float).tiny
+REALMAX = np.finfo(float).max
+RHOBEG_DEFAULT = 1
+RHOEND_DEFAULT = 1e-6
+FUNCMAX = 2.0**100
+CONSTRMAX = FUNCMAX
+EPS = np.finfo(float).eps
+CTOL_DEFAULT = EPS
+CWEIGHT_DEFAULT = 1e8
+FTARGET_DEFAULT = -REALMAX
+IPRINT_DEFAULT = False
+MAXFUN_DIM_DEFAULT = 500
+INFO_DEFAULT = 0
+SMALL_TR_RADIUS = 0  # Why is this the same as the default?
+MAXTR_REACHED = 20
+DAMAGING_ROUNDING = 7
+NAN_INF_X = -1
+NAN_INF_F = -2
+FTARGET_ACHIEVED = 1
+MAXFUN_REACHED = 3
\ No newline at end of file
diff --git a/python/src/prima/common/evaluate.py b/python/src/prima/common/evaluate.py
new file mode 100644
index 0000000000..d16cabf419
--- /dev/null
+++ b/python/src/prima/common/evaluate.py
@@ -0,0 +1,36 @@
+import numpy as np
+from prima.common.consts import FUNCMAX, CONSTRMAX
+
+# This is a module evaluating the objective/constraint function with
+# NaN/Inf handling.
+
+
+def moderatex(x):
+    np.nan_to_num(x, copy=False, nan=FUNCMAX)
+    x = np.clip(x, -np.finfo(float).max, np.finfo(float).max)
+    return x
+
+def moderatef(f):
+    f = FUNCMAX if np.isnan(f) else f
+    return min(FUNCMAX, f)
+
+def moderatec(c):
+    np.nan_to_num(c, copy=False, nan=-CONSTRMAX)
+    c = np.clip(c, -CONSTRMAX, CONSTRMAX)
+    return c
+
+
+def evaluate(calfun, x):
+    """
+    This function evaluates calfun at x, setting f to the objective function value.
+    NaN/Inf are handled by a moderated extreme barrier.
+    """
+
+    assert not any(np.isnan(x)), "x contains NaN"
+
+    f, constr = calfun(moderatex(x))
+    f = moderatef(f)
+    constr = moderatec(constr)
+    cstrv = max([*-constr, 0])
+
+    return f, constr, cstrv
\ No newline at end of file
diff --git a/python/src/prima/common/linalg.py b/python/src/prima/common/linalg.py
new file mode 100644
index 0000000000..c6dcbec7c7
--- /dev/null
+++ b/python/src/prima/common/linalg.py
@@ -0,0 +1,49 @@
+import numpy as np
+
+def planerot(x):
+    '''
+    As in MATLAB, planerot(x) returns a 2x2 Givens matrix G for x in R2 so that Y=G@x has Y[1] = 0.
+    Roughly speaking, G = np.array([[x[0]/R, x[1]/R], [-x[1]/R, x[0]/R]]), where R = np.linalg.norm(x).
+    0. We need to take care of the possibilities of R=0, Inf, NaN, and over/underflow.
+    1. The G defined above is continuous with respect to X except at 0. Following this definition,
+       G = np.array([[np.sign(x[0]), 0], [0, np.sign(x[0])]]) if x[1] == 0,
+       G = np.array([[0, np.sign(x[1])], [np.sign(x[1]), 0]]) if x[0] == 0.
+       Yet some implementations ignore the signs, leading to discontinuity and numerical instability.
+    2. Difference from MATLAB: if x contains NaN or consists of only Inf, MATLAB returns a NaN matrix,
+       but we return an identity matrix or a matrix of +/-np.sqrt(2). We intend to keep G always orthogonal.
+    '''
+    assert len(x) == 2, "x must be a 2-vector"
+
+    # Define c = x[0]/R and s = x[1]/R with R = hypot(x). Handle Inf/NaN.
+    # if any(np.isnan(x)):
+    #     c = 1
+    #     s = 0
+    # elif all(np.isinf(x)):
+    # TODO: Handle inf/nan
+    R = np.linalg.norm(x)
+    G = np.array([[x[0]/R, x[1]/R], [-x[1]/R, x[0]/R]])
+    return G
+
+
+def isminor(x, ref):
+    '''
+    This function tests whether x is minor compared to ref. It is used by Powell, e.g., in COBYLA.
+    In precise arithmetic, isminor(x, ref) is true if and only if x == 0; in floating point
+    arithmetic, isminor(x, ref) is true if x is 0 or its nonzero value can be attributed to
+    computer rounding errors according to ref.
+    Larger sensitivity means the function is more strict/precise, the value 0.1 being due to Powell.
+
+    For example:
+    isminor(1e-20, 1e300) -> True, because in floating point arithmetic 1e-20 cannot be added to
+    1e300 without being rounded to 1e300.
+    isminor(1e300, 1e-20) -> False, because in floating point arithmetic adding 1e300 to 1e-20
+    dominates the latter number.
+    isminor(3, 4) -> False, because 3 can be added to 4 without being rounded off.
+    '''
+
+    sensitivity = 0.1
+    refa = abs(ref) + sensitivity * abs(x)
+    refb = abs(ref) + 2 * sensitivity * abs(x)
+    return abs(ref) >= refa or refa >= refb
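+
+
+# Quick illustrative checks (made-up values, run only as a script; not part of the translation):
+# the Givens matrix from planerot zeroes the second component of x while preserving its norm,
+# and the isminor examples from the docstring above behave as described.
+if __name__ == "__main__":
+    y = planerot(np.array([3.0, 4.0])) @ np.array([3.0, 4.0])
+    assert np.allclose(y, [5.0, 0.0])
+    assert isminor(1e-20, 1e300) and not isminor(1e300, 1e-20)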
diff --git a/python/src/prima/common/output.py b/python/src/prima/common/output.py
new file mode 100644
index 0000000000..151b150eb9
--- /dev/null
+++ b/python/src/prima/common/output.py
@@ -0,0 +1,20 @@
+def fmsg(solver, iprint, nf, f, x, cstrv=None, constr=None):
+    if abs(iprint) < 3:
+        return
+    elif iprint > 0:
+        output = "stdout"
+    else:
+        output = "file"
+        filename = solver + "_output.txt"
+
+    # Decide whether the problem is truly constrained.
+    if constr is None:
+        is_constrained = cstrv is not None
+    else:
+        is_constrained = bool(len(constr))
+
+    # Decide the constraint violation.
+
+    # Will finish this later, not important for now.
+
+
diff --git a/python/src/prima/common/powalg.py b/python/src/prima/common/powalg.py
new file mode 100644
index 0000000000..1a0a469067
--- /dev/null
+++ b/python/src/prima/common/powalg.py
@@ -0,0 +1,109 @@
+import numpy as np
+from prima.common.linalg import isminor, planerot
+from prima.common.consts import EPS
+
+
+def qradd_Rdiag(c, Q, Rdiag, n):
+    '''
+    This function updates the QR factorization of an MxN matrix A of full column rank, attempting to
+    add a new column C to this matrix as the LAST column while maintaining the full-rankness.
+    Case 1. If C is not in range(A) (theoretically, it implies N < M), then the new matrix is np.hstack([A, C]).
+    Case 2. If C is in range(A), then the new matrix is np.hstack([A[:, :n-1], C]).
+    N.B.:
+    0. Instead of R, this subroutine updates Rdiag, which is np.diag(R), with a size at most M and at
+    least min(m, n+1). The number is min(m, n+1) rather than min(m, n) as n may be augmented by 1 in
+    the function.
+    1. With the two cases specified as above, this function does not need A as an input.
+    2. The function changes only Q[:, nsave:m] (nsave is the original value of n) and
+    R[:, n-1] (n takes the updated value).
+    3. Indeed, when C is in range(A), Powell wrote in comments that "set IOUT to the index of the
+    constraint (here, column of A --- Zaikun) to be deleted, but branch if no suitable index can be
+    found". The idea is to replace a column of A by C so that the new matrix still has full rank
+    (such a column must exist unless C = 0). But his code essentially sets iout=n always. Maybe he
+    found this worked well enough in practice. Meanwhile, Powell's code includes a snippet that can
+    never be reached, which was probably intended to deal with the case that IOUT != n.
+    '''
+    m = Q.shape[1]
+    nsave = n  # Needed for debugging (only)
+
+    # As in Powell's COBYLA, CQ is set to 0 at the positions with CQ being negligible as per ISMINOR.
+    # This may not be the best choice if the subroutine is used in other contexts, e.g. LINCOA.
+    cq = c @ Q
+    cqa = abs(c) @ abs(Q)
+    # The line below basically makes an element of cq 0 if adding it to the corresponding element of
+    # cqa does not change the latter.
+    cq = np.array([0 if isminor(cqi, cqai) else cqi for cqi, cqai in zip(cq, cqa)])
+
+    # Update Q so that the columns of Q[:, n+1:m] are orthogonal to C. This is done by applying a 2D
+    # Givens rotation to Q[:, [k, k+1]] from the right to zero C' @ Q[:, k+1] out for k = n, ..., m-2
+    # (0-based; Powell's loop runs over K = N+1, ..., M-1 in 1-based indexing). Nothing will be done
+    # if n >= m-1.
+    for k in range(m-2, n-1, -1):
+        if abs(cq[k+1]) > 0:
+            # Powell wrote cq[k+1] != 0 instead of abs. The two differ if cq[k+1] is NaN.
+            # If we apply the rotation below when cq[k+1] = 0, then cq[k] will get updated to |cq[k]|.
+            G = planerot(cq[[k, k+1]])
+            Q[:, [k, k+1]] = Q[:, [k, k+1]] @ G.T
+            cq[k] = np.linalg.norm(cq[k:k+2])
+    # TODO: I have no idea if I'm handling these indices correctly. Will figure it out at debugging.
+
+    # Augment n by 1 if C is not in range(A).
+    # The two ifs cannot be merged, as Fortran may evaluate cq(n+1) even if n >= m, leading to a
+    # segfault (in Python, an IndexError).
+    if n < m:
+        # Powell's condition for the following if: cq(n+1) != 0. N.B.: 0-based cq[n] is Powell's CQ(N+1).
+        if abs(cq[n]) > EPS**2 and not isminor(cq[n], cqa[n]):
+            n += 1
+
+    # Update Rdiag so that Rdiag[n-1] = cq[n-1] = np.dot(c, Q[:, n-1]) (0-based). Note that n may
+    # have been augmented.
+    if 1 <= n <= m:  # n > m should not happen unless the input is wrong
+        Rdiag[n-1] = cq[n-1]
+    return Q, Rdiag, n
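+
+
+# Context sketch, independent of qradd_Rdiag and not part of the translation (illustrative data
+# only): appending a column c to a full-rank A amounts to orthogonalizing c against range(A);
+# the part of c that survives is orthogonal to every column of A, which is the invariant the
+# Givens rotations above maintain for the trailing columns of Q.
+if __name__ == "__main__":
+    rng = np.random.default_rng(1)
+    A = rng.standard_normal((5, 3))
+    c = rng.standard_normal(5)
+    Q_basis, _ = np.linalg.qr(A)           # orthonormal basis of range(A)
+    resid = c - Q_basis @ (Q_basis.T @ c)  # component of c outside range(A)
+    assert np.allclose(A.T @ resid, 0, atol=1e-12)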
+
+
+def qrexc_Rdiag(A, Q, Rdiag, i):  # Used in COBYLA
+    '''
+    This function updates the QR factorization for an MxN matrix A=Q@R so that the updated Q and
+    R form a QR factorization of [A_0, ..., A_{I-1}, A_{I+1}, ..., A_{N-1}, A_I], which is the matrix
+    obtained by rearranging columns [I, I+1, ..., N-1] of A to [I+1, ..., N-1, I]. Here A is ASSUMED TO
+    BE OF FULL COLUMN RANK, Q is a matrix whose columns are orthogonal, and R, which is not present,
+    is an upper triangular matrix whose diagonal entries are nonzero. Q and R need not be square.
+    N.B.:
+    0. Instead of R, this function updates Rdiag, which is np.diag(R), the size being n.
+    1. With L = Q.shape[1] = R.shape[0], we have M >= L >= N. Most often L = M or N.
+    2. This function changes only Q[:, i:] and Rdiag[i:].
+    '''
+    m, n = A.shape
+
+    if i < 0 or i >= n:
+        return Q, Rdiag
+
+    # Let R be the upper triangular matrix in the QR factorization, namely R = Q.T@A.
+    # For each k, find the Givens rotation G with G@(R[k:k+2, :]) = [hypt, 0], and update Q[:, k:k+2]
+    # to Q[:, k:k+2]@(G.T). Then R = Q.T@A is an upper triangular matrix as long as A[:, [k, k+1]] is
+    # updated to A[:, [k+1, k]]. Indeed, this new upper triangular matrix can be obtained by first
+    # updating R[[k, k+1], :] to G@(R[[k, k+1], :]) and then exchanging its columns k and k+1; at the
+    # same time, entries k and k+1 of R's diagonal Rdiag become [hypt, -(Rdiag[k+1]/hypt)*Rdiag[k]].
+    # After this is done for each k = i, ..., n-2, we obtain the QR factorization of the matrix that
+    # rearranges columns [i, i+1, ..., n-1] of A as [i+1, ..., n-1, i].
+    # Powell's code, however, is slightly different: before everything, he first exchanged columns k
+    # and k+1 of Q (as well as rows k and k+1 of R). This makes sure that the entries of the updated
+    # Rdiag are all positive if it is the case for the original Rdiag.
+    # TODO: I need some confidence that the i coming into this function is correct.
+    for k in range(i, n-1):  # 0-based; the last pair rotated is (n-2, n-1)
+        G = planerot(np.array([Rdiag[k+1], np.dot(Q[:, k], A[:, k+1])]))
+        Q[:, [k, k+1]] = Q[:, [k+1, k]]@(G.T)
+        # Powell's code updates Rdiag in the following way:
+        # hypt = np.sqrt(Rdiag[k+1]**2 + np.dot(Q[:, k], A[:, k+1])**2)
+        # Rdiag[[k, k+1]] = [hypt, (Rdiag[k+1]/hypt)*Rdiag[k]]
+        # Note that Rdiag[n-1] inherits all rounding in Rdiag[i:n-1] and Q[:, i:n-1] and hence contains
+        # significant errors. Thus we may modify Powell's code to set only Rdiag[k] = hypt here and then
+        # calculate Rdiag[n-1] by an inner product after the loop. Nevertheless, we simply calculate
+        # Rdiag from scratch below.
+
+    # Calculate Rdiag[i:] from scratch, keeping Rdiag[:i] intact.
+    Rdiag = np.concatenate([Rdiag[:i],
+                            [np.dot(Q[:, k], A[:, k+1]) for k in range(i, n-1)],
+                            [np.dot(Q[:, n-1], A[:, i])]])
+
+    return Q, Rdiag
\ No newline at end of file
diff --git a/python/src/prima/common/selectx.py b/python/src/prima/common/selectx.py
new file mode 100644
index 0000000000..0edde534e0
--- /dev/null
+++ b/python/src/prima/common/selectx.py
@@ -0,0 +1,179 @@
+'''
+This module provides subroutines that ensure the returned X is optimal among all the calculated
+points in the sense that no other point achieves both lower function value and lower constraint
+violation at the same time. This module is needed only in the constrained case.
+
+Coded by Zaikun ZHANG (www.zhangzk.net).
+
+Python implementation by Nickolai Belakovski
+'''
+
+import numpy as np
+import numpy.typing as npt
+from prima.common.consts import EPS, CONSTRMAX, REALMAX, FUNCMAX
+
+def isbetter00(f1: float, c1: float, f2: float, c2: float, ctol: float) -> bool:
+    isbetter = False
+    isbetter = isbetter or (any(np.isnan([f1, c1])) and not any(np.isnan([f2, c2])))
+    isbetter = isbetter or (f1 < f2 and c1 <= c2)
+    isbetter = isbetter or (f1 <= f2 and c1 < c2)
+    # If C1 <= CTOL and C2 is significantly larger/worse than CTOL, i.e., C2 > MAX(CTOL, CREF),
+    # then FC1 is better than FC2 as long as F1 < REALMAX. Normally CREF >= CTOL so MAX(CTOL, CREF)
+    # is indeed CREF. However, this may not be true if CTOL > 1E-1*CONSTRMAX.
+    cref = 10 * max(EPS, min(ctol, 1.0E-2 * CONSTRMAX))  # The MIN avoids overflow.
+    isbetter = isbetter or (f1 < REALMAX and c1 <= ctol and (c2 > max(ctol, cref) or np.isnan(c2)))
+    return isbetter
+
+
+def isbetter10(f1: npt.NDArray | list[float], c1: npt.NDArray | list[float], f2: float, c2: float, ctol: float) -> list[bool]:
+    isbetter = [isbetter00(f1i, c1i, f2, c2, ctol) for f1i, c1i in zip(f1, c1)]
+    return isbetter
+
+
+def isbetter01(f1: float, c1: float, f2: npt.NDArray | list[float], c2: npt.NDArray | list[float], ctol: float) -> list[bool]:
+    isbetter = [isbetter00(f1, c1, f2i, c2i, ctol) for f2i, c2i in zip(f2, c2)]
+    return isbetter
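+
+
+# Small illustration (values invented; not part of the translation): a point that is feasible
+# and lower in f dominates, while a higher constraint violation is never excused when
+# ctol = 1e-10. savefilt and selectx below build on exactly this comparison.
+if __name__ == "__main__":
+    assert isbetter00(f1=1.0, c1=0.0, f2=2.0, c2=0.0, ctol=1e-10)
+    assert not isbetter00(f1=1.0, c1=0.5, f2=2.0, c2=0.0, ctol=1e-10)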
+
+
+def savefilt(cstrv, ctol, cweight, f, x, nfilt, cfilt, ffilt, xfilt, constr=None, confilt=None):
+    '''
+    This subroutine saves x, f, and cstrv in xfilt, ffilt, and cfilt (and constr in confilt
+    if they are present), unless a vector in xfilt[:, :nfilt] is better than x.
+    If x is better than some vectors in xfilt[:, :nfilt], then these vectors will be
+    removed. If x is not better than any of xfilt[:, :nfilt], but nfilt == maxfilt,
+    then we remove a column from xfilt according to the merit function
+    phi = ffilt + cweight * max(cfilt - ctol, 0)
+    N.B.:
+    1. Only xfilt[:, :nfilt] and ffilt[:nfilt] etc contain valid information,
+    while xfilt[:, nfilt:maxfilt] and ffilt[nfilt:maxfilt] etc are not
+    initialized yet.
+    2. We decide whether an x is better than another by the isbetter functions.
+    '''
+    if constr is None:
+        num_constraints = 0
+    else:
+        num_constraints = constr.size
+    num_vars = x.size
+    maxfilt = ffilt.size
+
+    # Return immediately if any column of xfilt is better than x.
+    if any(isbetter10(ffilt[:nfilt], cfilt[:nfilt], f, cstrv, ctol)):
+        return nfilt, cfilt, ffilt, xfilt, confilt
+
+    # If NFILT == MAXFILT and X is not better than any column of XFILT, then we remove the worst column
+    # of XFILT according to the merit function PHI = FFILT + CWEIGHT * MAX(CFILT - CTOL, ZERO).
+    keep = [not element for element in isbetter01(f, cstrv, ffilt[:nfilt], cfilt[:nfilt], ctol)]
+    if sum(keep) == maxfilt:  # In this case, NFILT = SIZE(KEEP) = COUNT(KEEP) = MAXFILT > 0.
+        cfilt_shifted = np.maximum(cfilt - ctol, 0)
+        if cweight <= 0:
+            phi = ffilt
+        elif np.isposinf(cweight):
+            phi = cfilt_shifted
+            # We should not use CFILT here; if MAX(CFILT_SHIFTED) is attained at multiple indices, then
+            # we will check FFILT to exhaust the remaining degree of freedom.
+        else:
+            phi = np.maximum(ffilt, -REALMAX) + cweight * cfilt_shifted
+            # np.maximum(ffilt, -REALMAX) makes sure that PHI will not contain NaN (unless there is a bug).
+        # We select X to maximize PHI. In case there are multiple maximizers, we take the one with the
+        # largest CSTRV_SHIFTED; if there are more than one choices, we take the one with the largest F;
+        # if there are several candidates, we take the one with the largest CSTRV; if the last comparison
+        # still leads to more than one possibilities, then they are equally bad and we choose the first.
+        # N.B.:
+        # 1. This process is the opposite of selecting KOPT in SELECTX.
+        # 2. In finite-precision arithmetic, PHI_1 == PHI_2 and CSTRV_SHIFTED_1 == CSTRV_SHIFTED_2 do
+        # not ensure that F_1 == F_2!
+        phimax = max(phi)
+        cref = max(cfilt_shifted[phi >= phimax])
+        fref = max(ffilt[cfilt_shifted >= cref])
+        kworst = np.where(cfilt == max(cfilt[ffilt <= fref]))[0][0]
+        if kworst < 0 or kworst >= len(keep):  # For security. Should not happen.
+            kworst = 0
+        keep[kworst] = False
+
+    # Keep the surviving points, in their original order, at the front of the filter, as the
+    # Fortran code does with PACK.
+    nfilt = sum(keep)
+    index_to_keep = np.where(keep)[0]
+    xfilt[:, :nfilt] = xfilt[:, index_to_keep]
+    ffilt[:nfilt] = ffilt[index_to_keep]
+    cfilt[:nfilt] = cfilt[index_to_keep]
+    if confilt is not None and constr is not None:
+        confilt[:, :nfilt] = confilt[:, index_to_keep]
+
+    # Append the new point at position nfilt (0-based), then increment nfilt.
+    xfilt[:, nfilt] = x
+    ffilt[nfilt] = f
+    cfilt[nfilt] = cstrv
+    if confilt is not None and constr is not None:
+        confilt[:, nfilt] = constr
+    nfilt += 1
+
+    return nfilt, cfilt, ffilt, xfilt, confilt
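+
+
+# Minimal usage sketch for savefilt (invented data, run only as a script; not part of the
+# translation): saving one point into an empty filter of capacity 3 stores it at position 0
+# and returns nfilt = 1.
+if __name__ == "__main__":
+    _cfilt, _ffilt = np.zeros(3), np.zeros(3)
+    _xfilt = np.zeros((2, 3))
+    _nfilt, _cfilt, _ffilt, _xfilt, _ = savefilt(
+        0.0, 1e-10, 1e8, 1.5, np.array([0.5, 0.5]), 0, _cfilt, _ffilt, _xfilt)
+    assert _nfilt == 1 and _ffilt[0] == 1.5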
+
+
+def selectx(fhist: npt.NDArray, chist: npt.NDArray, cweight: float, ctol: float):
+    '''
+    This subroutine selects X according to fhist and chist, which represent (a part of) the history
+    of f and cstrv. Normally, fhist and chist are not the full history but only a filter, e.g. ffilt
+    and cfilt generated by savefilt. However, we name them fhist and chist because the [f, cstrv]
+    in a filter should not dominate each other, but this subroutine does NOT assume such a property.
+    N.B.: ctol is the tolerance of the constraint violation (cstrv). A point is considered feasible if
+    its constraint violation is at most ctol. Note that ctol is absolute, not relative.
+    '''
+
+    nhist = len(fhist)
+
+    # We select X among the points with f < fref and cstrv < cref.
+    # Do NOT use f <= fref, because f == fref (funcmax or realmax) may mean f == inf in practice!
+    if any(np.logical_and(fhist < FUNCMAX, chist < CONSTRMAX)):
+        fref = FUNCMAX
+        cref = CONSTRMAX
+    elif any(np.logical_and(fhist < REALMAX, chist < CONSTRMAX)):
+        fref = REALMAX
+        cref = CONSTRMAX
+    elif any(np.logical_and(fhist < FUNCMAX, chist < REALMAX)):
+        fref = FUNCMAX
+        cref = REALMAX
+    else:
+        fref = REALMAX
+        cref = REALMAX
+
+    if not any(np.logical_and(fhist < fref, chist < cref)):
+        kopt = nhist - 1
+    else:
+        # Shift the constraint violations by ctol, so that cstrv <= ctol is regarded as no violation.
+        chist_shifted = np.maximum(chist - ctol, 0)
+        # cmin is the minimal shifted constraint violation attained in the history.
+        cmin = min(chist_shifted[fhist < fref])
+        # We consider only the points whose shifted constraint violations are at most the cref below.
+        # N.B.: Without taking max(EPS, .), cref would be 0 if cmin = 0. In that case, asking for
+        # cstrv_shift < cref would be WRONG!
+        cref = max(EPS, 2*cmin)
+        # We use the following phi as our merit function to select X.
+        if cweight <= 0:
+            phi = fhist
+        elif np.isposinf(cweight):
+            phi = chist_shifted
+            # We should not use chist here; if min(chist_shifted) is attained at multiple indices, then
+            # we will check fhist to exhaust the remaining degree of freedom.
+        else:
+            phi = np.maximum(fhist, -REALMAX) + cweight * chist_shifted
+            # np.maximum(fhist, -REALMAX) makes sure that phi will not contain NaN (unless there is a bug).
+
+        # We select X to minimize phi subject to f < fref and cstrv_shift <= cref (see the comments
+        # above for the reason of taking "<" and "<=" in these two constraints). In case there are
+        # multiple minimizers, we take the one with the least cstrv_shift; if there is more than one
+        # choice, we take the one with the least f; if there are several candidates, we take the one
+        # with the least cstrv; if the last comparison still leads to more than one possibility, then
+        # they are equally good and we choose the first.
+        # N.B.:
+        # 1. This process is the opposite of selecting kworst in savefilt.
+        # 2. In finite-precision arithmetic, phi_1 == phi_2 and cstrv_shifted_1 == cstrv_shifted_2 do
+        # not ensure that f_1 == f_2!
+        phimin = min(phi[np.logical_and(fhist < fref, chist_shifted <= cref)])
+        cref = min(chist_shifted[np.logical_and(fhist < fref, phi <= phimin)])
+        fref = min(fhist[chist_shifted <= cref])
+        kopt = np.where(chist == min(chist[fhist <= fref]))[0][0]
+
+    return kopt
\ No newline at end of file
diff --git a/python/src/prima/examples/cobyla/cobyla_example.py b/python/src/prima/examples/cobyla/cobyla_example.py
new file mode 100644
index 0000000000..365fe7488f
--- /dev/null
+++ b/python/src/prima/examples/cobyla/cobyla_example.py
@@ -0,0 +1,322 @@
+import numpy as np
+from prima.common.evaluate import evaluate
+from prima.common.consts import EPS, RHOBEG_DEFAULT, RHOEND_DEFAULT, \
+    CTOL_DEFAULT, CWEIGHT_DEFAULT, FTARGET_DEFAULT, IPRINT_DEFAULT, \
+    MAXFUN_DIM_DEFAULT
+from prima.cobyla.cobylb import cobylb
+
+debugging = True
+
+def test_chebyquad():
+    f, _ = calcfc_chebyquad([1, 2])
+    assert np.isclose(f, 91+1/9, atol=1e-6)
+
+
+def calcfc_chebyquad(x):
+    x = np.array(x)
+    n = len(x)  # or shape?
+    y = np.zeros((n + 1, n + 1))
+    y[0:n, 0] = 1
+    y[0:n, 1] = 2*x - 1
+    for i in range(1, n):
+        y[0:n, i+1] = 2*y[0:n, 1] * y[0:n, i] - y[0:n, i - 1]
+
+    f = 0
+    for i in range(n+1):
+        tmp = sum(y[0:n, i]) / n
+        if i % 2 == 0:
+            tmp += 1/(i**2 - 1)
+        f += tmp**2
+    constr = 0
+    return f, constr
+
+def calcfc_hexagon(x):
+    # Test problem 10 in Powell's original algorithm
+
+    assert len(x) == 9
+
+    f = -0.5 * (x[0] * x[3] - x[1] * x[2] + x[2] * x[8] - x[4] * x[8] + x[4] * x[7] - x[5] * x[6])
+    constr = np.zeros(14)
+    constr[0] = 1 - x[2]**2 - x[3]**2
+    constr[1] = 1 - x[8]**2
+    constr[2] = 1 - x[4]**2 - x[5]**2
+    constr[3] = 1 - x[0]**2 - (x[1] - x[8])**2
+    constr[4] = 1 - (x[0] - x[4])**2 - (x[1] - x[5])**2
+    constr[5] = 1 - (x[0] - x[6])**2 - (x[1] - x[7])**2
+    constr[6] = 1 - (x[2] - x[4])**2 - (x[3] - x[5])**2
+    constr[7] = 1 - (x[2] - x[6])**2 - (x[3] - x[7])**2
+    constr[8] = 1 - x[6]**2 - (x[7] - x[8])**2
+    constr[9] = x[0] * x[3] - x[1] * x[2]
+    constr[10] = x[2] * x[8]
+    constr[11] = -x[4] * x[8]
+    constr[12] = x[4] * x[7] - x[5] * x[6]
+    constr[13] = x[8]
+
+    return f, constr
+
+def cobyla(calcfc, m, x, f, cstrv=0, constr=0, f0=None, constr0=None, nf=None, rhobeg=None,
+           rhoend=None, ftarget=FTARGET_DEFAULT, ctol=CTOL_DEFAULT, cweight=CWEIGHT_DEFAULT,
+           maxfun=None, iprint=IPRINT_DEFAULT, eta1=None, eta2=None, gamma1=0.5, gamma2=2,
+           xhist=False, fhist=False, chist=False, conhist=False, maxhist=None, maxfilt=2000):
+    """
+    Among all the arguments, only CALCFC, M, X, and F are obligatory. The others are OPTIONAL and you
+    can neglect them unless you are familiar with the algorithm. Any unspecified optional input will
+    take the default value detailed below. For instance, we may invoke the solver as follows.
+
+    ! First define CALCFC, M, and X, and then do the following.
+    call cobyla(calcfc, m, x, f)
+
+    or
+
+    ! First define CALCFC, M, and X, and then do the following.
+    call cobyla(calcfc, m, x, f, rhobeg = 0.5D0, rhoend = 1.0D-3, maxfun = 100)
+
+    ! IMPORTANT NOTICE: The user must set M correctly to the number of constraints, namely the size of
+    ! CONSTR introduced below. Set M to 0 if there is no constraint.
+
+    See examples/cobyla/cobyla_example.py for a concrete example.
+
+    A detailed introduction to the arguments is as follows.
+    N.B.: RP and IK are defined in the module CONSTS_MOD. See consts.F90 under the directory named
+    "common".
By default, RP = kind(0.0D0) and IK = kind(0), with REAL(RP) being the double-precision + real, and INTEGER(IK) being the default integer. For ADVANCED USERS, RP and IK can be defined by + setting PRIMA_REAL_PRECISION and PRIMA_INTEGER_KIND in common/ppf.h. Use the default if unsure. + + CALCFC + Input, subroutine. + CALCFC(X, F, CONSTR) should evaluate the objective function and constraints at the given + REAL(RP) vector X; it should set the objective function value to the REAL(RP) scalar F and the + constraint value to the REAL(RP) vector CONSTR. It must be provided by the user, and its + definition must conform to the following interface: + !-------------------------------------------------------------------------! + subroutine calcfc(x, f, constr) + real(RP), intent(in) :: x(:) + real(RP), intent(out) :: f + real(RP), intent(out) :: constr(:) + end subroutine calcfc + !-------------------------------------------------------------------------! + Besides, the subroutine should NOT access CONSTR beyond CONSTR(1:M), where M is the second + compulsory argument (see below), signifying the number of constraints. + + M + Input, INTEGER(IK) scalar. + M must be set to the number of constraints, namely the size (length) of CONSTR(X). + N.B.: + 1. M must be specified correctly, or the program will crash!!! + 2. Why don't we define M as optional and default it to 0 when it is absent? This is because + we need to allocate memory for CONSTR_LOC according to M. To ensure that the size of CONSTR_LOC + is correct, we require the user to specify M explicitly. + + X + Input and output, REAL(RP) vector. + As an input, X should be an N-dimensional vector that contains the starting point, N being the + dimension of the problem. As an output, X will be set to an approximate minimizer. + + F + Output, REAL(RP) scalar. + F will be set to the objective function value of X at exit. + + CSTRV + Output, REAL(RP) scalar. + CSTRV will be set to the constraint violation of X at exit, i.e., MAXVAL([-CONSTR(X), 0]). + + CONSTR + Output, ALLOCATABLE REAL(RP) vector. + CONSTR will be set to the constraint value of X at exit. + + F0 + Input, REAL(RP) scalar. + F0, if present, should be set to the objective function value of the starting X. + + CONSTR0 + Input, REAL(RP) vector. + CONSTR0, if present, should be set to the constraint value of the starting X; in addition, + SIZE(CONSTR0) must be M, or the solver will abort. + + NF + Output, INTEGER(IK) scalar. + NF will be set to the number of calls of CALCFC at exit. + + RHOBEG, RHOEND + Inputs, REAL(RP) scalars, default: RHOBEG = 1, RHOEND = 10^-6. RHOBEG and RHOEND must be set to + the initial and final values of a trust-region radius, both being positive and RHOEND <= RHOBEG. + Typically RHOBEG should be about one tenth of the greatest expected change to a variable, and + RHOEND should indicate the accuracy that is required in the final values of the variables. + + FTARGET + Input, REAL(RP) scalar, default: -Inf. + FTARGET is the target function value. The algorithm will terminate when a feasible point with a + function value <= FTARGET is found. + + CTOL + Input, REAL(RP) scalar, default: machine epsilon. + CTOL is the tolerance of constraint violation. Any X with MAXVAL(-CONSTR(X)) <= CTOL is + considered feasible. + N.B.: 1. CTOL is absolute, not relative. 2. CTOL is used only when selecting the returned X. + It does not affect the iterations of the algorithm. 
+
+    CWEIGHT
+    Input, REAL(RP) scalar, default: CWEIGHT_DFT defined in the module CONSTS_MOD in common/consts.F90.
+    CWEIGHT is the weight that the constraint violation takes in the selection of the returned X.
+
+    MAXFUN
+    Input, INTEGER(IK) scalar, default: MAXFUN_DIM_DFT*N with MAXFUN_DIM_DFT defined in the module
+    CONSTS_MOD (see common/consts.F90). MAXFUN is the maximal number of calls of CALCFC.
+
+    IPRINT
+    Input, INTEGER(IK) scalar, default: 0.
+    The value of IPRINT should be set to 0, 1, -1, 2, -2, 3, or -3, which controls how much
+    information will be printed during the computation:
+    0: there will be no printing;
+    1: a message will be printed to the screen at the return, showing the best vector of variables
+       found and its objective function value;
+    2: in addition to 1, each new value of RHO is printed to the screen, with the best vector of
+       variables so far and its objective function value; each new value of CPEN is also printed;
+    3: in addition to 2, each function evaluation with its variables will be printed to the screen;
+    -1, -2, -3: the same information as 1, 2, 3 will be printed, not to the screen but to a file
+       named COBYLA_output.txt; the file will be created if it does not exist; the new output will
+       be appended to the end of this file if it already exists.
+    Note that IPRINT = +/-3 can be costly in terms of time and/or space.
+
+    ETA1, ETA2, GAMMA1, GAMMA2
+    Input, REAL(RP) scalars, default: ETA1 = 0.1, ETA2 = 0.7, GAMMA1 = 0.5, and GAMMA2 = 2.
+    ETA1, ETA2, GAMMA1, and GAMMA2 are parameters in the updating scheme of the trust-region radius
+    detailed in the subroutine TRRAD in trustregion.f90. Roughly speaking, the trust-region radius
+    is contracted by a factor of GAMMA1 when the reduction ratio is below ETA1, and enlarged by a
+    factor of GAMMA2 when the reduction ratio is above ETA2. It is required that 0 < ETA1 <= ETA2
+    < 1 and 0 < GAMMA1 < 1 < GAMMA2. Normally, ETA1 <= 0.25. It is NOT advised to set ETA1 >= 0.5.
+
+    XHIST, FHIST, CHIST, CONHIST, MAXHIST
+    XHIST: Output, ALLOCATABLE rank 2 REAL(RP) array;
+    FHIST: Output, ALLOCATABLE rank 1 REAL(RP) array;
+    CHIST: Output, ALLOCATABLE rank 1 REAL(RP) array;
+    CONHIST: Output, ALLOCATABLE rank 2 REAL(RP) array;
+    MAXHIST: Input, INTEGER(IK) scalar, default: MAXFUN
+    XHIST, if present, will output the history of iterates; FHIST, if present, will output the
+    history of function values; CHIST, if present, will output the history of constraint violations;
+    CONHIST, if present, will output the history of constraint values; MAXHIST should be a
+    nonnegative integer, and XHIST/FHIST/CHIST/CONHIST will output only the history of the last
+    MAXHIST iterations. Therefore, MAXHIST = 0 means XHIST/FHIST/CONHIST/CHIST will output nothing,
+    while setting MAXHIST = MAXFUN requests XHIST/FHIST/CHIST/CONHIST to output all the history.
+    If XHIST is present, its size at exit will be (N, min(NF, MAXHIST)); if FHIST is present, its
+    size at exit will be min(NF, MAXHIST); if CHIST is present, its size at exit will be
+    min(NF, MAXHIST); if CONHIST is present, its size at exit will be (M, min(NF, MAXHIST)).
+
+    IMPORTANT NOTICE:
+    Setting MAXHIST to a large value can be costly in terms of memory for large problems.
+    MAXHIST will be reset to a smaller value if the memory needed exceeds MAXHISTMEM defined in
+    CONSTS_MOD (see consts.F90 under the directory named "common").
+    Use *HIST with caution!!! (N.B.: the algorithm is NOT designed for large problems).
+
+    MAXFILT
+    Input, INTEGER(IK) scalar.
+ MAXFILT is a nonnegative integer indicating the maximal length of the filter used for selecting + the returned solution; default: MAXFILT_DFT (a value lower than MIN_MAXFILT is not recommended); + see common/consts.F90 for the definitions of MAXFILT_DFT and MIN_MAXFILT. + + INFO + Output, INTEGER(IK) scalar. + INFO is the exit flag. It will be set to one of the following values defined in the module + INFOS_MOD (see common/infos.f90): + SMALL_TR_RADIUS: the lower bound for the trust region radius is reached; + FTARGET_ACHIEVED: the target function value is reached; + MAXFUN_REACHED: the objective function has been evaluated MAXFUN times; + MAXTR_REACHED: the trust region iteration has been performed MAXTR times (MAXTR = 2*MAXFUN); + NAN_INF_X: NaN or Inf occurs in X; + DAMAGING_ROUNDING: rounding errors are becoming damaging. + !--------------------------------------------------------------------------! + The following case(s) should NEVER occur unless there is a bug. + NAN_INF_F: the objective function returns NaN or +Inf; + NAN_INF_MODEL: NaN or Inf occurs in the model; + TRSUBP_FAILED: a trust region step failed to reduce the model + !--------------------------------------------------------------------------! + """ + if debugging: + # TODO: need to check how to figure out if python variables are present + # I guess the way to go is to set the default to None + assert (f0 is None) == (constr0 is None), "f0 and constr0 must be both present or both absent" + + n = len(x) + + assert len(constr0) == m if constr0 is not None else True, "len(constr0) != m" + + constr_loc = np.zeros(m) + if f0 is not None and constr0 is not None and all(np.isfinite(x)): + f = f0 + constr_loc = constr0 + else: + # Replace all NaNs with 0s and clip all -inf/inf to -floatmax/floatmax + np.nan_to_num(x, copy=False, neginf=-np.finfo(float).max, posinf=np.finfo(float).max) + f, constr_loc, cstrv_loc = evaluate(calcfc, x) + + if rhobeg is not None: + rhobeg_loc = rhobeg + elif rhoend is not None and np.isfinite(rhoend) and rhoend > 0: + rhobeg_loc = max(10*rhoend, RHOBEG_DEFAULT) + else: + rhobeg_loc = RHOBEG_DEFAULT + + if rhoend is not None: + rhoend_loc = rhoend + elif rhobeg_loc > 0: + rhoend_loc = max(EPS, min(0.1*rhobeg_loc, RHOEND_DEFAULT)) + else: + rhoend_loc = RHOEND_DEFAULT + + maxfun = maxfun if maxfun is not None else MAXFUN_DIM_DEFAULT*n + + if eta1 is not None: + eta1_loc = eta1 + elif eta2 is not None and 0 < eta2 < 1: + eta1_loc = max(EPS, eta2/7) + else: + eta1_loc = 0.1 + + if eta2 is not None: + eta2_loc = eta2 + else: + if 0 < eta1_loc < 1: + eta2_loc = (eta1_loc + 2)/3 + else: + eta2_loc = 0.7 + + if maxhist is not None: + maxhist_loc = maxhist + else: + maxhist_loc = max(maxfun, n+2, MAXFUN_DIM_DEFAULT*n) + + # preprocess + + ''' + ! Further revise MAXHIST_LOC according to MAXHISTMEM, and allocate memory for the history. + ! In MATLAB/Python/Julia/R implementation, we should simply set MAXHIST = MAXFUN and initialize + ! CHIST = NaN(1, MAXFUN), CONHIST = NaN(M, MAXFUN), FHIST = NaN(1, MAXFUN), XHIST = NaN(N, MAXFUN) + ! if they are requested; replace MAXFUN with 0 for the history that is not requested. 
    call prehist(maxhist_loc, n, present(xhist), xhist_loc, present(fhist), fhist_loc, &
+        & present(chist), chist_loc, m, present(conhist), conhist_loc)
+    '''
+
+    # Call cobylb, which performs the real calculations.
+    x, f, nf, info = cobylb(calcfc, iprint, maxfilt, maxfun, ctol, cweight, eta1_loc, eta2_loc,
+                            ftarget, gamma1, gamma2, rhobeg_loc, rhoend_loc, constr_loc, f, x,
+                            cstrv_loc)
+
+    # Write the outputs.
+    # TODO: Pick up from here, to finish writing the cobyla top level routine, and then we can start
+    # testing. We'll be able to test up until the trust region calls, until I hear back from Zaikun
+    # about how to handle them.
+    return x, f
+
+
+if __name__ == "__main__":
+    # n_chebyquad = 6
+    # x_chebyquad = [i/n_chebyquad for i in range(1, n_chebyquad+1)]
+    # m = 0
+    # f = 0
+    # cstrv = 0
+    # x, f = cobyla(calcfc_chebyquad, m, x_chebyquad, f, cstrv)
+    # print(x, f)
+    x_hexagon = np.zeros(9) + 2
+    m_hexagon = 14
+    f = 0
+    cstrv = 0  # Should this be a length-14 array? Of inf?
+    x, f = cobyla(calcfc_hexagon, m_hexagon, x_hexagon, f, cstrv)
\ No newline at end of file