From e46e9192f9a5f80b1ba146120774d21a35e87344 Mon Sep 17 00:00:00 2001
From: Nickolai Belakovski
Date: Thu, 20 Jul 2023 11:19:59 +0900
Subject: [PATCH] Initial commit of Python translation

---
 python/README.md                            |   5 +
 python/pyproject.toml                       |  10 +
 python/src/prima/__init__.py                |   0
 python/src/prima/cobyla/__init__.py         |   0
 python/src/prima/cobyla/cobylb.py           | 560 ++++++++++++++++++
 python/src/prima/cobyla/geometry.py         | 188 ++++++
 python/src/prima/cobyla/trustregion.py      | 447 ++++++++++++++
 python/src/prima/cobyla/update.py           | 164 +++++
 python/src/prima/common/__init__.py         |   0
 python/src/prima/common/consts.py           |  22 +
 python/src/prima/common/evaluate.py         |  36 ++
 python/src/prima/common/linalg.py           |  49 ++
 python/src/prima/common/output.py           |  20 +
 python/src/prima/common/powalg.py           | 109 ++++
 python/src/prima/common/selectx.py          | 179 ++++++
 .../prima/examples/cobyla/cobyla_example.py | 322 ++++++++++
 16 files changed, 2111 insertions(+)
 create mode 100644 python/README.md
 create mode 100644 python/pyproject.toml
 create mode 100644 python/src/prima/__init__.py
 create mode 100644 python/src/prima/cobyla/__init__.py
 create mode 100644 python/src/prima/cobyla/cobylb.py
 create mode 100644 python/src/prima/cobyla/geometry.py
 create mode 100644 python/src/prima/cobyla/trustregion.py
 create mode 100644 python/src/prima/cobyla/update.py
 create mode 100644 python/src/prima/common/__init__.py
 create mode 100644 python/src/prima/common/consts.py
 create mode 100644 python/src/prima/common/evaluate.py
 create mode 100644 python/src/prima/common/linalg.py
 create mode 100644 python/src/prima/common/output.py
 create mode 100644 python/src/prima/common/powalg.py
 create mode 100644 python/src/prima/common/selectx.py
 create mode 100644 python/src/prima/examples/cobyla/cobyla_example.py

diff --git a/python/README.md b/python/README.md
new file mode 100644
index 0000000000..d325b1cc99
--- /dev/null
+++ b/python/README.md
@@ -0,0 +1,5 @@
+To develop, `cd` into the `src` directory and run
+
+```pip install --editable .```
+
+This will install prima locally in an editable fashion. You can then run `examples/cobyla/cobyla_example.py` (from any directory) and explore from there.
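+
+Below is a minimal usage sketch. It is hypothetical — the Python API is still settling, the argument
+list simply mirrors `cobylb` as of this commit (constraints are interpreted as `constr >= 0`), and the
+numeric parameters are illustrative values rather than library defaults:
+
+```python
+import numpy as np
+from prima.cobyla.cobylb import cobylb
+
+def calcfc(x):
+    f = (x[0] - 1.0)**2 + (x[1] - 2.0)**2   # objective to minimize
+    constr = np.array([x[0] + x[1] - 1.0])  # interpreted as constr >= 0
+    return f, constr
+
+x0 = np.array([0.0, 0.0])
+f0, constr0 = calcfc(x0)
+x, f, nf, info = cobylb(calcfc, iprint=0, maxfilt=2000, maxfun=500, ctol=1e-10,
+                        cweight=1e8, eta1=0.1, eta2=0.7, ftarget=-np.inf,
+                        gamma1=0.5, gamma2=2.0, rhobeg=0.5, rhoend=1e-6,
+                        constr=constr0, f=f0, x=x0, cstrv=max(-constr0[0], 0.0))
+print(x, f, nf, info)
+```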
diff --git a/python/pyproject.toml b/python/pyproject.toml
new file mode 100644
index 0000000000..d95d6147de
--- /dev/null
+++ b/python/pyproject.toml
@@ -0,0 +1,10 @@
+[project]
+name = "prima"
+version = "0.0.1"
+dependencies = [
+    "numpy"
+]
+
+[tool.setuptools.packages.find]
+where = ["src"]  # ["."] by default
+exclude = ["prima.examples*"]  # empty by default
diff --git a/python/src/prima/__init__.py b/python/src/prima/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/python/src/prima/cobyla/__init__.py b/python/src/prima/cobyla/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/python/src/prima/cobyla/cobylb.py b/python/src/prima/cobyla/cobylb.py
new file mode 100644
index 0000000000..914d68c581
--- /dev/null
+++ b/python/src/prima/cobyla/cobylb.py
@@ -0,0 +1,560 @@
+import numpy as np
+from prima.common.consts import INFO_DEFAULT, REALMAX, EPS, CONSTRMAX, \
+    MAXTR_REACHED, DAMAGING_ROUNDING, SMALL_TR_RADIUS, \
+    NAN_INF_X, NAN_INF_F, FTARGET_ACHIEVED, MAXFUN_REACHED
+from prima.common.evaluate import evaluate, moderatec, moderatef
+from prima.cobyla.update import updatepole, findpole, updatexfc
+from prima.cobyla.geometry import assess_geo, setdrop_tr, geostep, setdrop_geo
+from prima.common.selectx import savefilt, selectx
+from prima.cobyla.trustregion import trstlp, redrat, trrad
+
+def fcratio(fval, conmat):
+    '''
+    This function calculates the ratio between the "typical change" of f and that of constr.
+    See equations (12)-(13) in Section 3 of the COBYLA paper for the definition of the ratio.
+    '''
+    cmin = np.min(conmat, axis=1)
+    cmax = np.max(conmat, axis=1)
+    if any(cmin < cmax/2):
+        denom = np.min((np.maximum(cmax, 0) - cmin)[cmin < cmax/2])
+        r = (max(fval) - min(fval)) / denom
+    else:
+        r = 0
+
+    return r
+
+def redrho(rho, rhoend):
+    '''
+    This function calculates rho when it needs to be reduced.
+    The scheme is shared by UOBYQA, NEWUOA, BOBYQA, LINCOA. For COBYLA, Powell's code reduces rho by
+    'rho /= 2; if rho <= 1.5 * rhoend: rho = rhoend' as specified in (11) of the COBYLA
+    paper. However, this scheme seems to work better, especially after we introduce delta.
+    '''
+
+    rho_ratio = rho / rhoend
+
+    if rho_ratio > 250:
+        rho /= 10
+    elif rho_ratio <= 16:
+        rho = rhoend
+    else:
+        rho = np.sqrt(rho_ratio) * rhoend  # rho = np.sqrt(rho * rhoend)
+
+    return rho
+
+def checkbreak(maxfun, nf, cstrv, ctol, f, ftarget, x):
+    info = INFO_DEFAULT
+
+    # Although x should not contain nan unless there is a bug, we include the following for security.
+    # x can be inf, as finite + finite can be inf numerically.
+    if any(np.isnan(x)) or any(np.isinf(x)):
+        info = NAN_INF_X
+
+    # Although NAN_INF_F should not happen unless there is a bug, we include the following for security.
+    if np.isnan(f) or np.isposinf(f) or np.isnan(cstrv) or np.isposinf(cstrv):
+        info = NAN_INF_F
+
+    if cstrv <= ctol and f <= ftarget:
+        info = FTARGET_ACHIEVED
+
+    if nf >= maxfun:
+        info = MAXFUN_REACHED
+    return info
+
+def initxfc(calcfc, iprint, maxfun, constr0, ctol, f0, ftarget, rhobeg, x0, nf,
+            conmat, cval, fval):
+    num_constraints = conmat.shape[0]
+    num_vars = x0.size
+    sim = np.eye(num_vars, num_vars+1) * rhobeg
+    sim[:, -1] = x0
+    solver = "COBYLA"
+    subinfo = INFO_DEFAULT
+
+    simi = np.eye(num_vars) / rhobeg
+
+    evaluated = np.zeros(num_vars+1, dtype=bool)
+
+    for k in range(num_vars + 1):
+        x = sim[:, num_vars].copy()
+        # We will evaluate F corresponding to SIM(:, J).
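+        # An illustration of the evaluation order (hypothetical numbers): with num_vars = 2,
+        # rhobeg = 0.5, and x0 = [1, 1], the loop evaluates x0 itself at k == 0 (reusing the given
+        # f0/constr0, since x0 occupies the last column of sim) and then x0 + 0.5*e_j for j = 0, 1,
+        # namely [1.5, 1] and [1, 1.5].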
+        if k == 0:
+            j = num_vars
+            f = moderatef(f0)
+            constr = moderatec(constr0)
+            cstrv = np.max(np.append(-constr, 0))
+        else:
+            j = k - 1
+            x[j] += rhobeg
+            f, constr, cstrv = evaluate(calcfc, x)
+
+        # Print a message about the function/constraint evaluation according to IPRINT.
+        # TODO: Finish implementing fmsg, if we decide that it's worth it
+        # fmsg(solver, iprint, k, f, x, cstrv, constr)
+
+        # Save X, F, CONSTR, CSTRV into the history.
+        # TODO: Implement history (maybe we need it for the iterations?)
+        # savehist(k, x, xhist, f, fhist, cstrv, chist, constr, conhist)
+
+        # Save F, CONSTR, and CSTRV to FVAL, CONMAT, and CVAL respectively.
+        evaluated[j] = True
+        fval[j] = f
+        conmat[:, j] = constr
+        cval[j] = cstrv
+
+        # Check whether to exit.
+        # subinfo = checkexit(maxfun, k, cstrv, ctol, f, ftarget, x)
+        # if subinfo != INFO_DEFAULT:
+        #     info = subinfo
+        #     break
+
+        # Exchange the new vertex of the initial simplex with the optimal vertex if necessary.
+        # This is the ONLY part that is essentially non-parallel.
+        if j < num_vars and fval[j] < fval[-1]:  # j == num_vars is the pole itself; no exchange needed
+            fval[j], fval[-1] = fval[-1], fval[j]
+            cval[j], cval[-1] = cval[-1], cval[j]
+            conmat[:, [j, -1]] = conmat[:, [-1, j]]  # fancy indexing copies, so this swap is safe
+            sim[:, -1] = x
+            sim[j, :j+1] = -rhobeg
+
+    nf = np.count_nonzero(evaluated)
+    if evaluated.all():
+        simi = np.linalg.inv(sim[:, :-1])
+        # Switch the optimal vertex (located by FINDPOLE) to SIM(:, N+1), which Powell called the "pole
+        # position". We call UPDATEPOLE with CPEN = ZERO, which is the initial value of CPEN.
+        conmat, cval, fval, sim, simi, info = updatepole(0, conmat, cval, fval, sim, simi)
+
+    # N.B.: nf is returned since Python integers are immutable and cannot be updated in place.
+    return evaluated, sim, simi, nf
+
+
+def initfilt(conmat, ctol, cweight, cval, fval, sim, evaluated, cfilt, confilt, ffilt, xfilt):
+    '''
+    This function initializes the filter (xfilt, etc.) that will be used when selecting
+    x at the end of the solver.
+    N.B.:
+    1. Why not initialize the filters using XHIST, etc.? Because the history is empty if
+    the user chooses not to output it.
+    2. We decouple initxfc and initfilt so that it is easier to parallelize the former
+    if needed.
+    '''
+    num_constraints = conmat.shape[0]
+    num_vars = sim.shape[0]
+    maxfilt = len(ffilt)
+
+    nfilt = 0
+    for i in range(num_vars+1):
+        if evaluated[i]:
+            x = sim[:, i].copy()  # copy to avoid modifying sim in place
+            if i < num_vars:  # the last vertex (index num_vars) is the pole position itself
+                x += sim[:, -1]
+            nfilt, cfilt, ffilt, xfilt, confilt = savefilt(cval[i], ctol, cweight, fval[i], x, nfilt, cfilt, ffilt, xfilt, conmat[:, i], confilt)
+
+    return nfilt
+
+
+def cobylb(calcfc, iprint, maxfilt, maxfun, ctol, cweight, eta1, eta2, ftarget,
+           gamma1, gamma2, rhobeg, rhoend, constr, f, x,
+           cstrv):
+    num_constraints = len(constr)
+    num_vars = len(x)
+    # maxxhist = xhist.shape[1]
+    # maxfhist = len(fhist)
+    # maxconhist = conhist.shape[1]
+    # maxchist = len(chist)
+    # maxhist = max(maxxhist, maxfhist, maxconhist, maxchist)
+    conmat = np.zeros((num_constraints, num_vars+1)) - REALMAX
+    cval = np.zeros(num_vars+1) + REALMAX
+    fval = np.zeros(num_vars+1) + REALMAX
+    nf = 0
+
+    subinfo = 0
+    evaluated, sim, simi, nf = initxfc(calcfc, iprint, maxfun, constr, ctol, f, ftarget, rhobeg, x, nf,
+                                       conmat, cval, fval)
+    # TODO: Need to initialize history with f and x and constr
+
+    # Initialize the filter, including xfilt, ffilt, confilt, cfilt, and nfilt.
+    # N.B.: The filter is used only when selecting which iterate to return. It does not
+    # interfere with the iterations. COBYLA is NOT a filter method but a trust-region method
+    # based on an L-infinity merit function.
+    # Powell's implementation does not use a filter to select the iterate, possibly returning
+    # a suboptimal iterate.
+    maxfilt = min(max(maxfilt, 1), maxfun)
+    cfilt = np.zeros(maxfilt)
+    confilt = np.zeros((len(constr), maxfilt))
+    ffilt = np.zeros(maxfilt)
+    xfilt = np.zeros((len(x), maxfilt))
+    nfilt = initfilt(conmat, ctol, cweight, cval, fval, sim, evaluated, cfilt, confilt, ffilt, xfilt)
+
+    # Check whether to return due to abnormal cases that may occur during the initialization.
+    if subinfo != INFO_DEFAULT:
+        info = subinfo
+        # Return the best calculated values of the variables.
+        # N.B.: Selectx and findpole choose X by different standards; one cannot replace the other.
+        kopt = selectx(ffilt[:nfilt], cfilt[:nfilt], cweight, ctol)
+        x = xfilt[:, kopt]
+        f = ffilt[kopt]
+        constr = confilt[:, kopt]
+        cstrv = cfilt[kopt]
+        # Arrange chist, conhist, fhist, and xhist so that they are in chronological order.
+        # TODO: Implement me
+        # rangehist(nf, xhist, fhist, chist, conhist)
+        # Print a return message according to IPRINT.
+        # TODO: Implement me
+        # retmsg("COBYLA", info, iprint, nf, f, x, cstrv, constr)
+        return x, f, nf, info
+
+    # Set some more initial values.
+    rho = rhobeg
+    delta = rhobeg
+    cpen = 0
+    prerec = -REALMAX
+    preref = -REALMAX
+    prerem = -REALMAX
+    actrem = -REALMAX
+    ratio = -1
+    jdrop_tr = None
+    jdrop_geo = None
+
+    # maxtr is the maximal number of trust-region iterations. Normally each trust-region
+    # iteration takes 1 or 2 function evaluations except for the following cases:
+    # 1. The update of cpen alters the optimal vertex.
+    # 2. The trust-region step is short or fails to reduce either the
+    # linearized objective or the linearized constraint violation but
+    # the geometry step is not invoked.
+    # The following maxtr is unlikely to be reached.
+    maxtr = 2 * maxfun  # In Fortran this is max(maxfun, 2*maxfun) to guard against integer overflow; Python integers do not overflow.
+    info = MAXTR_REACHED
+
+    # A[:, :num_constraints] contains the approximate gradient for the constraints, and
+    # A[:, num_constraints] is minus the approximate gradient for the objective function.
+    A = np.zeros((num_vars, num_constraints+1))
+
+    # N.B.: factor_alpha/beta/gamma/delta are four factors that COBYLB uses when
+    # managing the simplex. Note the following:
+    # 1. factor_alpha < factor_gamma < 1 < factor_delta <= factor_beta
+    # 2. factor_delta has nothing to do with delta, which is the trust-region radius.
+    # 3. factor_gamma has nothing to do with gamma1 and gamma2, which are the contracting/expanding
+    # factors for updating the trust-region radius delta.
+    factor_alpha = 0.25  # the factor alpha in the COBYLA paper
+    factor_beta = 2.1    # the factor beta in the COBYLA paper
+    factor_delta = 1.1   # the factor delta in the COBYLA paper
+    factor_gamma = 0.5   # the factor gamma in the COBYLA paper
+
+    # Begin the iterative procedure.
+    # After solving a trust-region subproblem, we use 3 boolean variables to control the workflow.
+    # shortd - Is the trust-region trial step too short to invoke a function evaluation?
+    # improve_geo - Will we improve the model after the trust-region iteration? If yes, a geometry
+    # step will be taken, corresponding to the "Branch (Delta)" in the COBYLA paper.
+    # reduce_rho - Will we reduce rho after the trust-region iteration?
+    # COBYLA never sets improve_geo and reduce_rho to True simultaneously.
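+    # The merit function below is phi(x) = f(x) + cpen * cstrv(x), with cstrv(x) the L-infinity
+    # constraint violation. A hypothetical illustration: if f(x) = 3, the constraint values are
+    # [-0.2, 0.1] so that cstrv(x) = 0.2, and cpen = 10, then phi(x) = 3 + 10*0.2 = 5; prerem and
+    # actrem computed in the loop are the predicted and actual reductions of phi.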
+
+    for tr in range(maxtr):
+        # Before the trust-region step, updatepole has been called either implicitly by initxfc/updatexfc
+        # or explicitly after cpen is updated, so that sim[:, num_vars] is the optimal vertex.
+
+        # Does the interpolation set have acceptable geometry? It affects improve_geo and reduce_rho.
+        adequate_geo = assess_geo(delta, factor_alpha, factor_beta, sim, simi)
+
+        # Calculate the linear approximations to the objective and constraint functions, placing
+        # minus the objective function gradient after the constraint gradients in the array A.
+        # N.B.: TRSTLP accesses A mostly by columns, so it is more reasonable to save A instead of A.T.
+        A[:, :num_constraints] = ((conmat[:, :num_vars] - np.tile(conmat[:, num_vars], (num_vars, 1)).T)@simi).T
+        A[:, num_constraints] = (fval[num_vars] - fval[:num_vars])@simi
+
+        # Theoretically, but not numerically, the last entry of b does not affect the result of TRSTLP.
+        # We set it to -fval[num_vars] following Powell's code.
+        b = np.hstack((-conmat[:, num_vars], -fval[num_vars]))
+        # Calculate the trust-region trial step d. Note that d does NOT depend on cpen.
+        d = trstlp(A, b, delta)
+        dnorm = min(delta, np.linalg.norm(d))
+
+        # Is the trust-region trial step short? N.B.: we compare dnorm with rho, not delta.
+        # Powell's code essentially defines shortd by shortd = (dnorm < 0.5 * rho). In our tests,
+        # 1/10 seems to work better than 1/2 or 1/4, especially for linearly constrained
+        # problems. Note that LINCOA has a slightly more sophisticated way of defining shortd,
+        # taking into account whether d causes a change to the active set. Should we try the same here?
+        shortd = (dnorm < 0.1 * rho)
+
+        # Predict the change to f (preref) and to the constraint violation (prerec) due to d.
+        # We have the following in precise arithmetic. They may fail to hold due to rounding errors.
+        # 1. b[:num_constraints] = -conmat[:, num_vars], and hence max(max(b[:num_constraints] - d@A[:, :num_constraints]), 0)
+        # is the L-infinity violation of the linearized constraints corresponding to d. When d=0, the
+        # violation is max(max(b[:num_constraints]), 0) = cval[num_vars]. Prerec is the reduction of this
+        # violation achieved by d, which is nonnegative in theory; prerec = 0 iff b[:num_constraints] <= 0,
+        # i.e. the trust-region center satisfies the linearized constraints.
+        # 2. Preref may be negative or 0, but it is positive when prerec = 0 and shortd is False.
+        # 3. Due to 2, in theory, max(prerec, preref) > 0 if shortd is False.
+        # 4. In the code, max(prerec, preref) is the counterpart of qred in UOBYQA/NEWUOA/BOBYQA/LINCOA.
+        prerec = cval[num_vars] - max(np.append(b[:num_constraints] - d@A[:, :num_constraints], 0))
+        preref = np.dot(d, A[:, num_constraints])  # Can be negative
+
+        if shortd or max(prerec, preref) <= 0:
+            # Reduce delta if d is short or if d fails to render max(prerec, preref) > 0;
+            # the latter can only happen due to rounding errors. This seems quite important
+            # for performance.
+            delta *= 0.1
+            if delta <= 1.5 * rho:
+                delta = rho  # set delta to rho when it is close to or below
+        else:
+            # Increase cpen if necessary to ensure prerem > 0. Branch back if this change alters the
+            # optimal vertex. See the discussions around equation (9) of the COBYLA paper.
+            # This is the first (out of two) places where cpen is updated. It can change cpen only when
+            # prerec > 0 > preref, in which case prerem is guaranteed positive after the update.
+            # If prerec = 0 or preref > 0, then prerem is currently positive, so cpen needs no update.
+            # However, as in Powell's implementation, if prerec > 0 = preref = cpen, then cpen will
+            # remain zero, leaving prerem = 0. If cpen = 0 and prerec > 0 > preref, then cpen will
+            # become positive; if cpen = 0, prerec > 0 and preref > 0, then cpen will remain 0.
+            if prerec > 0 and preref < 0:
+                # In Powell's code, cpen is increased to 2*barmu if and only if it is currently less
+                # than 1.5*barmu, a very "Powellful" scheme. In our implementation, however, we set cpen
+                # directly to the maximum between its current value and 2*barmu while handling possible
+                # overflow. This simplifies the scheme without worsening the performance of COBYLA.
+                barmu = -preref / prerec  # preref + barmu * prerec = 0
+                cpen = max(cpen, min(2*barmu, REALMAX))  # This is the 1st (out of 2) updates of cpen.
+                if findpole(cpen, cval, fval) < num_vars:  # findpole returns a 0-based index; the pole sits at index num_vars
+                    conmat, cval, fval, sim, simi, subinfo = updatepole(cpen, conmat, cval, fval, sim, simi)
+                    if subinfo == DAMAGING_ROUNDING:
+                        info = subinfo
+                        break  # Better action to take? Geometry step, or simply continue?
+                    continue
+                    # N.B.: The continue can occur at most num_vars times before a new function evaluation
+                    # takes place. This is because the update of cpen does not decrease cpen, and hence it
+                    # can make vertex j (j < num_vars) become the new optimal vertex only if cval[j] < cval[num_vars],
+                    # which can happen at most num_vars times. See the paragraph below (9) in the COBYLA paper.
+
+            x = sim[:, num_vars] + d
+            # Evaluate the objective and constraints at X, taking care of possible inf/nan values.
+            f, constr, cstrv = evaluate(calcfc, x)
+            nf += 1
+
+            # Print a message about the function/constraint evaluation according to iprint.
+            # TODO: Implement me
+            # fmsg(solver, iprint, nf, f, x, cstrv, constr)
+            # Save X, F, CONSTR, CSTRV into the history.
+            # TODO: Implement me
+            # savehist(nf, x, xhist, f, fhist, cstrv, chist, constr, conhist)
+            # Save X, F, CONSTR, CSTRV into the filter (savefilt is already available; see initfilt).
+            nfilt, cfilt, ffilt, xfilt, confilt = savefilt(cstrv, ctol, cweight, f, x, nfilt, cfilt, ffilt, xfilt, constr, confilt)
+
+            # Evaluate prerem and actrem, which are the predicted and actual reductions in the merit
+            # function, respectively.
+            prerem = preref + cpen * prerec  # Theoretically nonnegative; is 0 if cpen = 0 = preref
+            actrem = (fval[num_vars] + cpen * cval[num_vars]) - (f + cpen * cstrv)
+            if cpen <= 0 and abs(f - fval[num_vars]) <= 0:
+                # cpen <= 0 indeed means cpen == 0, while abs(f - fval[num_vars]) <= 0 indeed means f == fval[num_vars]
+                prerem = prerec
+                actrem = cval[num_vars] - cstrv
+
+            # In theory, PREREM >= 0, but this can fail due to rounding errors.
+            # assert prerem >= 0, 'PREREM >= 0, COBYLA'
+
+            # Calculate the reduction ratio by redrat, which handles inf/nan carefully.
+            ratio = redrat(actrem, prerem, eta1)
+
+            # Update delta. After this, delta < dnorm may hold.
+            # N.B.:
+            # 1. Powell's code uses rho as the trust-region radius and updates it as follows:
+            # Reduce rho to gamma1*rho if adequate_geo is True and either shortd is True or ratio < eta1,
+            # and then revise rho to rhoend if its new value is not more than 1.5*rhoend; rho remains
+            # unchanged in all other cases; in particular, rho is never increased.
+            # 2. Our implementation uses delta as the trust-region radius, while using rho as a lower
+            # bound for delta. Delta is updated in a way that is typical for trust-region methods, and
+            # it is revised to rho if its new value is not more than 1.5*rho.
+            # Rho reflects the current resolution of the algorithm; its update is essentially the
+            # same as the update of rho in Powell's code (see the definition of reduce_rho below).
+            # Our implementation aligns with UOBYQA/NEWUOA/BOBYQA/LINCOA and improves the performance of COBYLA.
+            # 3. The same as Powell's code, we do not reduce rho unless adequate_geo is True. This is
+            # also how Powell updated rho in UOBYQA/NEWUOA/BOBYQA/LINCOA. What if we also used
+            # adequate_geo == True as a prerequisite for reducing delta? The argument would be that the
+            # bad (small) value of ratio may be because of a bad geometry (and hence a bad model) rather
+            # than an improperly large delta, and it might be good to try improving the geometry first
+            # without reducing delta. However, according to a test of 230206, it does not improve the
+            # performance if we skip the update of delta when adequate_geo is False and ratio < 0.1.
+            # Therefore we choose to update delta without checking adequate_geo.
+            delta = trrad(delta, dnorm, eta1, eta2, gamma1, gamma2, ratio)
+            if delta <= 1.5*rho:
+                delta = rho  # Set delta to rho when it is close to or below.
+
+            # Is the newly generated X better than the current best point?
+            ximproved = actrem > 0  # If actrem is NaN, then ximproved should and will be False.
+
+            # Set jdrop_tr to the index of the vertex to be replaced with X. jdrop_tr = None means there
+            # is no good point to replace, and X will not be included into the simplex; in this case,
+            # the geometry of the simplex likely needs improvement, which will be handled below.
+            jdrop_tr = setdrop_tr(ximproved, d, sim, simi)
+
+            # Update sim, simi, fval, conmat, and cval so that sim[:, jdrop_tr] is replaced with d.
+            # Updatexfc does nothing if jdrop_tr is None, as the algorithm decides to discard X.
+            sim, simi, fval, conmat, cval, subinfo = updatexfc(jdrop_tr, constr, cpen, cstrv, d, f, conmat, cval, fval, sim, simi)
+            # Check whether to break due to damaging rounding in updatepole (called by updatexfc).
+            if subinfo == DAMAGING_ROUNDING:
+                info = subinfo
+                break
+
+            # Check whether to break due to maxfun, ftarget, etc.
+            subinfo = checkbreak(maxfun, nf, cstrv, ctol, f, ftarget, x)
+            if subinfo != INFO_DEFAULT:
+                info = subinfo
+                break
+        # End of if shortd or max(prerec, preref) <= 0. The normal trust-region calculation ends.
+
+        # Before the next trust-region iteration, we possibly improve the geometry of the simplex or
+        # reduce rho according to improve_geo and reduce_rho. Now we decide these indicators.
+        # N.B.: We must ensure that the algorithm does not set improve_geo to True at infinitely many
+        # consecutive iterations without moving sim[:, num_vars] or reducing rho. Otherwise, the algorithm
+        # will get stuck in repetitive invocations of geostep. This is ensured by the following facts:
+        # 1. If an iteration sets improve_geo to True, it must also reduce delta or set delta to rho.
+        # 2. If sim[:, num_vars] and rho remain unchanged, then adequate_geo will become True after at
+        # most num_vars invocations of geostep.
+
+        # bad_trstep: Is the last trust-region step bad?
+        bad_trstep = shortd or (max(prerec, preref) <= 0) or ratio <= 0 or jdrop_tr is None
+        # improve_geo: Should we take a geometry step to improve the geometry of the interpolation set?
+        improve_geo = bad_trstep and not adequate_geo
+        # reduce_rho: Should we enhance the resolution by reducing rho?
+        reduce_rho = bad_trstep and adequate_geo and max(delta, dnorm) <= rho
+
+        # COBYLA never sets improve_geo and reduce_rho to True simultaneously.
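+        # An illustration with hypothetical values: if the step was short (shortd = True) and the
+        # simplex is distorted (adequate_geo = False), then bad_trstep = True, improve_geo = True,
+        # and reduce_rho = False, so a geometry step follows; with the same bad step but
+        # adequate_geo = True and max(delta, dnorm) <= rho, we get improve_geo = False and
+        # reduce_rho = True instead, so the resolution is enhanced.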
+        # assert not (improve_geo and reduce_rho), 'IMPROVE_GEO and REDUCE_RHO are not both TRUE, COBYLA'
+
+        # If shortd is True or max(prerec, preref) <= 0, then either improve_geo or reduce_rho
+        # is True unless adequate_geo is True and max(delta, dnorm) > rho.
+        # assert (not shortd and max(prerec, preref) > 0) or (improve_geo or reduce_rho or adequate_geo and max(delta, dnorm) > rho)
+
+        # Comments on bad_trstep:
+        # 1. Powell's definition of bad_trstep is as follows. The one used above seems to work better,
+        # especially for linearly constrained problems, due to the factor 1/10 (= eta1).
+        # bad_trstep = shortd or actrem <= 0 or actrem < prerem/10 or jdrop_tr is None
+        # Besides, Powell did not check max(prerec, preref) > 0 in bad_trstep, which is reasonable to do
+        # but has little impact on the performance.
+        # 2. NEWUOA/BOBYQA/LINCOA would define bad_trstep, improve_geo, and reduce_rho as follows. Two
+        # different thresholds are used in bad_trstep. It outperforms Powell's version.
+        # bad_trstep = shortd or max(prerec, preref) <= 0 or ratio <= eta1 or jdrop_tr is None
+        # improve_geo = bad_trstep and not adequate_geo
+        # reduce_rho = bad_trstep and adequate_geo and max(delta, dnorm) <= rho
+        # 3. Theoretically, jdrop_tr is not None when actrem > 0 (guaranteed by ratio > 0). However, in Powell's
+        # implementation, jdrop_tr may be 0 even when ratio > 0 due to NaN. The modernized code has rectified
+        # this in the function setdrop_tr. After this rectification, we can indeed simplify the definition
+        # of bad_trstep by removing the condition on jdrop_tr. We retain it for robustness.
+
+        # Comments on reduce_rho:
+        # When shortd is True, UOBYQA/NEWUOA/BOBYQA/LINCOA all set reduce_rho to True if the recent
+        # models are sufficiently accurate according to certain criteria. See the paragraph around (37)
+        # in the UOBYQA paper and the discussions about Box 14 in the NEWUOA paper. This strategy is
+        # crucial for the performance of the solvers. However, as of 20221111, we have not managed to
+        # make it work in COBYLA. As in NEWUOA, we recorded the errors of the recent models, and set
+        # reduce_rho to True if they are small (e.g. all(abs(moderrsav) <= 0.1 * max(abs(A))*rho) or
+        # all(abs(moderrsav) <= rho**2)) when shortd is True. It made little impact on the performance.
+
+        # Since COBYLA never sets improve_geo and reduce_rho to True simultaneously, the following
+        # two blocks are exchangeable: "if improve_geo:" and "if reduce_rho:".
+
+        # Improve the geometry of the simplex by removing a point and adding a new one.
+        # If the current interpolation set has acceptable geometry, then we skip the geometry step.
+        # The code has a small difference from Powell's original code here: if the current geometry
+        # is acceptable, then we will continue with a new trust-region iteration; however, at the
+        # beginning of the iteration, cpen may be updated, which may alter the pole point sim[:, num_vars]
+        # by updatepole; the quality of the interpolation set depends on sim[:, num_vars], meaning
+        # that the same interpolation set may have good or bad geometry with respect to different
+        # "poles"; if the geometry turns out bad with the new pole, the original COBYLA code will
+        # take a geometry step, but our code will NOT do it but continue to take a trust-region
+        # step. The argument is this: even if the geometry step is not skipped in the first place, the
+        # geometry may turn out bad again after the pole is altered due to an update to cpen; should
+        # we take another geometry step in that case?
+        # If no, why should we do it here? Indeed, this
+        # distinction makes no practical difference for the CUTEst problems with at most 100 variables
+        # and 5000 constraints, while the algorithm framework is simplified.
+        if improve_geo and not assess_geo(delta, factor_alpha, factor_beta, sim, simi):
+            # Before the geometry step, updatepole has been called either implicitly by updatexfc or
+            # explicitly after cpen is updated, so that sim[:, num_vars] is the optimal vertex.
+
+            # Pick a vertex to drop from the simplex. It will be replaced with sim[:, num_vars] + d to
+            # improve the acceptability of the simplex. See equations (15) and (16) of the COBYLA paper.
+            # N.B.: COBYLA never sets jdrop_geo = num_vars.
+            jdrop_geo = setdrop_geo(delta, factor_alpha, factor_beta, sim, simi)
+
+            # The following jdrop_geo comes from UOBYQA/NEWUOA/BOBYQA/LINCOA. It performs poorly!
+            # jdrop_geo = np.argmax(np.sum(sim[:, :num_vars]**2, axis=0), axis=0)  # TODO: Check axes, though not urgent since this is commented out
+
+            # jdrop_geo is a valid index unless sim and simi contain NaN, which should not happen
+            # at this point unless there is a bug. Nevertheless, for robustness, we break when
+            # setdrop_geo returns None (if we continued, None would be used as an index of arrays
+            # and raise an error).
+            if jdrop_geo is None:
+                info = DAMAGING_ROUNDING
+                break
+
+            # Calculate the geometry step d.
+            # In NEWUOA, geostep takes delbar = max(min(np.sqrt(max(distsq))/10, delta/2), rho)
+            # rather than delta. This should not be done here, because d should improve the geometry of
+            # the simplex when sim[:, jdrop] is replaced with d; the quality of the geometry is defined
+            # by delta instead of delbar as in (14) of the COBYLA paper. See geostep for more detail.
+            d = geostep(jdrop_geo, cpen, conmat, cval, delta, fval, factor_gamma, simi)
+
+            x = sim[:, num_vars] + d
+            # Evaluate the objective and constraints at X, taking care of possible inf/nan values.
+            f, constr, cstrv = evaluate(calcfc, x)
+            nf += 1
+
+            # Print a message about the function/constraint evaluation according to iprint.
+            # TODO: Implement me
+            # fmsg(solver, iprint, nf, f, x, cstrv, constr)
+            # Save X, F, CONSTR, CSTRV into the history.
+            # TODO: Implement me
+            # savehist(nf, x, xhist, f, fhist, cstrv, chist, constr, conhist)
+            # Save X, F, CONSTR, CSTRV into the filter (savefilt is already available; see initfilt).
+            nfilt, cfilt, ffilt, xfilt, confilt = savefilt(cstrv, ctol, cweight, f, x, nfilt, cfilt, ffilt, xfilt, constr, confilt)
+
+            sim, simi, fval, conmat, cval, subinfo = updatexfc(jdrop_geo, constr, cpen, cstrv, d, f, conmat, cval, fval, sim, simi)
+            if subinfo == DAMAGING_ROUNDING:
+                info = subinfo
+                break
+
+            # Check whether to break due to maxfun, ftarget, etc.
+            subinfo = checkbreak(maxfun, nf, cstrv, ctol, f, ftarget, x)
+            if subinfo != INFO_DEFAULT:
+                info = subinfo
+                break
+        # End of if improve_geo. The procedure of improving the geometry ends.
+
+        # The calculations with the current rho are complete. Enhance the resolution of the algorithm
+        # by reducing rho; update delta and cpen at the same time.
+        if reduce_rho:
+            if rho <= rhoend:
+                info = SMALL_TR_RADIUS
+                break
+            delta = rho/2
+            rho = redrho(rho, rhoend)
+            delta = max(delta, rho)
+            cpen = min(cpen, fcratio(fval, conmat))  # This is the 2nd (out of 2) update of cpen.
+            # It may become 0.
+            # Print a message about the reduction of rho according to iprint.
+            # TODO: implement me!
+            # call rhomsg(solver, iprint, nf, fval(n + 1), rho, sim(:, n + 1), cval(n + 1), conmat(:, n + 1), cpen)
+            conmat, cval, fval, sim, simi, subinfo = updatepole(cpen, conmat, cval, fval, sim, simi)
+            # Check whether to break due to damaging rounding detected in updatepole.
+            if subinfo == DAMAGING_ROUNDING:
+                info = subinfo
+                break  # Better action to take? Geometry step, or simply continue?
+    # end of for loop
+
+    # Return the best calculated values of the variables.
+    # N.B.: Selectx and findpole choose X by different standards; one cannot replace the other.
+    kopt = selectx(ffilt[:nfilt], cfilt[:nfilt], max(cpen, cweight), ctol)
+    x = xfilt[:, kopt]
+    f = ffilt[kopt]
+    constr = confilt[:, kopt]
+    cstrv = cfilt[kopt]
+
+    # Arrange CHIST, CONHIST, FHIST, and XHIST so that they are in the chronological order.
+    # TODO: Implement me
+    # call rangehist(nf, xhist, fhist, chist, conhist)
+
+    # Print a return message according to IPRINT.
+    # TODO: Implement me
+    # call retmsg(solver, info, iprint, nf, f, x, cstrv, constr)
+    return x, f, nf, info
+
+
diff --git a/python/src/prima/cobyla/geometry.py b/python/src/prima/cobyla/geometry.py
new file mode 100644
index 0000000000..b230e45e58
--- /dev/null
+++ b/python/src/prima/cobyla/geometry.py
@@ -0,0 +1,188 @@
+import numpy as np
+
+def setdrop_geo(delta, factor_alpha, factor_beta, sim, simi):
+    '''
+    This function finds (the index of) a current interpolation point to be replaced with a
+    geometry-improving point. See (15)-(16) of the COBYLA paper.
+    N.B.: COBYLA never sets jdrop to num_vars.
+    '''
+
+    num_vars = sim.shape[0]
+
+    # Calculate the values of sigma and eta.
+    # vsig[j] for j = 0...num_vars-1 is the Euclidean distance from vertex J to the opposite face of the simplex.
+    vsig = 1 / np.sqrt(np.sum(simi**2, axis=1))
+    veta = np.sqrt(np.sum(sim[:, :num_vars]**2, axis=0))
+
+    # Decide which vertex to drop from the simplex. It will be replaced with a new point to improve the
+    # acceptability of the simplex. See equations (15) and (16) of the COBYLA paper.
+    if any(veta > factor_beta * delta):
+        jdrop = np.where(veta == max(veta[~np.isnan(veta)]))[0][0]
+    elif any(vsig < factor_alpha * delta):
+        jdrop = np.where(vsig == min(vsig[~np.isnan(vsig)]))[0][0]
+    else:
+        # We arrive here if vsig and veta are all nan, which can happen due to nan in sim and simi,
+        # which should not happen unless there is a bug.
+        jdrop = None
+
+    # Zaikun 230202: What if we consider veta and vsig together? The following attempts do not work well.
+    # jdrop = max(np.sum(sim[:, :num_vars]**2, axis=0)*np.sum(simi**2, axis=1))  # Condition number
+    # jdrop = max(np.sum(sim[:, :num_vars]**2, axis=0)**2 * np.sum(simi**2, axis=1))  # Condition number times distance
+    return jdrop
+
+def geostep(jdrop, cpen, conmat, cval, delta, fval, factor_gamma, simi):
+    '''
+    This function calculates a geometry step so that the geometry of the interpolation set is improved
+    when sim[:, jdrop] is replaced with sim[:, num_vars] + d.
+    '''
+    num_constraints = conmat.shape[0]
+    num_vars = simi.shape[0]
+
+    # simi[jdrop, :] is a vector perpendicular to the face of the simplex opposite to vertex
+    # jdrop. Thus vsigj * simi[jdrop, :] is the unit vector in this direction.
+    vsigj = 1 / np.sqrt(np.sum(simi[jdrop, :]**2))
+
+    # Set d to the vector in the above-mentioned direction and with length factor_gamma * delta.
+    # Since factor_alpha < factor_gamma < factor_beta, d improves the geometry of the simplex as per (14) of
+    # the COBYLA paper. This also explains why this subroutine does not replace delta with
+    # delbar = max(min(np.sqrt(max(distsq))/10, delta/2), rho) as in NEWUOA.
+    d = factor_gamma * delta * (vsigj * simi[jdrop, :])
+
+    # Calculate the coefficients of the linear approximations to the objective and constraint functions,
+    # placing minus the objective function gradient after the constraint gradients in the array A.
+    A = np.zeros((num_vars, num_constraints + 1))
+    A[:, :num_constraints] = ((conmat[:, :num_vars] - np.tile(conmat[:, num_vars], (num_vars, 1)).T)@simi).T
+    A[:, num_constraints] = (fval[num_vars] - fval[:num_vars])@simi
+    cvmaxp = max(0, max(-d@A[:, :num_constraints] - conmat[:, num_vars]))
+    cvmaxn = max(0, max(d@A[:, :num_constraints] - conmat[:, num_vars]))
+    if 2 * np.dot(d, A[:, num_constraints]) < cpen * (cvmaxp - cvmaxn):
+        d *= -1
+    return d
+
+def setdrop_tr(ximproved, d, sim, simi):
+    '''
+    This function finds (the index of) a current interpolation point to be replaced with the
+    trust-region trial point. See (19)-(22) of the COBYLA paper.
+    N.B.:
+    1. If ximproved == True, then jdrop is not None, so that d is included into the simplex. Otherwise, it is a bug.
+    2. COBYLA never sets jdrop = num_vars.
+    TODO: Check whether it improves the performance if jdrop = num_vars is allowed when ximproved is True.
+    Note that updatexfc should be revised accordingly.
+    '''
+
+    num_vars = sim.shape[0]
+
+    # -------------------------------------------------------------------------------------------------- #
+    # The following code is Powell's scheme for defining JDROP.
+    # -------------------------------------------------------------------------------------------------- #
+    # ! JDROP = 0 by default. It cannot be removed, as JDROP may not be set below in some cases (e.g.,
+    # ! when XIMPROVED == FALSE, MAXVAL(ABS(SIMID)) <= 1, and MAXVAL(VETA) <= EDGMAX).
+    # jdrop = 0
+    #
+    # ! SIMID(J) is the value of the J-th Lagrange function at D. It is the counterpart of VLAG in UOBYQA
+    # ! and DEN in NEWUOA/BOBYQA/LINCOA, but it excludes the value of the (N+1)-th Lagrange function.
+    # simid = matprod(simi, d)
+    # if (any(abs(simid) > 1) .or. (ximproved .and. any(.not. is_nan(simid)))) then
+    #     jdrop = int(maxloc(abs(simid), mask=(.not. is_nan(simid)), dim=1), kind(jdrop))
+    #     !!MATLAB: [~, jdrop] = max(simid, [], 'omitnan');
+    # end if
+    #
+    # ! VETA(J) is the distance from the J-th vertex of the simplex to the best vertex, taking the trial
+    # ! point SIM(:, N+1) + D into account.
+    # if (ximproved) then
+    #     veta = sqrt(sum((sim(:, 1:n) - spread(d, dim=2, ncopies=n))**2, dim=1))
+    #     !!MATLAB: veta = sqrt(sum((sim(:, 1:n) - d).^2)); % d should be a column! Implicit expansion
+    # else
+    #     veta = sqrt(sum(sim(:, 1:n)**2, dim=1))
+    # end if
+    #
+    # ! VSIG(J) (J=1, .., N) is the Euclidean distance from vertex J to the opposite face of the simplex.
+    # vsig = ONE / sqrt(sum(simi**2, dim=2))
+    # sigbar = abs(simid) * vsig
+    #
+    # ! The following JDROP will overwrite the previous one if its premise holds.
+    # mask = (veta > factor_delta * delta .and. (sigbar >= factor_alpha * delta .or. sigbar >= vsig))
+    # if (any(mask)) then
+    #     jdrop = int(maxloc(veta, mask=mask, dim=1), kind(jdrop))
+    #     !!MATLAB: etamax = max(veta(mask)); jdrop = find(mask & ~(veta < etamax), 1, 'first');
+    # end if
+    #
+    # ! Powell's code does not include the following instructions. With Powell's code, if SIMID consists
+    # ! of only NaN, then JDROP can be 0 even when XIMPROVED == TRUE (i.e., D reduces the merit function).
+    # ! With the following code, JDROP cannot be 0 when XIMPROVED == TRUE, unless VETA is all NaN, which
+    # ! should not happen if X0 does not contain NaN, the trust-region/geometry steps never contain NaN,
+    # ! and we exit once encountering an iterate containing Inf (due to overflow).
+    # if (ximproved .and. jdrop <= 0) then  ! Write JDROP <= 0 instead of JDROP == 0 for robustness.
+    #     jdrop = int(maxloc(veta, mask=(.not. is_nan(veta)), dim=1), kind(jdrop))
+    #     !!MATLAB: [~, jdrop] = max(veta, [], 'omitnan');
+    # end if
+    # -------------------------------------------------------------------------------------------------- #
+    # Powell's scheme ends here.
+    # -------------------------------------------------------------------------------------------------- #
+
+    # The following definition of jdrop is inspired by setdrop_tr in UOBYQA/NEWUOA/BOBYQA/LINCOA.
+    # It is simpler and works better than Powell's scheme. Note that we allow jdrop to be num_vars if
+    # ximproved is True, whereas Powell's code does not.
+    # See also (4.1) of Scheinberg-Toint-2010: Self-Correcting Geometry in Model-Based Algorithms for
+    # Derivative-Free Unconstrained Optimization, which refers to the strategy here as the "combined
+    # distance/poisedness criteria".
+
+    # distsq[j] is the square of the distance from the jth vertex of the simplex to the "best" point so
+    # far, taking the trial point sim[:, num_vars] + d into account.
+    distsq = np.zeros(sim.shape[1])
+    if ximproved:
+        distsq[:num_vars] = np.sum((sim[:, :num_vars] - np.tile(d, (num_vars, 1)).T)**2, axis=0)
+        distsq[num_vars] = np.sum(d**2)
+    else:
+        distsq[:num_vars] = np.sum(sim[:, :num_vars]**2, axis=0)
+        distsq[num_vars] = 0
+
+    weight = distsq
+
+    # Other possible definitions of weight. They work almost the same as the one above.
+    # weight = max(1, max(25 * distsq / delta**2))
+    # weight = max(1, max(10 * distsq / delta**2))
+    # weight = max(1, max(1e2 * distsq / delta**2))
+    # weight = max(1, max(distsq / max(rho, delta/10)**2))
+    # weight = max(1, max(distsq / rho**2))
+
+    # If 0 <= j < num_vars, simid[j] is the value of the jth Lagrange function at d; the value of the
+    # (num_vars+1)th Lagrange function is 1 - sum(simid). [simid, 1 - sum(simid)] is the counterpart of
+    # vlag in UOBYQA and den in NEWUOA/BOBYQA/LINCOA.
+    simid = simi@d
+    score = weight * abs(np.array([*simid, 1 - np.sum(simid)]))
+
+    # If ximproved is False (the new d does not render a "better" x), we set score[num_vars] = -1 to avoid
+    # jdrop = num_vars. This is not really needed if weight is defined to be distsq raised to some power,
+    # in which case score[num_vars] = 0. We keep the code for robustness (in case the definition of weight
+    # changes later).
+    if not ximproved:
+        score[num_vars] = -1
+
+    if any(score > 0):
+        jdrop = np.where(score == max(score[~np.isnan(score)]))[0][0]
+    elif ximproved:
+        jdrop = np.argmax(distsq)
+    else:
+        jdrop = None
+
+    return jdrop
+
+def assess_geo(delta, factor_alpha, factor_beta, sim, simi):
+    # This function checks if an interpolation set has acceptable geometry as in
+    # (14) of the COBYLA paper.
+
+    # Calculate the values of sigma and eta.
+    # veta[j] (0 <= j < n) is the distance between vertex j and the best vertex of the simplex.
+    # vsig[j] (0 <= j < n) is the distance from vertex j to its opposite face of the simplex.
+    # Thus vsig <= veta.
+    # N.B.: What about the distance from vertex N+1 to its opposite face?
+    # Consider the simplex {V_{N+1}, V_{N+1} + L*e_1, ..., V_{N+1} + L*e_N}, where V_{N+1} is vertex N+1,
+    # namely the current "best" point, [e_1, ..., e_N] is an orthogonal matrix, and L is a
+    # constant in the order of delta. This simplex is optimal in the sense that the interpolation
+    # system has the minimal condition number, i.e. 1. For this simplex, the distance from
+    # V_{N+1} to its opposite face is L/sqrt(n).
+    vsig = 1/np.sqrt(np.sum(simi**2, axis=1))  # TODO: Check axis
+    veta = np.sqrt(np.sum(sim[:, :-1]**2, axis=0))
+    adequate_geo = all(vsig >= factor_alpha * delta) and all(veta <= factor_beta * delta)
+    return adequate_geo
\ No newline at end of file
diff --git a/python/src/prima/cobyla/trustregion.py b/python/src/prima/cobyla/trustregion.py
new file mode 100644
index 0000000000..676b0e0a39
--- /dev/null
+++ b/python/src/prima/cobyla/trustregion.py
@@ -0,0 +1,447 @@
+'''
+This module provides subroutines concerning the trust-region calculations of COBYLA.
+
+Coded by Zaikun ZHANG (www.zhangzk.net) based on Powell's code and the COBYLA paper.
+
+Dedicated to the late Professor M. J. D. Powell FRS (1936--2015).
+
+Python implementation by Nickolai Belakovski
+'''
+
+import numpy as np
+import numpy.typing as npt
+from prima.common.consts import REALMIN, REALMAX, EPS
+from prima.common.powalg import qradd_Rdiag, qrexc_Rdiag
+from prima.common.linalg import isminor
+
+def trrad(delta_in, dnorm, eta1, eta2, gamma1, gamma2, ratio):
+    # This function updates the trust-region radius according to ratio and dnorm.
+
+    if ratio <= eta1:
+        delta = gamma1 * dnorm  # Powell's UOBYQA/NEWUOA
+        # delta = gamma1 * delta_in  # Powell's COBYLA/LINCOA
+        # delta = min(gamma1 * delta_in, dnorm)  # Powell's BOBYQA
+    elif ratio <= eta2:
+        delta = max(gamma1 * delta_in, dnorm)  # Powell's UOBYQA/NEWUOA/BOBYQA/LINCOA
+    else:
+        delta = max(gamma1 * delta_in, gamma2 * dnorm)  # Powell's NEWUOA/BOBYQA
+        # delta = max(delta_in, gamma2 * dnorm)  # Modified version. Works well for UOBYQA.
+        # For noise-free CUTEst problems of <= 100 variables, Powell's version works slightly better
+        # than the modified one.
+        # delta = max(delta_in, 1.25*dnorm, dnorm + rho)  # Powell's UOBYQA
+        # delta = min(max(gamma1 * delta_in, gamma2 * dnorm), gamma3 * delta_in)  # Powell's LINCOA, gamma3 = np.sqrt(2)
+
+    # For noisy problems, the following may work better.
+    # if ratio <= eta1:
+    #     delta = gamma1 * dnorm
+    # elif ratio <= eta2:  # Ensure DELTA >= DELTA_IN
+    #     delta = delta_in
+    # else:  # Ensure DELTA > DELTA_IN with a constant factor
+    #     delta = max(delta_in * (1 + gamma2) / 2, gamma2 * dnorm)
+    return delta
+
+def redrat(ared, pred, rshrink):
+    # This function evaluates the reduction ratio of a trust-region step, handling inf/nan properly.
+    if np.isnan(ared):
+        # This should not happen in unconstrained problems due to the moderated extreme barrier.
+        ratio = -REALMAX
+    elif np.isnan(pred) or pred <= 0:
+        # The trust-region subproblem solver fails in this rare case. Instead of terminating as Powell's
+        # original code does, we set ratio as follows so that the solver may continue to progress.
+        if ared > 0:
+            # The trial point will be accepted, but the trust-region radius will be shrunk if rshrink > 0.
+            ratio = rshrink/2
+        else:
+            # Set the ratio to a large negative number to signify a bad trust-region step, so that the
+            # solver will check whether to take a geometry step or reduce rho.
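+            # (Illustration with hypothetical numbers: ared = -0.2 and pred = NaN land here
+            # and yield ratio = -REALMAX.)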
+            ratio = -REALMAX
+    elif np.isposinf(pred) and np.isposinf(ared):
+        ratio = 1  # ared/pred = NaN if calculated directly
+    elif np.isposinf(pred) and np.isneginf(ared):
+        ratio = -REALMAX  # ared/pred = NaN if calculated directly
+    else:
+        ratio = ared/pred
+    return ratio
+
+def trstlp(A_in, b_in, delta):
+    '''
+    This function calculates an n-component vector d by the following two stages. In the first
+    stage, d is set to the shortest vector that minimizes the greatest violation of the constraints
+        np.dot(A[:n, k], d) >= b[k], k = 0, 1, 2, ..., m-1
+    subject to the Euclidean length of d being at most delta. If its length is strictly less than
+    delta, then the second stage uses the resultant freedom in d to minimize the objective function
+        np.dot(-A[:n, m], d)
+    subject to no increase in any greatest constraint violation. This notation allows the gradient of
+    the objective function to be regarded as the gradient of a constraint. Therefore the two stages
+    are distinguished by mcon == m and mcon > m respectively.
+
+    It is possible but rare that a degeneracy may prevent d from attaining the target length delta.
+
+    cviol is the largest constraint violation of the current d: max(max(b[:m] - d@A[:, :m]), 0).
+    icon is the index of a most violated constraint if cviol is positive.
+
+    nact is the number of constraints in the active set and iact[0], ..., iact[nact-1] are their indices,
+    while the remainder of iact contains a permutation of the remaining constraint indices.
+    N.B.: nact <= min(num_constraints, num_vars). Obviously nact <= num_constraints. In addition, the constraints
+    in iact[0, ..., nact-1] have linearly independent gradients (see the comments above the instructions
+    that delete a constraint from the active set to make room for the new active constraint with index iact[icon]);
+    it can also be seen from the update of nact: starting from 0, nact is incremented only if nact < n.
+
+    Further, Z is an orthogonal matrix whose first nact columns can be regarded as the result of
+    Gram-Schmidt applied to the active constraint gradients. For j = 0, 1, ..., nact-1, the number
+    zdota[j] is the scalar product of the jth column of Z with the gradient of the jth active
+    constraint. d is the current vector of variables and here the residuals of the active constraints
+    should be zero. Further, the active constraints have nonnegative Lagrange multipliers that are
+    held at the beginning of vmultc. The remainder of this vector holds the residuals of the inactive
+    constraints at d, the ordering of the components of vmultc being in agreement with the permutation
+    of the indices of the constraints that is in iact. All these residuals are nonnegative, which is
+    achieved by the shift cviol that makes the least residual zero.
+
+    N.B.:
+    1. The algorithm was NOT documented in the COBYLA paper. A note should be written to introduce it!
+    2. As a major part of the algorithm (see trstlp_sub), the code maintains and updates the QR
+    factorization of A[iact[:nact]], i.e. the gradients of all the active (linear) constraints. The
+    matrix Z is indeed Q, and the vector zdota is the diagonal of R. The factorization is updated by
+    Givens rotations when an index is added in or removed from iact.
+    3. There are probably better algorithms available for this trust-region linear programming problem.
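+    4. An illustrative example (hypothetical data): with n = 2, m = 1, A[:, 0] = [1, 0], b[0] = 1,
+    and delta = 0.5, the single linearized constraint asks for d[0] >= 1, which is unreachable within
+    the trust region; stage 1 then returns d = [0.5, 0], the step of length at most delta that
+    minimizes the violation max(1 - d[0], 0).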
+    '''
+
+    A = A_in.copy()
+    b = b_in.copy()
+    vmultc = np.zeros(len(b))
+    iact = np.zeros(len(b), dtype=int)  # iact holds 0-based constraint indices
+    nact = 0
+    d = np.zeros(A_in.shape[0])
+    z = np.zeros((len(d), len(d)))
+    num_constraints = A.shape[1] - 1
+
+    # Scale the problem if A contains large values. Otherwise floating point exceptions may occur.
+    # Note that the trust-region step is scale invariant.
+    for i in range(num_constraints+1):  # Note that A.shape[1] == len(b) == num_constraints+1 != num_constraints
+        if (maxval := max(abs(A_in[:, i]))) > 1e12:
+            modscal = max(2*REALMIN, 1/maxval)
+            A[:, i] *= modscal
+            b[i] *= modscal
+
+    # Stage 1: minimize the L-infinity constraint violation of the linearized constraints.
+    iact_stage1, nact, d, vmultc_stage1, z = trstlp_sub(iact[:num_constraints], nact, 1, A[:, :num_constraints], b[:num_constraints], delta, d, vmultc[:num_constraints], z)
+    # N.B.: Unlike in Fortran, slicing creates copies, so we write the stage-1 results back explicitly.
+    iact[:num_constraints] = iact_stage1
+    vmultc[:num_constraints] = vmultc_stage1
+
+    # Stage 2: minimize the linearized objective without increasing the L-infinity constraint violation.
+    iact, nact, d, vmultc, z = trstlp_sub(iact, nact, 2, A, b, delta, d, vmultc, z)
+    # TODO: Double-check the handling of A_in vs A against the Fortran code.
+    return d
+
+def trstlp_sub(iact: npt.NDArray, nact: int, stage, A, b, delta, d, vmultc, z):
+    '''
+    This subroutine does the real calculations for trstlp, both stage 1 and stage 2.
+    Major differences between stage 1 and stage 2:
+    1. Initialization. Stage 2 inherits the values of some variables from stage 1, so they are
+    initialized in stage 1 but not in stage 2.
+    2. cviol. cviol is updated at each iteration in stage 1, while it remains constant in stage 2.
+    3. sdirn. See the definition of sdirn in the code for details.
+    4. optnew. The two stages have different objectives, so optnew is updated differently.
+    5. step. step <= cviol in stage 1.
+    '''
+    # zdasav = np.zeros(z.shape[1])
+    vmultd = np.zeros(len(vmultc))
+
+    num_vars = A.shape[0]
+    mcon = A.shape[1]
+    zdota = np.zeros(z.shape[1])
+
+    # Initialize according to stage.
+    if stage == 1:
+        iact = np.arange(mcon)  # 0-based indices, unlike the 1-based Fortran original
+        nact = 0
+        d = np.zeros(num_vars)
+        cviol = max(np.append(b, 0))
+        vmultc = cviol - b
+        z = np.eye(num_vars)
+        if mcon == 0 or cviol <= 0:
+            # Check whether a quick return is possible. Make sure the in-outputs have been initialized.
+            return iact, nact, d, vmultc, z
+
+        if all(np.isnan(b)):
+            return iact, nact, d, vmultc, z
+        else:
+            icon = np.where(b == max(b[~np.isnan(b)]))[0][0]
+        num_constraints = mcon
+        sdirn = np.zeros(len(d))
+    else:
+        if np.dot(d, d) >= delta**2:
+            # Check whether a quick return is possible.
+            return iact, nact, d, vmultc, z
+
+        iact[mcon-1] = mcon - 1  # 0-based index of the objective row (the Fortran code sets iact(mcon) = mcon)
+        vmultc[mcon-1] = 0
+        num_constraints = mcon - 1
+        icon = mcon - 1
+
+        # In Powell's code, stage 2 uses the zdota and cviol calculated by stage 1. Here we recalculate
+        # them so that they need not be passed from stage 1 to 2, and hence the coupling is reduced.
+        cviol = max(max(b[:num_constraints] - d@A[:, :num_constraints]), 0)
+        zdota[:nact] = [np.dot(z[:, k], A[:, iact[k]]) for k in range(nact)]
+
+    # More initialization.
+    optold = REALMAX
+    nactold = nact
+    nfail = 0
+
+    # Zaikun 20211011: vmultd is computed from scratch at each iteration, but vmultc is inherited.
+
+    # Powell's code can encounter infinite cycling, which did happen when testing the following CUTEst
+    # problems: DANWOODLS, GAUSS1LS, GAUSS2LS, GAUSS3LS, KOEBHELB, TAX13322, TAXR13322. Indeed, in all
+    # these cases, Inf/NaN appear in d due to extremely large values in A (up to 10^219).
+    # To resolve this, we set the maximal number of iterations to maxiter, and terminate if Inf/NaN occurs in d.
+    maxiter = min(10000, 100*max(num_constraints, num_vars))
+    for iter in range(maxiter):
+        if stage == 1:
+            optnew = cviol
+        else:
+            optnew = -np.dot(d, A[:, mcon-1])
+
+        # End the current stage of the calculation if 3 consecutive iterations have either failed to
+        # reduce the best calculated value of the objective function or to increase the number of active
+        # constraints since the best value was calculated. This strategy prevents cycling, but there is
+        # a remote possibility that it will cause premature termination.
+        if optnew < optold or nact > nactold:
+            nactold = nact
+            nfail = 0
+        else:
+            nfail += 1
+        optold = min(optold, optnew)
+        if nfail == 3:
+            break
+
+        # If icon exceeds nact, then we add the constraint with index iact[icon] to the active set.
+        # Apply Givens rotations so that the last num_vars-nact-1 columns of Z are orthogonal to the gradient
+        # of the new constraint, a scalar product being set to 0 if its nonzero value could be due to
+        # computer rounding errors, which is tested by ISMINOR.
+        if icon > nact:
+            # zdasav[:nact] = zdota
+            nactsav = nact
+            z, zdota, nact = qradd_Rdiag(A[:, iact[icon]], z, zdota, nact)  # May update nact to nact+1
+            # Indeed it suffices to pass zdota[:min(num_vars, nact+1)] to qradd as follows:
+            # qradd(A[:, iact[icon]], z, zdota[:min(num_vars, nact+1)], nact)
+
+            if nact == nactsav + 1:
+                # N.B.: It is possible to index arrays using [nact, icon] when nact == icon.
+                # Zaikun 20211012: Why should vmultc[nact] = 0?
+                if nact != icon:
+                    vmultc[[icon, nact]] = vmultc[nact], 0
+                    iact[[icon, nact]] = iact[[nact, icon]]
+                else:
+                    vmultc[nact] = 0
+            else:
+                # Zaikun 20211011:
+                # 1. vmultd is calculated from scratch for the first time (out of 2) in one iteration.
+                # 2. Note that iact has not been updated to replace iact[nact] with iact[icon]. Thus
+                # A[:, iact[:nact]] is the UNUPDATED version before qradd (note Z[:, :nact] remains the
+                # same before and after qradd). Therefore if we supply zdota to lsqr (as Rdiag) as
+                # Powell did, we should use the UNUPDATED version, namely zdasav.
+                # vmultd[:nact] = lsqr(A[:, iact[:nact]], A[:, iact[icon]], z[:, :nact], zdasav[:nact])
+                vmultd[:nact] = np.linalg.lstsq(A[:, iact[:nact]], A[:, iact[icon]], rcond=None)[0]
+                if not any(np.logical_and(vmultd[:nact] > 0, iact[:nact] < num_constraints)):
+                    # N.B.: This can be triggered by nact == 0 (among other possibilities)! This is
+                    # important, because nact will be used as an index in the sequel.
+                    break
+                # vmultd[nact:mcon] is not used below; in the Fortran code it must be initialized so that
+                # the where construct does not access undefined values (restricting the where construct to
+                # 1:nact would be an alternative). We keep the initialization for parity.
+                vmultd[nact:mcon] = -1  # len(vmultd) == mcon
+
+                # Revise the Lagrange multipliers. The revision is not applicable to vmultc[nact:num_constraints].
+                fracmult = [vmultc[i]/vmultd[i] if vmultd[i] > 0 and iact[i] < num_constraints else REALMAX for i in range(nact)]
+                # Only the places with vmultd > 0 and iact < num_constraints are relevant below, if any.
+                frac = min(fracmult)  # fracmult has length nact here
+                vmultc[:nact] = np.maximum(0, vmultc[:nact] - frac*vmultd[:nact])
+
+                # Zaikun 20210811: Powell's code includes the following, which is IMPOSSIBLE TO REACH.
+                # !if (icon < nact) then
+                # !    do k = icon, nact-1
+                # !        hypt = sqrt(zdota(k+1)**2+inprod(z(:, k), A(:, iact(k+1)))**2)
+                # !        grot = planerot([zdota(k+1), inprod(z(:, k), A(:, iact(k+1)))])
+                # !        z(:, [k, k+1]) = matprod(z(:, [k+1, k]), transpose(grot))
+                # !        zdota([k, k+1]) = [hypt, (zdota(k+1) / hypt) * zdota(k)]
+                # !    end do
+                # !    iact(icon:nact) = [iact(icon+1:nact), iact(icon)]
+                # !    vmultc(icon:nact) = [vmultc(icon+1:nact), vmultc(icon)]
+                # !end if
+
+                # Reorder the active constraints so that the one to be replaced is at the end of the list.
+                # Exit if the new value of zdota[nact] is not acceptable. Powell's condition for the
+                # following if: not abs(zdota[nact]) > 0. Note that it is different from
+                # 'abs(zdota[nact]) <= 0', as zdota[nact] can be NaN: Python evaluates both comparisons
+                # to False for NaN, whereas Fortran evaluates .NOT. ABS(NaN) > 0 to true; hence we test
+                # for NaN explicitly.
+                # N.B.: We cannot arrive here with nact == 0, which should have triggered a break above.
+                if np.isnan(zdota[nact]) or abs(zdota[nact]) <= EPS**2:
+                    break
+                vmultc[[icon, nact]] = 0, frac  # vmultc[[icon, nact]] is valid as icon > nact
+                iact[[icon, nact]] = iact[[nact, icon]]
+            # end if nact == nactsav + 1
+
+            # In stage 2, ensure that the objective continues to be treated as the last active constraint.
+            # Zaikun 20211011, 20211111: Is it guaranteed for stage 2 that iact[nact-1] = mcon when
+            # iact[nact] != mcon??? If not, then how does the following procedure ensure that mcon is
+            # the last of iact[:nact]?
+            if stage == 2 and iact[nact] != mcon:
+                assert nact > 1, "nact > 1 is required"
+                z, zdota[:nact] = qrexc_Rdiag(A[:, iact[:nact]], z, zdota[:nact], nact - 1)
+                # Indeed, it suffices to pass Z[:, :nact] to qrexc as follows:
+                # z[:, :nact], zdota[:nact] = qrexc(A[:, iact[:nact]], z[:, :nact], zdota[:nact], nact - 1)
+                iact[[nact-1, nact]] = iact[[nact, nact-1]]
+                vmultc[[nact-1, nact]] = vmultc[[nact, nact-1]]
+            # Zaikun 20211117: It turns out that the last few lines do not guarantee iact[nact] == num_vars in
+            # stage 2; the following test cannot be passed. IS THIS A BUG?!
+            # assert iact[nact] == mcon or stage == 1, 'iact[nact] must == mcon in stage 2'
+
+            # Powell's code does not have the following. It avoids subsequent floating point exceptions.
+            if np.isnan(zdota[nact]) or abs(zdota[nact]) <= EPS**2:
+                break
+
+            # Set sdirn to the direction of the next change to the current vector of variables.
+            # Usually during stage 1 the vector sdirn gives a search direction that reduces all the
+            # active constraint violations by one simultaneously.
+            if stage == 1:
+                sdirn -= ((np.dot(sdirn, A[:, iact[nact]]) - 1)/zdota[nact])*z[:, nact]
+            else:
+                sdirn = 1/zdota[nact]*z[:, nact]
+        else:  # icon <= nact
+            # Delete the constraint with the index iact[icon] from the active set, which is done by
+            # reordering iact[icon:nact] into [iact[icon+1:nact], iact[icon]] and then reducing nact to
+            # nact - 1. In theory, icon > 0.
+            assert icon > 0, "icon > 0 is required"
+            z, zdota[:nact] = qrexc_Rdiag(A[:, iact[:nact]], z, zdota[:nact], icon)  # qrexc does nothing if icon == nact
+            # Indeed, it suffices to pass Z[:, :nact] to qrexc as follows:
+            # z[:, :nact], zdota[:nact] = qrexc(A[:, iact[:nact]], z[:, :nact], zdota[:nact], icon)
+            # N.B.: a tuple cannot be assigned to a numpy slice, hence np.append below.
+            iact[icon:nact+1] = np.append(iact[icon+1:nact+1], iact[icon])
+            vmultc[icon:nact+1] = np.append(vmultc[icon+1:nact+1], vmultc[icon])
+            nact -= 1
+
+            # Powell's code does not have the following. It avoids subsequent exceptions.
+            # Zaikun 20221212: In theory, nact > 0 in stage 2, as the objective function should always
+            # be considered as an "active constraint" --- more precisely, iact[nact] = mcon. However,
+            # looking at the code, I cannot see why in stage 2 nact must be positive after the reduction
+            # above. It did happen in stage 1 that nact became 0 after the reduction --- this is
+            # extremely rare, and it was never observed until 20221212, after almost one year of
+            # random tests. Maybe nact is theoretically positive even in stage 1?
+            assert stage == 1 or nact > 0, "nact > 0 is required in stage 2"
+            if stage == 2 and nact <= 0:
+                break  # If this case ever occurs, we have to break, as nact is used as an index below.
+            if nact > 0:
+                if np.isnan(zdota[nact]) or abs(zdota[nact]) <= EPS**2:
+                    break
+
+            # Set sdirn to the direction of the next change to the current vector of variables.
+            if stage == 1:
+                sdirn -= np.dot(sdirn, z[:, nact+1]) * z[:, nact+1]
+                # sdirn is orthogonal to z[:, nact+1]
+            else:
+                sdirn = 1/zdota[nact] * z[:, nact]
+        # end if icon > nact
+
+        # Calculate the step to the trust region boundary or take the step that reduces cviol to 0.
+        # ----------------------------------------------------------------------------------------- #
+        # The following calculation of step is adopted from NEWUOA/BOBYQA/LINCOA. It seems to improve
+        # the performance of COBYLA. We also found that removing the precaution about underflows is
+        # beneficial to the overall performance of COBYLA --- the underflows are harmless anyway.
+        dd = delta**2 - np.dot(d, d)
+        ss = np.dot(sdirn, sdirn)
+        sd = np.dot(sdirn, d)
+        if dd <= 0 or ss <= EPS * delta**2 or np.isnan(sd):
+            break
+        # sqrtd: square root of a discriminant. The max avoids sqrtd < abs(sd) due to underflow.
+        sqrtd = max(np.sqrt(ss*dd + sd**2), abs(sd), np.sqrt(ss * dd))
+        if sd > 0:
+            step = dd / (sqrtd + sd)
+        else:
+            step = (sqrtd - sd) / ss
+        # step < 0 should not happen. Step can be 0 or NaN when, e.g., sd or ss becomes inf.
+        if step <= 0 or not np.isfinite(step):
+            break
+
+        # Powell's approach and comments are as follows.
+        # -------------------------------------------------- #
+        # The two statements below that include the factor eps prevent
+        # some harmless underflows that occurred in a test calculation
+        # (Zaikun: here, eps is the machine epsilon; Powell's original
+        # code used 1.0e-6, and Powell's code was written in single
+        # precision). Further, we skip the step if it could be 0 within
+        # a reasonable tolerance for computer rounding errors.
+
+        # !dd = delta**2 - sum(d**2, mask=(abs(d) >= EPS * delta))
+        # !ss = inprod(sdirn, sdirn)
+        # !if (dd <= 0) then
+        # !    exit
+        # !end if
+        # !sd = inprod(sdirn, d)
+        # !if (abs(sd) >= EPS * sqrt(ss * dd)) then
+        # !    step = dd / (sqrt(ss * dd + sd**2) + sd)
+        # !else
+        # !    step = dd / (sqrt(ss * dd) + sd)
+        # !end if
+        # -------------------------------------------------- #
+
+        if stage == 1:
+            if isminor(cviol, step):
+                break
+            step = min(step, cviol)
+
+        # Set dnew to the new variables if step is the steplength, and reduce cviol to the
+        # corresponding maximum residual if stage 1 is being done.
+        dnew = d + step * sdirn
+        if stage == 1:
+            cviol = max(max(b[iact[:nact]] - dnew@A[:, iact[:nact]]), 0)
+            # N.B.: cviol will be used when calculating vmultd[nact:mcon].
+
+        # Zaikun 20211011:
+        # 1. vmultd is computed from scratch for the second (out of 2) time in one iteration.
+        # 2. vmultd[:nact] and vmultd[nact:mcon] are calculated separately with no coupling.
+        # 3. vmultd will be calculated from scratch again in the next iteration.
+        # Set vmultd to the vmultc vector that would occur if d became dnew. A device is included to
+        # force vmultd[k] = 0 if deviations from this value can be attributed to computer rounding
+        # errors. First calculate the new Lagrange multipliers.
+        # vmultd[:nact] = lsqr(A[:, iact[:nact]], dnew, z[:, :nact], zdota[:nact])
+        vmultd[:nact] = np.linalg.lstsq(A[:, iact[:nact]], dnew, rcond=None)[0]
+        if stage == 2:
+            vmultd[nact-1] = max(0, vmultd[nact-1])  # This seems never activated.
+        # Complete vmultd by finding the new constraint residuals. (Powell wrote "Complete vmultc ...")
+        cvshift = dnew@A[:, iact] - b[iact] + cviol  # Only cvshift[nact:mcon] is needed.
+        # N.B.: dnew@A[:, iact] is a vector (Fortran's matprod of a vector and a matrix), so cvshift
+        # is a vector as intended.
+        cvsabs = abs(dnew)@abs(A[:, iact]) + abs(b[iact]) + cviol
+        cvshift[isminor(cvshift, cvsabs)] = 0
+        vmultd[nact:mcon] = cvshift[nact:mcon]
+
+        # Calculate the fraction of the step from d to dnew that will be taken.
+        fracmult = np.array([vmultc[i]/(vmultc[i] - vmultd[i]) if vmultd[i] < 0 else REALMAX for i in range(mcon)])
+        # Only the places with vmultd < 0 are relevant below, if any.
+        frac = min([1, *fracmult])
+        # icon = -1 signals that no ratio is below 1 (Powell's code uses icon == 0 for this); the
+        # check at the end of the loop body then exits.
+        icon = np.where(fracmult == frac)[0][0] if frac < 1 else -1
+
+        # Update d, vmultc, and cviol.
+        dold = d
+        d = (1 - frac)*d + frac * dnew
+        # Break in the case of inf/nan in d.
+        if not np.isfinite(d).all():
+            d = dold  # Should we restore also iact, nact, vmultc, and z?
+            break
+
+        vmultc = np.maximum(0, (1 - frac)*vmultc + frac*vmultd)  # elementwise, as Fortran's MAX
+        if stage == 1:
+            # cviol = (1 - frac) * cvold + frac * cviol  # Powell's version
+            # In theory, cviol = max(max(b[:num_constraints] - d@A[:, :num_constraints]), 0), yet the
+            # cviol updated as above can be quite different from this value if A has huge entries (e.g., > 1e20).
+            cviol = max(max(b[:num_constraints] - d@A[:, :num_constraints]), 0)
+
+        if icon < 0 or icon >= mcon:
+            # icon == -1 means no constraint attained the minimal ratio; this cannot happen unless
+            # fracmult contains only NaN, which should not happen; icon >= mcon should never occur.
+            break
+    return iact, nact, d, vmultc, z
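+
+
+# The following self-check is not part of Powell's algorithm or the Fortran sources; it is a
+# minimal sketch with made-up data of the step-length formula used in the loop above: the
+# positive root t of ||d + t*sdirn|| = delta equals dd/(sqrtd + sd) and also (sqrtd - sd)/ss;
+# the branch on the sign of sd merely picks the form that avoids cancellation.
+if __name__ == "__main__":
+    rng = np.random.default_rng(0)
+    d = rng.standard_normal(3)
+    sdirn = rng.standard_normal(3)
+    delta = 2 * np.linalg.norm(d)  # ensures dd = delta**2 - d@d > 0
+    dd = delta**2 - np.dot(d, d)
+    ss = np.dot(sdirn, sdirn)
+    sd = np.dot(sdirn, d)
+    sqrtd = np.sqrt(ss*dd + sd**2)
+    step = dd/(sqrtd + sd) if sd > 0 else (sqrtd - sd)/ss
+    # The step lands exactly on the trust-region boundary.
+    assert np.isclose(np.linalg.norm(d + step*sdirn), delta)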
diff --git a/python/src/prima/cobyla/update.py b/python/src/prima/cobyla/update.py
new file mode 100644
index 0000000000..8a3273853b
--- /dev/null
+++ b/python/src/prima/cobyla/update.py
@@ -0,0 +1,164 @@
+from prima.common.consts import DAMAGING_ROUNDING, INFO_DEFAULT
+import numpy as np
+
+
+def updatexfc(jdrop, constr, cpen, cstrv, d, f, conmat, cval, fval, sim, simi):
+    # This function revises the simplex by updating the elements of sim, simi, fval, conmat, and cval.
+
+    itol = 1
+
+    num_constraints = len(constr)
+    num_vars = sim.shape[0]
+
+    # Do nothing when jdrop is None. This can only happen after a trust-region step.
+    if jdrop is None:  # jdrop is None is impossible if the input is correct.
+        return conmat, cval, fval, sim, simi, INFO_DEFAULT
+
+    # Actual copies, so that the restoration below is not a no-op on the mutated arrays.
+    sim_old = sim.copy()
+    simi_old = simi.copy()
+
+    if jdrop < num_vars:
+        sim[:, jdrop] = d
+        simi_jdrop = simi[jdrop, :] / np.dot(simi[jdrop, :], d)
+        simi -= np.outer(simi@d, simi_jdrop)
+        simi[jdrop, :] = simi_jdrop
+    else:  # jdrop == num_vars
+        sim[:, num_vars] += d
+        sim[:, :num_vars] -= np.tile(d, (num_vars, 1)).T
+        simid = simi@d
+        sum_simi = np.sum(simi, axis=0)
+        simi += np.outer(simid, sum_simi / (1 - np.sum(simid)))
+
+    erri = np.max(np.abs(simi@sim[:, :num_vars] - np.eye(num_vars)))  # Fortran: maxval(abs(...))
+    if erri > 0.1 * itol or np.isnan(erri):
+        simi_test = np.linalg.inv(sim[:, :num_vars])
+        erri_test = np.max(np.abs(simi_test@sim[:, :num_vars] - np.eye(num_vars)))
+        if erri_test < erri or (np.isnan(erri) and not np.isnan(erri_test)):
+            simi = simi_test
+            erri = erri_test
+
+    if erri <= itol:
+        fval[jdrop] = f
+        conmat[:, jdrop] = constr
+        cval[jdrop] = cstrv
+        # Switch the best vertex to the pole position sim[:, num_vars] if it is not there already.
+        conmat, cval, fval, sim, simi, info = updatepole(cpen, conmat, cval, fval, sim, simi)
+    else:
+        info = DAMAGING_ROUNDING
+        sim = sim_old
+        simi = simi_old
+
+    return conmat, cval, fval, sim, simi, info  # same order as the early return above
+
+
+def findpole(cpen, cval, fval):
+    # This subroutine identifies the best vertex of the current simplex with respect to the merit
+    # function PHI = F + CPEN * CSTRV.
+    n = len(fval) - 1  # used for debugging which I have not implemented yet
+
+    # Identify the optimal vertex of the current simplex.
+    jopt = len(fval) - 1
+    phi = fval + cpen * cval
+    phimin = min(phi)
+    joptcandidate = np.argmin(phi)
+    if phi[joptcandidate] < phi[jopt]:
+        jopt = joptcandidate
+    if cpen <= 0 and any(np.logical_and(cval < cval[jopt], phi <= phimin)):
+        # jopt is the index of the minimum value of cval
+        # on the set of cval values where phi <= phimin.
+        jopt = np.where(cval == min(cval[phi <= phimin]))[0][0]
+    return jopt
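+
+
+# Illustration, not taken from the Fortran sources (the data is invented): with cpen = 1 the
+# merit values are phi = fval + cpen*cval = [1.5, 2.0, 3.2], so findpole moves jopt from the
+# default pole position (the last vertex) to vertex 0.
+if __name__ == "__main__":
+    _fval = np.array([1.0, 2.0, 3.0])
+    _cval = np.array([0.5, 0.0, 0.2])
+    assert findpole(cpen=1.0, cval=_cval, fval=_fval) == 0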
+
+
+def updatepole(cpen, conmat, cval, fval, sim, simi):
+    #--------------------------------------------------------------------------------------------------#
+    # This subroutine identifies the best vertex of the current simplex with respect to the merit
+    # function PHI = F + CPEN * CSTRV, and then switches this vertex to SIM(:, N + 1), which Powell
+    # called the "pole position" in his comments. CONMAT, CVAL, FVAL, and SIMI are updated accordingly.
+    #
+    # N.B. 1: In precise arithmetic, the following two procedures produce the same results:
+    # 1) apply UPDATEPOLE to SIM twice, first with CPEN = CPEN1 and then with CPEN = CPEN2;
+    # 2) apply UPDATEPOLE to SIM with CPEN = CPEN2.
+    # In finite-precision arithmetic, however, they may produce different results unless CPEN1 = CPEN2.
+    #
+    # N.B. 2: When JOPT == N+1, the best vertex is already at the pole position, so there is nothing to
+    # switch. However, as in Powell's code, the code below will check whether SIMI is good enough to
+    # work as the inverse of SIM(:, 1:N) or not. If not, Powell's code would invoke an error return of
+    # COBYLB; our implementation, however, will try calculating SIMI from scratch; if the recalculated
+    # SIMI is still of poor quality, then UPDATEPOLE will return with INFO = DAMAGING_ROUNDING,
+    # informing COBYLB that SIMI is poor due to damaging rounding errors.
+    #
+    # N.B. 3: UPDATEPOLE should be called when and only when FINDPOLE can potentially return a value
+    # other than N+1. The value of FINDPOLE is determined by CPEN, CVAL, and FVAL, the latter two being
+    # decided by SIM. Thus UPDATEPOLE should be called after CPEN or SIM changes. COBYLA updates CPEN at
+    # only two places: the beginning of each trust-region iteration, and when REDRHO is called;
+    # SIM is updated only by UPDATEXFC, which itself calls UPDATEPOLE internally. Therefore, we only
+    # need to call UPDATEPOLE after updating CPEN at the beginning of each trust-region iteration and
+    # after each invocation of REDRHO.
+    #--------------------------------------------------------------------------------------------------#
+    # List of local arrays (including function-output arrays; likely to be stored on the stack):
+    # REAL(RP) :: CONMAT_OLD(M, N+1), CVAL_OLD(N+1), FVAL_OLD(N+1), SIM_JDROP(N), SIM_OLD(N, N+1), &
+    #     & SIMI_OLD(N, N), SIMI_TEST(N, N)
+    # Size of local arrays: REAL(RP)*((M+N+2)*(N+1) + N + 2*N^2) (TO BE REDUCED TO 2*N + N^2 by removing
+    # *_OLD. Should we allocate SIMI_TEST? Yes, it is needed quite rarely!)
+    #--------------------------------------------------------------------------------------------------#
+    num_constraints = conmat.shape[0]
+    num_vars = sim.shape[0]
+    info = INFO_DEFAULT
+    itol = 1  # Why is this a separate variable? It's not even an input.
+
+    # Identify the optimal vertex of the current simplex.
+    jopt = findpole(cpen, cval, fval)
+
+    # Switch the best vertex to the pole position sim[:, -1] if it is not there already. Then update
+    # CONMAT etc. Before the update, save a copy of CONMAT etc. If the update is unsuccessful due to
+    # damaging rounding errors, we restore them for COBYLA to extract X/F/C from the undamaged data.
+    # N.B.: these must be actual copies, or the in-place updates below would clobber them.
+    fval_old = fval.copy()
+    conmat_old = conmat.copy()
+    cval_old = cval.copy()
+    sim_old = sim.copy()
+    simi_old = simi.copy()
+    if 0 <= jopt < num_vars:
+        # Unless there is a bug in findpole, it is guaranteed that jopt >= 0.
+        # When jopt == num_vars, there is nothing to switch, and simi[jopt, :]
+        # would be illegal.
+        fval[[jopt, -1]] = fval[[-1, jopt]]
+        conmat[:, [jopt, -1]] = conmat[:, [-1, jopt]]  # Exchange CONMAT[:, JOPT] AND CONMAT[:, -1]
+        cval[[jopt, -1]] = cval[[-1, jopt]]
+        sim[:, -1] += sim[:, jopt]
+        sim_jopt = sim[:, jopt].copy()
+        sim[:, jopt] = 0
+        sim[:, :num_vars] -= np.tile(sim_jopt, (num_vars, 1)).T
+        # The above update is equivalent to multiplying sim[:, :num_vars] from the right
+        # by a matrix whose jopt-th row is [-1, -1, ... -1],
+        # while all other rows are the same as those of the identity matrix. It
+        # is easy to check that the inverse of this matrix is itself. Therefore
+        # simi should be updated by a multiplication with this matrix (i.e. its inverse)
+        # from the left side, as is done in the following line. The jopt-th row of
+        # the updated simi is minus the sum of all rows of the original simi, whereas
+        # all the other rows remain unchanged.
+        simi[jopt, :] = -np.sum(simi, axis=0)
+        # TODO: I don't quite understand the above comment, I'd like to check it
+        # against the fortran code.
+
+    # Check whether SIMI is a poor approximation to the inverse of sim[:, :num_vars].
+    # Calculate simi from scratch if the current one is damaged by rounding errors.
+    erri = np.max(np.abs(simi@sim[:, :num_vars] - np.eye(num_vars)))
+    if erri > 0.1 * itol or np.isnan(erri):
+        simi_test = np.linalg.inv(sim[:, :num_vars])
+        erri_test = np.max(np.abs(simi_test@sim[:, :num_vars] - np.eye(num_vars)))
+        if erri_test < erri or (np.isnan(erri) and not np.isnan(erri_test)):
+            simi = simi_test
+            erri = erri_test
+
+    # If the recalculated simi is still damaged, then restore the data to the
+    # version before the update.
+    if erri > itol or np.isnan(erri):
+        info = DAMAGING_ROUNDING
+        fval = fval_old
+        conmat = conmat_old
+        cval = cval_old
+        sim = sim_old
+        simi = simi_old
+
+    return conmat, cval, fval, sim, simi, info
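+
+
+# Hypothetical sanity check (invented data, run only as a script; not part of the translation):
+# vertex 0 has the best merit value, so updatepole swaps it into the pole position sim[:, -1]
+# while keeping simi a valid inverse of sim[:, :num_vars].
+if __name__ == "__main__":
+    sim = np.eye(2, 3)  # vertices e1 and e2, plus the pole x0 = 0 in the last column
+    simi = np.eye(2)
+    fval = np.array([1.0, 2.0, 3.0])
+    cval = np.zeros(3)
+    conmat = np.zeros((1, 3))
+    conmat, cval, fval, sim, simi, info = updatepole(1.0, conmat, cval, fval, sim, simi)
+    assert info == INFO_DEFAULT and fval[-1] == 1.0
+    assert np.allclose(simi @ sim[:, :2], np.eye(2))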
diff --git a/python/src/prima/common/consts.py b/python/src/prima/common/consts.py
new file mode 100644
index 0000000000..fde68b38aa
--- /dev/null
+++ b/python/src/prima/common/consts.py
@@ -0,0 +1,22 @@
+import numpy as np
+
+REALMIN = np.finfo(float).tiny
+REALMAX = np.finfo(float).max
+RHOBEG_DEFAULT = 1
+RHOEND_DEFAULT = 1e-6
+FUNCMAX = 2.0**100
+CONSTRMAX = FUNCMAX
+EPS = np.finfo(float).eps
+CTOL_DEFAULT = EPS
+CWEIGHT_DEFAULT = 1e8
+FTARGET_DEFAULT = -REALMAX
+IPRINT_DEFAULT = False
+MAXFUN_DIM_DEFAULT = 500
+INFO_DEFAULT = 0
+SMALL_TR_RADIUS = 0  # Why is this the same as the default?
+MAXTR_REACHED = 20
+DAMAGING_ROUNDING = 7
+NAN_INF_X = -1
+NAN_INF_F = -2
+FTARGET_ACHIEVED = 1
+MAXFUN_REACHED = 3
\ No newline at end of file
diff --git a/python/src/prima/common/evaluate.py b/python/src/prima/common/evaluate.py
new file mode 100644
index 0000000000..d16cabf419
--- /dev/null
+++ b/python/src/prima/common/evaluate.py
@@ -0,0 +1,36 @@
+import numpy as np
+from prima.common.consts import FUNCMAX, CONSTRMAX
+
+# This is a module evaluating the objective/constraint function with
+# NaN/Inf handling.
+
+
+def moderatex(x):
+    np.nan_to_num(x, copy=False, nan=FUNCMAX)
+    x = np.clip(x, -np.finfo(float).max, np.finfo(float).max)
+    return x
+
+def moderatef(f):
+    f = FUNCMAX if np.isnan(f) else f
+    return min(FUNCMAX, f)
+
+def moderatec(c):
+    np.nan_to_num(c, copy=False, nan=-CONSTRMAX)
+    c = np.clip(c, -CONSTRMAX, CONSTRMAX)
+    return c
+
+
+def evaluate(calfun, x):
+    """
+    This function evaluates calfun at x, setting f to the objective function value.
+    NaN/Inf are handled by a moderated extreme barrier.
+    """
+
+    assert not any(np.isnan(x)), "x contains NaN"
+
+    f, constr = calfun(moderatex(x))
+    f = moderatef(f)
+    constr = moderatec(constr)
+    cstrv = max([*-constr, 0])
+
+    return f, constr, cstrv
\ No newline at end of file
diff --git a/python/src/prima/common/linalg.py b/python/src/prima/common/linalg.py
new file mode 100644
index 0000000000..c6dcbec7c7
--- /dev/null
+++ b/python/src/prima/common/linalg.py
@@ -0,0 +1,49 @@
+import numpy as np
+
+def planerot(x):
+    '''
+    As in MATLAB, planerot(x) returns a 2x2 Givens matrix G for x in R2 so that Y=G@x has Y[1] = 0.
+    Roughly speaking, G = np.array([[x[0]/R, x[1]/R], [-x[1]/R, x[0]/R]]), where R = np.linalg.norm(x).
+    0. We need to take care of the possibilities of R=0, Inf, NaN, and over/underflow.
+    1. The G defined above is continuous with respect to X except at 0. Following this definition,
+       G = np.array([[np.sign(x[0]), 0], [0, np.sign(x[0])]]) if x[1] == 0,
+       G = np.array([[0, np.sign(x[1])], [np.sign(x[1]), 0]]) if x[0] == 0.
+       Yet some implementations ignore the signs, leading to discontinuity and numerical instability.
+    2. Difference from MATLAB: if x contains NaN or consists of only Inf, MATLAB returns a NaN matrix,
+       but we return an identity matrix or a matrix of +/-np.sqrt(2). We intend to keep G always orthogonal.
+    '''
+    assert len(x) == 2, "x must be a 2-vector"
+
+    # Define c = x[0]/R and s = x[1]/R with R = hypot(x). Handle Inf/NaN.
+    # if any(np.isnan(x)):
+    #     c = 1
+    #     s = 0
+    # elif all(np.isinf(x)):
+    # TODO: Handle inf/nan
+    R = np.linalg.norm(x)
+    G = np.array([[x[0]/R, x[1]/R], [-x[1]/R, x[0]/R]])
+    return G
+
+
+def isminor(x, ref):
+    '''
+    This function tests whether x is minor compared to ref. It is used by Powell, e.g., in COBYLA.
+    In precise arithmetic, isminor(x, ref) is true if and only if x == 0; in floating point
+    arithmetic, isminor(x, ref) is true if x is 0 or its nonzero value can be attributed to
+    computer rounding errors according to ref.
+    Larger sensitivity means the function is more strict/precise, the value 0.1 being due to Powell.
+
+    For example:
+    isminor(1e-20, 1e300) -> True, because in floating point arithmetic 1e-20 cannot be added to
+    1e300 without being rounded to 1e300.
+    isminor(1e300, 1e-20) -> False, because in floating point arithmetic adding 1e300 to 1e-20
+    dominates the latter number.
+    isminor(3, 4) -> False, because 3 can be added to 4 without being rounded off.
+    '''
+
+    sensitivity = 0.1
+    refa = abs(ref) + sensitivity * abs(x)
+    refb = abs(ref) + 2 * sensitivity * abs(x)
+    return abs(ref) >= refa or refa >= refb
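+
+
+# Quick illustrative checks (made-up values, run only as a script; not part of the translation):
+# the Givens matrix from planerot zeroes the second component of x while preserving its norm,
+# and the isminor examples from the docstring above behave as described.
+if __name__ == "__main__":
+    y = planerot(np.array([3.0, 4.0])) @ np.array([3.0, 4.0])
+    assert np.allclose(y, [5.0, 0.0])
+    assert isminor(1e-20, 1e300) and not isminor(1e300, 1e-20)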
diff --git a/python/src/prima/common/output.py b/python/src/prima/common/output.py
new file mode 100644
index 0000000000..151b150eb9
--- /dev/null
+++ b/python/src/prima/common/output.py
@@ -0,0 +1,20 @@
+def fmsg(solver, iprint, nf, f, x, cstrv=None, constr=None):
+    if abs(iprint) < 3:
+        return
+    elif iprint > 0:
+        output = "stdout"
+    else:
+        output = "file"
+        filename = solver + "_output.txt"
+
+    # Decide whether the problem is truly constrained.
+    if constr is None:
+        is_constrained = cstrv is not None
+    else:
+        is_constrained = bool(len(constr))
+
+    # Decide the constraint violation.
+
+    # Will finish this later, not important for now.
+
+
diff --git a/python/src/prima/common/powalg.py b/python/src/prima/common/powalg.py
new file mode 100644
index 0000000000..1a0a469067
--- /dev/null
+++ b/python/src/prima/common/powalg.py
@@ -0,0 +1,109 @@
+import numpy as np
+from prima.common.linalg import isminor, planerot
+from prima.common.consts import EPS
+
+
+def qradd_Rdiag(c, Q, Rdiag, n):
+    '''
+    This function updates the QR factorization of an MxN matrix A of full column rank, attempting to
+    add a new column C to this matrix as the LAST column while maintaining the full-rankness.
+    Case 1. If C is not in range(A) (theoretically, it implies N < M), then the new matrix is np.hstack([A, C]).
+    Case 2. If C is in range(A), then the new matrix is np.hstack([A[:, :n-1], C]).
+    N.B.:
+    0. Instead of R, this subroutine updates Rdiag, which is np.diag(R), with a size at most M and at
+    least min(m, n+1). The number is min(m, n+1) rather than min(m, n) as n may be augmented by 1 in
+    the function.
+    1. With the two cases specified as above, this function does not need A as an input.
+    2. The function changes only Q[:, nsave:m] (nsave is the original value of n) and
+    R[:, n-1] (n takes the updated value).
+    3. Indeed, when C is in range(A), Powell wrote in comments that "set IOUT to the index of the
+    constraint (here, column of A --- Zaikun) to be deleted, but branch if no suitable index can be
+    found". The idea is to replace a column of A by C so that the new matrix still has full rank
+    (such a column must exist unless C = 0). But his code essentially sets iout=n always. Maybe he
+    found this worked well enough in practice. Meanwhile, Powell's code includes a snippet that can
+    never be reached, which was probably intended to deal with the case that IOUT != n.
+    '''
+    m = Q.shape[1]
+    nsave = n  # Needed for debugging (only)
+
+    # As in Powell's COBYLA, CQ is set to 0 at the positions with CQ being negligible as per ISMINOR.
+    # This may not be the best choice if the subroutine is used in other contexts, e.g. LINCOA.
+    cq = c @ Q
+    cqa = abs(c) @ abs(Q)
+    # The line below basically makes an element of cq 0 if adding it to the corresponding element of
+    # cqa does not change the latter.
+    cq = np.array([0 if isminor(cqi, cqai) else cqi for cqi, cqai in zip(cq, cqa)])
+
+    # Update Q so that the columns of Q[:, n+1:m] are orthogonal to C. This is done by applying a 2D
+    # Givens rotation to Q[:, [k, k+1]] from the right to zero C' @ Q[:, k+1] out for k = n, ..., m-2
+    # (0-based; Powell's loop runs over K = N+1, ..., M-1 in 1-based indexing). Nothing will be done
+    # if n >= m-1.
+    for k in range(m-2, n-1, -1):
+        if abs(cq[k+1]) > 0:
+            # Powell wrote cq[k+1] != 0 instead of abs. The two differ if cq[k+1] is NaN.
+            # If we apply the rotation below when cq[k+1] = 0, then cq[k] will get updated to |cq[k]|.
+            G = planerot(cq[[k, k+1]])
+            Q[:, [k, k+1]] = Q[:, [k, k+1]] @ G.T
+            cq[k] = np.linalg.norm(cq[k:k+2])
+    # TODO: I have no idea if I'm handling these indices correctly. Will figure it out at debugging.
+
+    # Augment n by 1 if C is not in range(A).
+    # The two ifs cannot be merged, as Fortran may evaluate cq(n+1) even if n >= m, leading to a
+    # segfault (in Python, an IndexError).
+    if n < m:
+        # Powell's condition for the following if: cq(n+1) != 0. N.B.: 0-based cq[n] is Powell's CQ(N+1).
+        if abs(cq[n]) > EPS**2 and not isminor(cq[n], cqa[n]):
+            n += 1
+
+    # Update Rdiag so that Rdiag[n-1] = cq[n-1] = np.dot(c, Q[:, n-1]) (0-based). Note that n may
+    # have been augmented.
+    if 1 <= n <= m:  # n > m should not happen unless the input is wrong
+        Rdiag[n-1] = cq[n-1]
+    return Q, Rdiag, n
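+
+
+# Context sketch, independent of qradd_Rdiag and not part of the translation (illustrative data
+# only): appending a column c to a full-rank A amounts to orthogonalizing c against range(A);
+# the part of c that survives is orthogonal to every column of A, which is the invariant the
+# Givens rotations above maintain for the trailing columns of Q.
+if __name__ == "__main__":
+    rng = np.random.default_rng(1)
+    A = rng.standard_normal((5, 3))
+    c = rng.standard_normal(5)
+    Q_basis, _ = np.linalg.qr(A)           # orthonormal basis of range(A)
+    resid = c - Q_basis @ (Q_basis.T @ c)  # component of c outside range(A)
+    assert np.allclose(A.T @ resid, 0, atol=1e-12)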
+
+
+def qrexc_Rdiag(A, Q, Rdiag, i):  # Used in COBYLA
+    '''
+    This function updates the QR factorization for an MxN matrix A=Q@R so that the updated Q and
+    R form a QR factorization of [A_0, ..., A_{I-1}, A_{I+1}, ..., A_{N-1}, A_I], which is the matrix
+    obtained by rearranging columns [I, I+1, ..., N-1] of A to [I+1, ..., N-1, I]. Here A is ASSUMED TO
+    BE OF FULL COLUMN RANK, Q is a matrix whose columns are orthogonal, and R, which is not present,
+    is an upper triangular matrix whose diagonal entries are nonzero. Q and R need not be square.
+    N.B.:
+    0. Instead of R, this function updates Rdiag, which is np.diag(R), the size being n.
+    1. With L = Q.shape[1] = R.shape[0], we have M >= L >= N. Most often L = M or N.
+    2. This function changes only Q[:, i:] and Rdiag[i:].
+    '''
+    m, n = A.shape
+
+    if i < 0 or i >= n:
+        return Q, Rdiag
+
+    # Let R be the upper triangular matrix in the QR factorization, namely R = Q.T@A.
+    # For each k, find the Givens rotation G with G@(R[k:k+2, :]) = [hypt, 0], and update Q[:, k:k+2]
+    # to Q[:, k:k+2]@(G.T). Then R = Q.T@A is an upper triangular matrix as long as A[:, [k, k+1]] is
+    # updated to A[:, [k+1, k]]. Indeed, this new upper triangular matrix can be obtained by first
+    # updating R[[k, k+1], :] to G@(R[[k, k+1], :]) and then exchanging its columns k and k+1; at the
+    # same time, entries k and k+1 of R's diagonal Rdiag become [hypt, -(Rdiag[k+1]/hypt)*Rdiag[k]].
+    # After this is done for each k = i, ..., n-2, we obtain the QR factorization of the matrix that
+    # rearranges columns [i, i+1, ..., n-1] of A as [i+1, ..., n-1, i].
+    # Powell's code, however, is slightly different: before everything, he first exchanged columns k
+    # and k+1 of Q (as well as rows k and k+1 of R). This makes sure that the entries of the updated
+    # Rdiag are all positive if it is the case for the original Rdiag.
+    # TODO: I need some confidence that the i coming into this function is correct.
+    for k in range(i, n-1):  # 0-based; the last pair rotated is (n-2, n-1)
+        G = planerot(np.array([Rdiag[k+1], np.dot(Q[:, k], A[:, k+1])]))
+        Q[:, [k, k+1]] = Q[:, [k+1, k]]@(G.T)
+        # Powell's code updates Rdiag in the following way:
+        # hypt = np.sqrt(Rdiag[k+1]**2 + np.dot(Q[:, k], A[:, k+1])**2)
+        # Rdiag[[k, k+1]] = [hypt, (Rdiag[k+1]/hypt)*Rdiag[k]]
+        # Note that Rdiag[n-1] inherits all rounding in Rdiag[i:n-1] and Q[:, i:n-1] and hence contains
+        # significant errors. Thus we may modify Powell's code to set only Rdiag[k] = hypt here and then
+        # calculate Rdiag[n-1] by an inner product after the loop. Nevertheless, we simply calculate
+        # Rdiag from scratch below.
+
+    # Calculate Rdiag[i:] from scratch, keeping Rdiag[:i] intact.
+    Rdiag = np.concatenate([Rdiag[:i],
+                            [np.dot(Q[:, k], A[:, k+1]) for k in range(i, n-1)],
+                            [np.dot(Q[:, n-1], A[:, i])]])
+
+    return Q, Rdiag
\ No newline at end of file
diff --git a/python/src/prima/common/selectx.py b/python/src/prima/common/selectx.py
new file mode 100644
index 0000000000..0edde534e0
--- /dev/null
+++ b/python/src/prima/common/selectx.py
@@ -0,0 +1,179 @@
+'''
+This module provides subroutines that ensure the returned X is optimal among all the calculated
+points in the sense that no other point achieves both lower function value and lower constraint
+violation at the same time. This module is needed only in the constrained case.
+
+Coded by Zaikun ZHANG (www.zhangzk.net).
+
+Python implementation by Nickolai Belakovski
+'''
+
+import numpy as np
+import numpy.typing as npt
+from prima.common.consts import EPS, CONSTRMAX, REALMAX, FUNCMAX
+
+def isbetter00(f1: float, c1: float, f2: float, c2: float, ctol: float) -> bool:
+    isbetter = False
+    isbetter = isbetter or (any(np.isnan([f1, c1])) and not any(np.isnan([f2, c2])))
+    isbetter = isbetter or (f1 < f2 and c1 <= c2)
+    isbetter = isbetter or (f1 <= f2 and c1 < c2)
+    # If C1 <= CTOL and C2 is significantly larger/worse than CTOL, i.e., C2 > MAX(CTOL, CREF),
+    # then FC1 is better than FC2 as long as F1 < REALMAX. Normally CREF >= CTOL so MAX(CTOL, CREF)
+    # is indeed CREF. However, this may not be true if CTOL > 1E-1*CONSTRMAX.
+    cref = 10 * max(EPS, min(ctol, 1.0E-2 * CONSTRMAX))  # The MIN avoids overflow.
+    isbetter = isbetter or (f1 < REALMAX and c1 <= ctol and (c2 > max(ctol, cref) or np.isnan(c2)))
+    return isbetter
+
+
+def isbetter10(f1: npt.NDArray | list[float], c1: npt.NDArray | list[float], f2: float, c2: float, ctol: float) -> list[bool]:
+    isbetter = [isbetter00(f1i, c1i, f2, c2, ctol) for f1i, c1i in zip(f1, c1)]
+    return isbetter
+
+
+def isbetter01(f1: float, c1: float, f2: npt.NDArray | list[float], c2: npt.NDArray | list[float], ctol: float) -> list[bool]:
+    isbetter = [isbetter00(f1, c1, f2i, c2i, ctol) for f2i, c2i in zip(f2, c2)]
+    return isbetter
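+
+
+# Small illustration (values invented; not part of the translation): a point that is feasible
+# and lower in f dominates, while a higher constraint violation is never excused when
+# ctol = 1e-10. savefilt and selectx below build on exactly this comparison.
+if __name__ == "__main__":
+    assert isbetter00(f1=1.0, c1=0.0, f2=2.0, c2=0.0, ctol=1e-10)
+    assert not isbetter00(f1=1.0, c1=0.5, f2=2.0, c2=0.0, ctol=1e-10)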
+
+
+def savefilt(cstrv, ctol, cweight, f, x, nfilt, cfilt, ffilt, xfilt, constr=None, confilt=None):
+    '''
+    This subroutine saves x, f, and cstrv in xfilt, ffilt, and cfilt (and constr in confilt
+    if they are present), unless a vector in xfilt[:, :nfilt] is better than x.
+    If x is better than some vectors in xfilt[:, :nfilt], then these vectors will be
+    removed. If x is not better than any of xfilt[:, :nfilt], but nfilt == maxfilt,
+    then we remove a column from xfilt according to the merit function
+    phi = ffilt + cweight * max(cfilt - ctol, 0)
+    N.B.:
+    1. Only xfilt[:, :nfilt] and ffilt[:nfilt] etc contain valid information,
+    while xfilt[:, nfilt:maxfilt] and ffilt[nfilt:maxfilt] etc are not
+    initialized yet.
+    2. We decide whether an x is better than another by the isbetter functions.
+    '''
+    if constr is None:
+        num_constraints = 0
+    else:
+        num_constraints = constr.size
+    num_vars = x.size
+    maxfilt = ffilt.size
+
+    # Return immediately if any column of xfilt is better than x.
+    if any(isbetter10(ffilt[:nfilt], cfilt[:nfilt], f, cstrv, ctol)):
+        return nfilt, cfilt, ffilt, xfilt, confilt
+
+    # If NFILT == MAXFILT and X is not better than any column of XFILT, then we remove the worst column
+    # of XFILT according to the merit function PHI = FFILT + CWEIGHT * MAX(CFILT - CTOL, ZERO).
+    keep = [not element for element in isbetter01(f, cstrv, ffilt[:nfilt], cfilt[:nfilt], ctol)]
+    if sum(keep) == maxfilt:  # In this case, NFILT = SIZE(KEEP) = COUNT(KEEP) = MAXFILT > 0.
+        cfilt_shifted = np.maximum(cfilt - ctol, 0)
+        if cweight <= 0:
+            phi = ffilt
+        elif np.isposinf(cweight):
+            phi = cfilt_shifted
+            # We should not use CFILT here; if MAX(CFILT_SHIFTED) is attained at multiple indices, then
+            # we will check FFILT to exhaust the remaining degree of freedom.
+        else:
+            phi = np.maximum(ffilt, -REALMAX) + cweight * cfilt_shifted
+            # np.maximum(ffilt, -REALMAX) makes sure that PHI will not contain NaN (unless there is a bug).
+        # We select X to maximize PHI. In case there are multiple maximizers, we take the one with the
+        # largest CSTRV_SHIFTED; if there are more than one choices, we take the one with the largest F;
+        # if there are several candidates, we take the one with the largest CSTRV; if the last comparison
+        # still leads to more than one possibilities, then they are equally bad and we choose the first.
+        # N.B.:
+        # 1. This process is the opposite of selecting KOPT in SELECTX.
+        # 2. In finite-precision arithmetic, PHI_1 == PHI_2 and CSTRV_SHIFTED_1 == CSTRV_SHIFTED_2 do
+        # not ensure that F_1 == F_2!
+        phimax = max(phi)
+        cref = max(cfilt_shifted[phi >= phimax])
+        fref = max(ffilt[cfilt_shifted >= cref])
+        kworst = np.where(cfilt == max(cfilt[ffilt <= fref]))[0][0]
+        if kworst < 0 or kworst >= len(keep):  # For security. Should not happen.
+            kworst = 0
+        keep[kworst] = False
+
+    # Keep the surviving points, in their original order, at the front of the filter, as the
+    # Fortran code does with PACK.
+    nfilt = sum(keep)
+    index_to_keep = np.where(keep)[0]
+    xfilt[:, :nfilt] = xfilt[:, index_to_keep]
+    ffilt[:nfilt] = ffilt[index_to_keep]
+    cfilt[:nfilt] = cfilt[index_to_keep]
+    if confilt is not None and constr is not None:
+        confilt[:, :nfilt] = confilt[:, index_to_keep]
+
+    # Append the new point at position nfilt (0-based), then increment nfilt.
+    xfilt[:, nfilt] = x
+    ffilt[nfilt] = f
+    cfilt[nfilt] = cstrv
+    if confilt is not None and constr is not None:
+        confilt[:, nfilt] = constr
+    nfilt += 1
+
+    return nfilt, cfilt, ffilt, xfilt, confilt
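+
+
+# Minimal usage sketch for savefilt (invented data, run only as a script; not part of the
+# translation): saving one point into an empty filter of capacity 3 stores it at position 0
+# and returns nfilt = 1.
+if __name__ == "__main__":
+    _cfilt, _ffilt = np.zeros(3), np.zeros(3)
+    _xfilt = np.zeros((2, 3))
+    _nfilt, _cfilt, _ffilt, _xfilt, _ = savefilt(
+        0.0, 1e-10, 1e8, 1.5, np.array([0.5, 0.5]), 0, _cfilt, _ffilt, _xfilt)
+    assert _nfilt == 1 and _ffilt[0] == 1.5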
+
+
+def selectx(fhist: npt.NDArray, chist: npt.NDArray, cweight: float, ctol: float):
+    '''
+    This subroutine selects X according to fhist and chist, which represent (a part of) the history
+    of f and cstrv. Normally, fhist and chist are not the full history but only a filter, e.g. ffilt
+    and cfilt generated by savefilt. However, we name them fhist and chist because the [f, cstrv]
+    in a filter should not dominate each other, but this subroutine does NOT assume such a property.
+    N.B.: ctol is the tolerance of the constraint violation (cstrv). A point is considered feasible if
+    its constraint violation is at most ctol. Note that ctol is absolute, not relative.
+    '''
+
+    nhist = len(fhist)
+
+    # We select X among the points with f < fref and cstrv < cref.
+    # Do NOT use f <= fref, because f == fref (funcmax or realmax) may mean f == inf in practice!
+    if any(np.logical_and(fhist < FUNCMAX, chist < CONSTRMAX)):
+        fref = FUNCMAX
+        cref = CONSTRMAX
+    elif any(np.logical_and(fhist < REALMAX, chist < CONSTRMAX)):
+        fref = REALMAX
+        cref = CONSTRMAX
+    elif any(np.logical_and(fhist < FUNCMAX, chist < REALMAX)):
+        fref = FUNCMAX
+        cref = REALMAX
+    else:
+        fref = REALMAX
+        cref = REALMAX
+
+    if not any(np.logical_and(fhist < fref, chist < cref)):
+        kopt = nhist - 1
+    else:
+        # Shift the constraint violations by ctol, so that cstrv <= ctol is regarded as no violation.
+        chist_shifted = np.maximum(chist - ctol, 0)
+        # cmin is the minimal shifted constraint violation attained in the history.
+        cmin = min(chist_shifted[fhist < fref])
+        # We consider only the points whose shifted constraint violations are at most the cref below.
+        # N.B.: Without taking max(EPS, .), cref would be 0 if cmin = 0. In that case, asking for
+        # cstrv_shift < cref would be WRONG!
+        cref = max(EPS, 2*cmin)
+        # We use the following phi as our merit function to select X.
+        if cweight <= 0:
+            phi = fhist
+        elif np.isposinf(cweight):
+            phi = chist_shifted
+            # We should not use chist here; if min(chist_shifted) is attained at multiple indices, then
+            # we will check fhist to exhaust the remaining degree of freedom.
+        else:
+            phi = np.maximum(fhist, -REALMAX) + cweight * chist_shifted
+            # np.maximum(fhist, -REALMAX) makes sure that phi will not contain NaN (unless there is a bug).
+
+        # We select X to minimize phi subject to f < fref and cstrv_shift <= cref (see the comments
+        # above for the reason of taking "<" and "<=" in these two constraints). In case there are
+        # multiple minimizers, we take the one with the least cstrv_shift; if there is more than one
+        # choice, we take the one with the least f; if there are several candidates, we take the one
+        # with the least cstrv; if the last comparison still leads to more than one possibility, then
+        # they are equally good and we choose the first.
+        # N.B.:
+        # 1. This process is the opposite of selecting kworst in savefilt.
+        # 2. In finite-precision arithmetic, phi_1 == phi_2 and cstrv_shifted_1 == cstrv_shifted_2 do
+        # not ensure that f_1 == f_2!
+        phimin = min(phi[np.logical_and(fhist < fref, chist_shifted <= cref)])
+        cref = min(chist_shifted[np.logical_and(fhist < fref, phi <= phimin)])
+        fref = min(fhist[chist_shifted <= cref])
+        kopt = np.where(chist == min(chist[fhist <= fref]))[0][0]
+
+    return kopt
\ No newline at end of file
diff --git a/python/src/prima/examples/cobyla/cobyla_example.py b/python/src/prima/examples/cobyla/cobyla_example.py
new file mode 100644
index 0000000000..365fe7488f
--- /dev/null
+++ b/python/src/prima/examples/cobyla/cobyla_example.py
@@ -0,0 +1,322 @@
+import numpy as np
+from prima.common.evaluate import evaluate
+from prima.common.consts import EPS, RHOBEG_DEFAULT, RHOEND_DEFAULT, \
+    CTOL_DEFAULT, CWEIGHT_DEFAULT, FTARGET_DEFAULT, IPRINT_DEFAULT, \
+    MAXFUN_DIM_DEFAULT
+from prima.cobyla.cobylb import cobylb
+
+debugging = True
+
+def test_chebyquad():
+    f, _ = calcfc_chebyquad([1, 2])
+    assert np.isclose(f, 91+1/9, atol=1e-6)
+
+
+def calcfc_chebyquad(x):
+    x = np.array(x)
+    n = len(x)  # or shape?
+    y = np.zeros((n + 1, n + 1))
+    y[0:n, 0] = 1
+    y[0:n, 1] = 2*x - 1
+    for i in range(1, n):
+        y[0:n, i+1] = 2*y[0:n, 1] * y[0:n, i] - y[0:n, i - 1]
+
+    f = 0
+    for i in range(n+1):
+        tmp = sum(y[0:n, i]) / n
+        if i % 2 == 0:
+            tmp += 1/(i**2 - 1)
+        f += tmp**2
+    constr = 0
+    return f, constr
+
+def calcfc_hexagon(x):
+    # Test problem 10 in Powell's original algorithm
+
+    assert len(x) == 9
+
+    f = -0.5 * (x[0] * x[3] - x[1] * x[2] + x[2] * x[8] - x[4] * x[8] + x[4] * x[7] - x[5] * x[6])
+    constr = np.zeros(14)
+    constr[0] = 1 - x[2]**2 - x[3]**2
+    constr[1] = 1 - x[8]**2
+    constr[2] = 1 - x[4]**2 - x[5]**2
+    constr[3] = 1 - x[0]**2 - (x[1] - x[8])**2
+    constr[4] = 1 - (x[0] - x[4])**2 - (x[1] - x[5])**2
+    constr[5] = 1 - (x[0] - x[6])**2 - (x[1] - x[7])**2
+    constr[6] = 1 - (x[2] - x[4])**2 - (x[3] - x[5])**2
+    constr[7] = 1 - (x[2] - x[6])**2 - (x[3] - x[7])**2
+    constr[8] = 1 - x[6]**2 - (x[7] - x[8])**2
+    constr[9] = x[0] * x[3] - x[1] * x[2]
+    constr[10] = x[2] * x[8]
+    constr[11] = -x[4] * x[8]
+    constr[12] = x[4] * x[7] - x[5] * x[6]
+    constr[13] = x[8]
+
+    return f, constr
+
+def cobyla(calcfc, m, x, f, cstrv=0, constr=0, f0=None, constr0=None, nf=None, rhobeg=None,
+           rhoend=None, ftarget=FTARGET_DEFAULT, ctol=CTOL_DEFAULT, cweight=CWEIGHT_DEFAULT,
+           maxfun=None, iprint=IPRINT_DEFAULT, eta1=None, eta2=None, gamma1=0.5, gamma2=2,
+           xhist=False, fhist=False, chist=False, conhist=False, maxhist=None, maxfilt=2000):
+    """
+    Among all the arguments, only CALCFC, M, X, and F are obligatory. The others are OPTIONAL and you
+    can neglect them unless you are familiar with the algorithm. Any unspecified optional input will
+    take the default value detailed below. For instance, we may invoke the solver as follows.
+
+    ! First define CALCFC, M, and X, and then do the following.
+    call cobyla(calcfc, m, x, f)
+
+    or
+
+    ! First define CALCFC, M, and X, and then do the following.
+    call cobyla(calcfc, m, x, f, rhobeg = 0.5D0, rhoend = 1.0D-3, maxfun = 100)
+
+    ! IMPORTANT NOTICE: The user must set M correctly to the number of constraints, namely the size of
+    ! CONSTR introduced below. Set M to 0 if there is no constraint.
+
+    See examples/cobyla/cobyla_example.py for a concrete example.
+
+    A detailed introduction to the arguments is as follows.
+    N.B.: RP and IK are defined in the module CONSTS_MOD. See consts.F90 under the directory named
+    "common".
By default, RP = kind(0.0D0) and IK = kind(0), with REAL(RP) being the double-precision + real, and INTEGER(IK) being the default integer. For ADVANCED USERS, RP and IK can be defined by + setting PRIMA_REAL_PRECISION and PRIMA_INTEGER_KIND in common/ppf.h. Use the default if unsure. + + CALCFC + Input, subroutine. + CALCFC(X, F, CONSTR) should evaluate the objective function and constraints at the given + REAL(RP) vector X; it should set the objective function value to the REAL(RP) scalar F and the + constraint value to the REAL(RP) vector CONSTR. It must be provided by the user, and its + definition must conform to the following interface: + !-------------------------------------------------------------------------! + subroutine calcfc(x, f, constr) + real(RP), intent(in) :: x(:) + real(RP), intent(out) :: f + real(RP), intent(out) :: constr(:) + end subroutine calcfc + !-------------------------------------------------------------------------! + Besides, the subroutine should NOT access CONSTR beyond CONSTR(1:M), where M is the second + compulsory argument (see below), signifying the number of constraints. + + M + Input, INTEGER(IK) scalar. + M must be set to the number of constraints, namely the size (length) of CONSTR(X). + N.B.: + 1. M must be specified correctly, or the program will crash!!! + 2. Why don't we define M as optional and default it to 0 when it is absent? This is because + we need to allocate memory for CONSTR_LOC according to M. To ensure that the size of CONSTR_LOC + is correct, we require the user to specify M explicitly. + + X + Input and output, REAL(RP) vector. + As an input, X should be an N-dimensional vector that contains the starting point, N being the + dimension of the problem. As an output, X will be set to an approximate minimizer. + + F + Output, REAL(RP) scalar. + F will be set to the objective function value of X at exit. + + CSTRV + Output, REAL(RP) scalar. + CSTRV will be set to the constraint violation of X at exit, i.e., MAXVAL([-CONSTR(X), 0]). + + CONSTR + Output, ALLOCATABLE REAL(RP) vector. + CONSTR will be set to the constraint value of X at exit. + + F0 + Input, REAL(RP) scalar. + F0, if present, should be set to the objective function value of the starting X. + + CONSTR0 + Input, REAL(RP) vector. + CONSTR0, if present, should be set to the constraint value of the starting X; in addition, + SIZE(CONSTR0) must be M, or the solver will abort. + + NF + Output, INTEGER(IK) scalar. + NF will be set to the number of calls of CALCFC at exit. + + RHOBEG, RHOEND + Inputs, REAL(RP) scalars, default: RHOBEG = 1, RHOEND = 10^-6. RHOBEG and RHOEND must be set to + the initial and final values of a trust-region radius, both being positive and RHOEND <= RHOBEG. + Typically RHOBEG should be about one tenth of the greatest expected change to a variable, and + RHOEND should indicate the accuracy that is required in the final values of the variables. + + FTARGET + Input, REAL(RP) scalar, default: -Inf. + FTARGET is the target function value. The algorithm will terminate when a feasible point with a + function value <= FTARGET is found. + + CTOL + Input, REAL(RP) scalar, default: machine epsilon. + CTOL is the tolerance of constraint violation. Any X with MAXVAL(-CONSTR(X)) <= CTOL is + considered feasible. + N.B.: 1. CTOL is absolute, not relative. 2. CTOL is used only when selecting the returned X. + It does not affect the iterations of the algorithm. 
+
+    CWEIGHT
+    Input, REAL(RP) scalar, default: CWEIGHT_DFT defined in the module CONSTS_MOD in common/consts.F90.
+    CWEIGHT is the weight that the constraint violation takes in the selection of the returned X.
+
+    MAXFUN
+    Input, INTEGER(IK) scalar, default: MAXFUN_DIM_DFT*N with MAXFUN_DIM_DFT defined in the module
+    CONSTS_MOD (see common/consts.F90). MAXFUN is the maximal number of calls of CALCFC.
+
+    IPRINT
+    Input, INTEGER(IK) scalar, default: 0.
+    The value of IPRINT should be set to 0, 1, -1, 2, -2, 3, or -3, which controls how much
+    information will be printed during the computation:
+    0: there will be no printing;
+    1: a message will be printed to the screen at the return, showing the best vector of variables
+       found and its objective function value;
+    2: in addition to 1, each new value of RHO is printed to the screen, with the best vector of
+       variables so far and its objective function value; each new value of CPEN is also printed;
+    3: in addition to 2, each function evaluation with its variables will be printed to the screen;
+    -1, -2, -3: the same information as 1, 2, 3 will be printed, not to the screen but to a file
+       named COBYLA_output.txt; the file will be created if it does not exist; the new output will
+       be appended to the end of this file if it already exists.
+    Note that IPRINT = +/-3 can be costly in terms of time and/or space.
+
+    ETA1, ETA2, GAMMA1, GAMMA2
+    Input, REAL(RP) scalars, default: ETA1 = 0.1, ETA2 = 0.7, GAMMA1 = 0.5, and GAMMA2 = 2.
+    ETA1, ETA2, GAMMA1, and GAMMA2 are parameters in the updating scheme of the trust-region radius
+    detailed in the subroutine TRRAD in trustregion.f90. Roughly speaking, the trust-region radius
+    is contracted by a factor of GAMMA1 when the reduction ratio is below ETA1, and enlarged by a
+    factor of GAMMA2 when the reduction ratio is above ETA2. It is required that 0 < ETA1 <= ETA2
+    < 1 and 0 < GAMMA1 < 1 < GAMMA2. Normally, ETA1 <= 0.25. It is NOT advised to set ETA1 >= 0.5.
+
+    XHIST, FHIST, CHIST, CONHIST, MAXHIST
+    XHIST: Output, ALLOCATABLE rank 2 REAL(RP) array;
+    FHIST: Output, ALLOCATABLE rank 1 REAL(RP) array;
+    CHIST: Output, ALLOCATABLE rank 1 REAL(RP) array;
+    CONHIST: Output, ALLOCATABLE rank 2 REAL(RP) array;
+    MAXHIST: Input, INTEGER(IK) scalar, default: MAXFUN
+    XHIST, if present, will output the history of iterates; FHIST, if present, will output the
+    history of function values; CHIST, if present, will output the history of constraint violations;
+    CONHIST, if present, will output the history of constraint values; MAXHIST should be a
+    nonnegative integer, and XHIST/FHIST/CHIST/CONHIST will output only the history of the last
+    MAXHIST iterations. Therefore, MAXHIST = 0 means XHIST/FHIST/CONHIST/CHIST will output nothing,
+    while setting MAXHIST = MAXFUN requests XHIST/FHIST/CHIST/CONHIST to output all the history.
+    If XHIST is present, its size at exit will be (N, min(NF, MAXHIST)); if FHIST is present, its
+    size at exit will be min(NF, MAXHIST); if CHIST is present, its size at exit will be
+    min(NF, MAXHIST); if CONHIST is present, its size at exit will be (M, min(NF, MAXHIST)).
+
+    IMPORTANT NOTICE:
+    Setting MAXHIST to a large value can be costly in terms of memory for large problems.
+    MAXHIST will be reset to a smaller value if the memory needed exceeds MAXHISTMEM defined in
+    CONSTS_MOD (see consts.F90 under the directory named "common").
+    Use *HIST with caution!!! (N.B.: the algorithm is NOT designed for large problems).
+
+    MAXFILT
+    Input, INTEGER(IK) scalar.
+ MAXFILT is a nonnegative integer indicating the maximal length of the filter used for selecting + the returned solution; default: MAXFILT_DFT (a value lower than MIN_MAXFILT is not recommended); + see common/consts.F90 for the definitions of MAXFILT_DFT and MIN_MAXFILT. + + INFO + Output, INTEGER(IK) scalar. + INFO is the exit flag. It will be set to one of the following values defined in the module + INFOS_MOD (see common/infos.f90): + SMALL_TR_RADIUS: the lower bound for the trust region radius is reached; + FTARGET_ACHIEVED: the target function value is reached; + MAXFUN_REACHED: the objective function has been evaluated MAXFUN times; + MAXTR_REACHED: the trust region iteration has been performed MAXTR times (MAXTR = 2*MAXFUN); + NAN_INF_X: NaN or Inf occurs in X; + DAMAGING_ROUNDING: rounding errors are becoming damaging. + !--------------------------------------------------------------------------! + The following case(s) should NEVER occur unless there is a bug. + NAN_INF_F: the objective function returns NaN or +Inf; + NAN_INF_MODEL: NaN or Inf occurs in the model; + TRSUBP_FAILED: a trust region step failed to reduce the model + !--------------------------------------------------------------------------! + """ + if debugging: + # TODO: need to check how to figure out if python variables are present + # I guess the way to go is to set the default to None + assert (f0 is None) == (constr0 is None), "f0 and constr0 must be both present or both absent" + + n = len(x) + + assert len(constr0) == m if constr0 is not None else True, "len(constr0) != m" + + constr_loc = np.zeros(m) + if f0 is not None and constr0 is not None and all(np.isfinite(x)): + f = f0 + constr_loc = constr0 + else: + # Replace all NaNs with 0s and clip all -inf/inf to -floatmax/floatmax + np.nan_to_num(x, copy=False, neginf=-np.finfo(float).max, posinf=np.finfo(float).max) + f, constr_loc, cstrv_loc = evaluate(calcfc, x) + + if rhobeg is not None: + rhobeg_loc = rhobeg + elif rhoend is not None and np.isfinite(rhoend) and rhoend > 0: + rhobeg_loc = max(10*rhoend, RHOBEG_DEFAULT) + else: + rhobeg_loc = RHOBEG_DEFAULT + + if rhoend is not None: + rhoend_loc = rhoend + elif rhobeg_loc > 0: + rhoend_loc = max(EPS, min(0.1*rhobeg_loc, RHOEND_DEFAULT)) + else: + rhoend_loc = RHOEND_DEFAULT + + maxfun = maxfun if maxfun is not None else MAXFUN_DIM_DEFAULT*n + + if eta1 is not None: + eta1_loc = eta1 + elif eta2 is not None and 0 < eta2 < 1: + eta1_loc = max(EPS, eta2/7) + else: + eta1_loc = 0.1 + + if eta2 is not None: + eta2_loc = eta2 + else: + if 0 < eta1_loc < 1: + eta2_loc = (eta1_loc + 2)/3 + else: + eta2_loc = 0.7 + + if maxhist is not None: + maxhist_loc = maxhist + else: + maxhist_loc = max(maxfun, n+2, MAXFUN_DIM_DEFAULT*n) + + # preprocess + + ''' + ! Further revise MAXHIST_LOC according to MAXHISTMEM, and allocate memory for the history. + ! In MATLAB/Python/Julia/R implementation, we should simply set MAXHIST = MAXFUN and initialize + ! CHIST = NaN(1, MAXFUN), CONHIST = NaN(M, MAXFUN), FHIST = NaN(1, MAXFUN), XHIST = NaN(N, MAXFUN) + ! if they are requested; replace MAXFUN with 0 for the history that is not requested. 
    call prehist(maxhist_loc, n, present(xhist), xhist_loc, present(fhist), fhist_loc, &
+        & present(chist), chist_loc, m, present(conhist), conhist_loc)
+    '''
+
+    # Call cobylb, which performs the real calculations.
+    x, f, nf, info = cobylb(calcfc, iprint, maxfilt, maxfun, ctol, cweight, eta1_loc, eta2_loc,
+                            ftarget, gamma1, gamma2, rhobeg_loc, rhoend_loc, constr_loc, f, x,
+                            cstrv_loc)
+
+    # Write the outputs.
+    # TODO: Pick up from here, to finish writing the cobyla top level routine, and then we can start
+    # testing. We'll be able to test up until the trust region calls, until I hear back from Zaikun
+    # about how to handle them.
+    return x, f
+
+
+if __name__ == "__main__":
+    # n_chebyquad = 6
+    # x_chebyquad = [i/n_chebyquad for i in range(1, n_chebyquad+1)]
+    # m = 0
+    # f = 0
+    # cstrv = 0
+    # x, f = cobyla(calcfc_chebyquad, m, x_chebyquad, f, cstrv)
+    # print(x, f)
+    x_hexagon = np.zeros(9) + 2
+    m_hexagon = 14
+    f = 0
+    cstrv = 0  # Should this be a length-14 array? Of inf?
+    x, f = cobyla(calcfc_hexagon, m_hexagon, x_hexagon, f, cstrv)
\ No newline at end of file