6.1 等式约束的牛顿法

import numpy as np
import cvxpy as cp
import matplotlib.pyplot as plt

考虑课本的例子，问题是

\text{minimize}\ f(x) = - \sum_{i=1}^{n} \log(x_i)

(1)

其中约束条件是

A x = b

(2)

因为 $\log(x_i)$ 要求 $x_i>0$ ，下面的回溯线搜索会显式检查定义域。

class EqualityConstrainedProblem:
    # 将目标函数、导数、Hessian 和等式约束 Ax=b 封装在同一个类中。
    def __init__(self, f, df, hess, A, b, is_in_domain=None):
        # is_in_domain 用来检查目标函数定义域；本例中要求所有坐标都为正。
        self.f = f
        self.df = df
        self.hess = hess
        self.A = np.asarray(A, dtype=float)
        self.b = np.asarray(b, dtype=float)
        self.p, self.n = self.A.shape
        self.is_in_domain = is_in_domain or (lambda x: True)

        # 约束数量必须和 b 的长度一致。
        if self.b.shape != (self.p,):
            raise ValueError("b must have shape (p,).")

    def newton_step(self, x, nu=None):
        # 求解课件中的 KKT 线性方程组，得到 Newton 方向。
        # 可行初始点方法只需要 dx；不可行初始点方法还需要 dnu。
        if not self.is_in_domain(x):
            raise ValueError("Newton step is only defined inside the domain of f.")

        n = self.n
        p = self.p
        # KKT 矩阵为 [[H, A^T], [A, 0]]，其中 H 是 Hessian。
        kkt_matrix = np.zeros((n + p, n + p))
        kkt_matrix[:n, :n] = self.hess(x)
        kkt_matrix[:n, n:] = self.A.T
        kkt_matrix[n:, :n] = self.A

        # 右端向量对应 [-grad f(x), b - A x]。
        rhs = np.zeros(n + p)
        rhs[:n] = -self.df(x)
        rhs[n:] = self.b - self.A @ x

        solution = np.linalg.solve(kkt_matrix, rhs)
        dx = solution[:n]
        omega = solution[n:]

        if nu is not None:
            # 不可行初始点方法中，KKT 解给出 omega = nu + dnu。
            return dx, omega - nu
        return dx

    def primal_residual(self, x):
        # 衡量当前点违反等式约束 Ax=b 的程度。
        return np.linalg.norm(self.A @ x - self.b)

    def backtracking_line_search(self, x, dx, alpha, beta, max_line_search=100):
        # Armijo 回溯线搜索：既要求函数下降，也要求试探点仍在定义域内。
        fx = self.f(x)
        directional_derivative = np.dot(self.df(x), dx)
        t = 1.0

        for k in range(max_line_search + 1):
            xt = x + t * dx
            if self.is_in_domain(xt):
                # 先检查定义域，再计算 f(xt)，避免 log 遇到非正数。
                sufficient_decrease = self.f(xt) <= fx + alpha * t * directional_derivative
                if sufficient_decrease:
                    return t, k
            t *= beta

        raise RuntimeError("Line search failed to find a domain-feasible step.")

    def newton_with_linear_constraint(self, x, alpha, beta, epsilon, maxiter=100):
        # 可行初始点 Newton 法：要求初始点已经满足 Ax=b。
        x = np.asarray(x, dtype=float).copy()
        if self.primal_residual(x) > 1.0e-10:
            raise ValueError("Feasible-start Newton needs A @ x == b.")

        x_list = [x.copy()]
        for _ in range(maxiter):
            dx = self.newton_step(x)
            # Newton decrement 的平方：lambda(x)^2 = dx^T H dx。
            lambda_sq = np.dot(dx, self.hess(x) @ dx)
            if lambda_sq / 2 <= epsilon:
                break

            # 因为 A dx=0，任意步长都会保持 Ax=b；线搜索只负责下降和定义域。
            t, _ = self.backtracking_line_search(x, dx, alpha, beta)
            x = x + t * dx
            x_list.append(x.copy())
        return x, x_list

    def r(self, x, nu):
        # 原对偶残差 r(x, nu)，同时包含驻点误差和可行性误差。
        if not self.is_in_domain(x):
            return np.inf

        residual = (
            np.linalg.norm(self.df(x) + self.A.T @ nu) ** 2
            + self.primal_residual(x) ** 2
        )
        return np.sqrt(residual)

    def newton_with_infeasible_start(
        self, x, nu, alpha, beta, epsilon, eta=None, maxiter=100, max_line_search=100
    ):
        # 不可行初始点 Newton 法：x 不必满足 Ax=b，但必须在 f 的定义域内。
        # eta 是单独的可行性阈值；若不指定，则默认和 epsilon 相同。
        eta = epsilon if eta is None else eta
        x = np.asarray(x, dtype=float).copy()
        nu = np.asarray(nu, dtype=float).copy()
        x_list = [x.copy()]
        nu_list = [nu.copy()]

        for _ in range(maxiter):
            r0 = self.r(x, nu)
            # 同时检查 KKT 残差和等式约束残差，对应课件中的两个停止条件。
            if r0 <= epsilon and self.primal_residual(x) <= eta:
                break

            dx, dnu = self.newton_step(x, nu=nu)
            t = 1.0
            for _ in range(max_line_search + 1):
                xt = x + t * dx
                nut = nu + t * dnu
                if self.is_in_domain(xt):
                    # 这里用残差下降作为线搜索标准，而不是直接比较目标函数值。
                    residual_decrease = self.r(xt, nut) <= (1 - alpha * t) * r0
                    if residual_decrease:
                        break
                t *= beta
            else:
                raise RuntimeError(
                    "Line search failed to find a domain-feasible residual-decreasing step."
                )

            x = x + t * dx
            nu = nu + t * dnu
            x_list.append(x.copy())
            nu_list.append(nu.copy())

        return x, x_list, nu, nu_list

np.random.seed(1)

n = 500
p = 100
A = np.random.rand(p, n)
assert np.linalg.matrix_rank(A) == p, "Not full rank"
x0 = np.random.rand(n)
b = A @ x0

先使用cvxpy来求解¶

cp_x = cp.Variable(n)
constraints = [A @ cp_x == b]
obj = cp.Minimize(-cp.sum(cp.log(cp_x)))
cvx_prob = cp.Problem(obj, constraints)
cvx_prob.solve()
print("status:", cvx_prob.status)
print("optimal value = {:.4f}".format(cvx_prob.value))

status: optimal
optimal value = 351.0778

使用等式约束的牛顿法¶

alpha = 0.1
beta = 0.5
tol = 1.0e-7
eta = 1.0e-8

def is_positive(x):
    # log barrier 的定义域：每个坐标都必须大于 0。
    return np.all(x > 0)

def f(x):
    # 目标函数 f(x) = -sum log(x_i)，定义域外返回无穷大。
    if not is_positive(x):
        return np.inf
    return -np.sum(np.log(x))

def df(x):
    # 梯度：第 i 个分量为 -1 / x_i。
    return -1/x

def hess(x):
    # Hessian 是对角矩阵，第 i 个对角元为 1 / x_i^2。
    return np.diag(1/x**2)

problem = EqualityConstrainedProblem(f, df, hess, A, b, is_in_domain=is_positive)

final_x_ln, x_list_ln = problem.newton_with_linear_constraint(x0, alpha, beta, tol)

p_star_ln = f(final_x_ln)
print("Optimal value = {:.4f}".format(p_star_ln))
print("Iterations = {}".format(len(x_list_ln) - 1))
print("Final feasibility residual = {:.2e}".format(problem.primal_residual(final_x_ln)))

plt.figure(figsize=(4,3))
objective_gaps = [max(f(item) - p_star_ln, np.finfo(float).tiny) for item in x_list_ln[:-1]]
plt.semilogy(objective_gaps, 'o-')
plt.xlabel("iteration")
plt.ylabel("objective gap")

Optimal value = 351.0778
Iterations = 14
Final feasibility residual = 6.07e-13

使用不可行初始点牛顿法¶

x0_infeasible = 0.5 * x0 + 0.1
nu0 = np.random.randn(p)

print("Initial feasibility residual = {:.2e}".format(problem.primal_residual(x0_infeasible)))
final_x_pd, x_list_pd, final_nu_pd, nu_list_pd = problem.newton_with_infeasible_start(
    x0_infeasible, nu0, alpha, beta, tol, eta=eta
)

p_star_pd = f(final_x_pd)
print("Optimal value = {:.4f}".format(p_star_pd))
print("Iterations = {}".format(len(x_list_pd) - 1))
print("Final KKT residual = {:.2e}".format(problem.r(final_x_pd, final_nu_pd)))
print("Final feasibility residual = {:.2e}".format(problem.primal_residual(final_x_pd)))

plt.figure(figsize=(4,3))
kkt_residuals = [problem.r(x, nu) for x, nu in zip(x_list_pd, nu_list_pd)]
feasibility_residuals = [problem.primal_residual(x) for x in x_list_pd]
plt.semilogy(kkt_residuals, 'o-', label="KKT residual")
plt.semilogy(feasibility_residuals, 's-', label="feasibility")
plt.xlabel("iteration")
plt.ylabel("residual")
plt.legend()

Initial feasibility residual = 3.86e+02
Optimal value = 351.0778
Iterations = 7
Final KKT residual = 6.32e-13
Final feasibility residual = 6.32e-13

def relative_error(x, x_ref):
    return np.linalg.norm(x - x_ref) / np.linalg.norm(x_ref)

print("Compare cvxpy and newton (with linear constraints)")
print("Relative error of optimal x = {:.1E}".format(relative_error(final_x_ln, cp_x.value)))
print("Minimum coordinate of x = {:.2e}\n".format(np.min(final_x_ln)))

print("Compare cvxpy and newton (with infeasible start)")
print("Relative error of optimal x = {:.1E}".format(relative_error(final_x_pd, cp_x.value)))
print("Minimum coordinate of x = {:.2e}".format(np.min(final_x_pd)))

Compare cvxpy and newton (with linear constraints)
Relative error of optimal x = 9.1E-07
Minimum coordinate of x = 2.84e-01

Compare cvxpy and newton (with infeasible start)
Relative error of optimal x = 9.1E-07
Minimum coordinate of x = 2.84e-01