Exercise 04 - Local approximations¶

The volume of a four-bar truss should be minimized under the displacement constraint $\delta \le \delta_0$. There is a force $P>0$ acting along the direction of bar 4. All bars have identical lengths $l$ and identical Young's moduli $E$. The modifiable structural variables are the cross-sectional areas $A_1=A_4$ and $A_2=A_3$. We define $A_0 = Pl / (10\delta_0E)$ and constrain the variables $0.2A_0 \le A_j \le 2.5 A_0$. Then we can use dimensionless design variables $a_j=A_j/A_0 \in [0.2, 2.5]$.

Four bar truss

Credits: Peter W. Christensen and Anders Klarbring. An Introduction to Structural Optimization. Springer Netherlands, 2008.

In [1]:
from math import sqrt

import matplotlib.pyplot as plt
import torch
from torchfem.utils import plot_contours

Task 1 - Defining the constrained optimization problem¶

a) Compute the objective function $f(\mathbf{a})$ that should be minimized and define it as a Python function that accepts input tensors of shape [..., 2].

The total volume is $$ V = l A_1 + l A_2 + l A_3 + l A_4 = 2l A_1 + 2l A_2.$$ Substituting $A_j = a_j A_0$ gives $$ V = 2l A_0 (a_1 + a_2).$$ The factor in front of the brackets is a positive constant, which we can neglect for the optimization task itself. Hence, we can simplify the function to $$f(\mathbf{a}) = a_1 + a_2.$$

In [2]:
def f(a):
    return a[..., 0] + a[..., 1]

b) Compute the constraint function $g(\mathbf{a})$ for $\delta_0=0.1$ and define it as a Python function that accepts input tensors of shape [..., 2].

A free body diagram of the free node gives $$ - \frac{4}{5} P_1 - \frac{3}{5} P_2 + \frac{3}{5}P_3 + \frac{4}{5} P_4 + \frac{4}{5} P = 0 $$ $$ \frac{3}{5} P_1 + \frac{4}{5} P_2 + \frac{4}{5}P_3 + \frac{3}{5} P_4 + \frac{3}{5} P = 0 $$ In addition, the kinematics give us the following relations for the elongation of each bar $\Delta u_j$: $$ \frac{4}{5} u_1 - \frac{3}{5} u_2 = \Delta u_1$$ $$ \frac{3}{5} u_1 - \frac{4}{5} u_2 = \Delta u_2 $$ $$ -\frac{3}{5} u_1 - \frac{4}{5} u_2 = \Delta u_3 $$ $$ -\frac{4}{5} u_1 - \frac{3}{5} u_2 = \Delta u_4 $$ Substituting the displacements into the elastic relations and using $A_1=A_4$ and $A_2=A_3$ gives $$P_1 = \frac{E}{5l} A_1 (4u_1-3u_2)$$ $$P_2 = \frac{E}{5l} A_2 (3u_1-4u_2)$$ $$P_3 = \frac{E}{5l} A_2 (-3u_1-4u_2)$$ $$P_4 = \frac{E}{5l} A_1 (-4u_1-3u_2)$$ Substituting these expressions into the first two equations of the free body diagram yields $$u_1(32A_1+18A_2)=20 \frac{Pl}{E}$$ $$u_2(18A_1+32A_2)=15 \frac{Pl}{E}$$ which can be solved for $$u_1 = \frac{10}{16a_1+9a_2}$$ $$u_2 = \frac{7.5}{9a_1+16a_2}$$ using the definition of $A_0$ and $\delta_0 = 0.1$. Finally, we can compute the displacement $$\delta = \Delta u_1 = \frac{4}{5} u_1 - \frac{3}{5} u_2 = \frac{8}{16a_1+9a_2} - \frac{4.5}{9a_1+16a_2}$$ and define $$g(\mathbf{a}) = \frac{8}{16a_1+9a_2} - \frac{4.5}{9a_1+16a_2} - 0.1.$$

In [3]:
def g(a):
    return (
        8 / (16 * a[..., 0] + 9 * a[..., 1])
        - 4.5 / (9 * a[..., 0] + 16 * a[..., 1])
        - 0.1
    )

c) Summarize the optimization problem statement with all constraints.

$$\min_\mathbf{a} f(\mathbf{a}) = a_1 + a_2$$ $$\text{s.t.} \quad g(\mathbf{a}) = \frac{8}{16a_1+9a_2} - \frac{4.5}{9a_1+16a_2} - 0.1 \le 0$$ $$\quad a_1 \in [0.2, 2.5], a_2 \in [0.2, 2.5]$$
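Before approximating anything, a quick sanity check of the constraint can be helpful (a small sketch, not part of the exercise): the smallest admissible design should violate the displacement constraint, while the largest admissible design should satisfy it.

# Sanity check (sketch): evaluate g at the corners of the box
a_min = torch.tensor([0.2, 0.2])
a_max = torch.tensor([2.5, 2.5])
print(g(a_min))  # positive -> constraint violated (infeasible)
print(g(a_max))  # negative -> constraint satisfied (feasible)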

Task 2 - CONLIN¶

a) Implement a function named CONLIN(func, a_k) that computes a CONLIN approximation of the function func at position a_k. CONLIN should return an approximation function that can be evaluated at any point a.
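For reference, the CONLIN approximation used below linearizes $f$ with respect to $a_j$ if the corresponding partial derivative is non-negative, and with respect to $1/a_j$ otherwise. Both cases can be written as $$\tilde{f}(\mathbf{a}) = f(\mathbf{a}^k) + \sum_j \Gamma_j(a_j) \frac{\partial f}{\partial a_j}(\mathbf{a}^k)\,(a_j - a_j^k), \qquad \Gamma_j(a_j) = \begin{cases} 1 &\quad \text{if } \frac{\partial f}{\partial a_j}(\mathbf{a}^k) \ge 0\\ \frac{a_j^k}{a_j} &\quad \text{else} \end{cases}$$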

In [4]:
def CONLIN(func, a_k):
    # Evaluate the gradient of func at the expansion point a_k via autograd
    a_lin = a_k.clone().requires_grad_()
    gradients = torch.autograd.grad(func(a_lin), a_lin)[0]

    def approximation(a):
        res = func(a_k)
        for j, grad in enumerate(gradients):
            # Negative gradient: reciprocal approximation (Gamma = a_k / a),
            # non-negative gradient: linear approximation (Gamma = 1)
            if grad < 0.0:
                Gamma = a_k[j] / a[j]
            else:
                Gamma = 1.0
            res += grad * Gamma * (a[j] - a_k[j])
        return res

    return approximation

b) Test your CONLIN approximation with the following code. Does the plot match your expectations?

In [5]:
def test_function(x):
    return 5.0 / x + 2.0 * x


x = torch.linspace(0, 10, 100)[:, None]
x_0 = torch.tensor([1.0])
test_approximation = CONLIN(test_function, x_0)
plt.plot(x, test_function(x), label="f(x)")
plt.plot(x, [test_approximation(x_i) for x_i in x], label=f"CONLIN at x={x_0.item()}")
plt.axvline(x_0, color="tab:orange", linestyle="--")
plt.legend()
plt.xlabel("x")
plt.ylabel("f(x)")
plt.xlim(0, 10)
plt.ylim(0, 25)
plt.show()

c) Solve the problem with sequential CONLIN approximations starting from $\mathbf{a}^0 = (2,1)^\top$ with the dual method. Record all intermediate points $\mathbf{a}^0, \mathbf{a}^1, \mathbf{a}^2, ...$ in a list called a for later plotting.
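The dual method treats each approximated subproblem via the Lagrangian $$\mathcal{L}(\mathbf{a}, \mu) = f(\mathbf{a}) + \mu\, \tilde{g}^k(\mathbf{a}),$$ where $\tilde{g}^k$ is the CONLIN approximation of $g$ at $\mathbf{a}^k$. The primal minimizer $\mathbf{a}^*(\mu) = \arg\min_{\mathbf{a}} \mathcal{L}(\mathbf{a}, \mu)$ subject to the box constraints is computed first, then the dual function $\underline{\mathcal{L}}(\mu) = \mathcal{L}(\mathbf{a}^*(\mu), \mu)$ is maximized over $\mu \ge 0$, and the next iterate is $\mathbf{a}^{k+1} = \mathbf{a}^*(\mu^k)$.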

You will need to compute minima and maxima in this procedure, hence you are given the box_constrained_decent method from the previous exercise to perform these operations. The method is slightly modified:

  • It takes extra arguments that can be passed to the function, e.g. by box_constrained_decent(..., mu=1.0)
  • It returns only the final result and not all intermediate steps
In [6]:
def box_constrained_decent(
    func, x_init, x_lower, x_upper, eta=0.1, max_iter=100, **extra_args
):
    # Projected gradient descent: take a gradient step, then clamp to the box
    x = x_init.clone().requires_grad_()
    for _ in range(max_iter):
        grad = torch.autograd.grad(func(x, **extra_args), x)[0]
        x = x - eta * grad
        x = torch.clamp(x, x_lower, x_upper)
    return x
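
As a minimal usage sketch (the quadratic test function is just made up for illustration), minimizing $(x-3)^2$ on the box $[0, 2]$ should return the upper bound:

# Hypothetical example: minimize (x - 3)^2 on the box [0, 2].
# The unconstrained minimum lies at x = 3, so the result is clamped to x = 2.
x_opt = box_constrained_decent(
    lambda x: ((x - 3.0) ** 2).sum(),
    x_init=torch.tensor([0.5]),
    x_lower=torch.tensor([0.0]),
    x_upper=torch.tensor([2.0]),
)
print(x_opt)  # approximately tensor([2.])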
In [7]:
# Define the initial values, lower bound, and upper bound of "a"
a_0 = torch.tensor([2.0, 1.0], requires_grad=True)
a_lower = torch.tensor([0.2, 0.2])
a_upper = torch.tensor([2.5, 2.5])

# Define the initial value, lower bound, and upper bound of "mu"
mu_0 = torch.tensor([10.0])
mu_lower = torch.tensor([1e-10])
mu_upper = None

# Save intermediate values
a = [a_0]

for k in range(3):
    # Compute the current approximation function:
    g_tilde = CONLIN(g, a[k])

    # Define the Lagrangian
    def lagrangian(a, mu):
        return f(a) + mu * g_tilde(a)

    # Define a_star by minimizing the Lagrangian w. r. t. a numerically
    def a_star(mu):
        return box_constrained_decent(lagrangian, a[k], a_lower, a_upper, mu=mu)

    # Define (-1 times) the dual function
    def dual_function(mu):
        return -lagrangian(a_star(mu), mu)

    # Compute the maximum of the dual function
    mu_k = box_constrained_decent(dual_function, mu_0, mu_lower, mu_upper, eta=10.0)

    # Compute the next a_k from mu_k and append it to a
    a.append(a_star(mu_k))
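
To verify convergence, we can print the recorded iterates with their objective and constraint values (a small verification sketch, not required by the exercise):

# Print the iteration history (sketch)
for i, a_i in enumerate(a):
    print(f"k={i}: a={a_i.detach().numpy()}, f={f(a_i):.4f}, g={g(a_i):.4f}")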

d) Test your optimization with the following code. Does the plot match your expectations?

In [8]:
# Plotting domain
a_1 = torch.linspace(0.1, 3.0, 200)
a_2 = torch.linspace(0.1, 3.0, 200)
a_grid = torch.stack(torch.meshgrid(a_1, a_2, indexing="xy"), dim=2)

# Make a plot
plot_contours(
    a_grid,
    f(a_grid),
    paths={"CONLIN": a},
    box=[a_lower, a_upper],
    opti=[a[-1][0], a[-1][1]],
    figsize=(5, 5),
)
plt.contour(a_1, a_2, g(a_grid), [0], colors="k", linewidths=3)
plt.contourf(a_1, a_2, g(a_grid), [0, 1], colors="gray", alpha=0.5)
plt.show()

e) This implementation is relatively slow. How could it be accelerated?

Because the approximation is separable, the inner minimization of the Lagrangian over $\mathbf{a}$ can be solved analytically instead of by a nested gradient descent. The following example gives the same result, but runs much faster.

The approximated function is separable, i.e. we can state the following for a stationary point: $$ \frac{\partial \mathcal{L}}{\partial a_j} = \begin{cases} 1 + \mu \frac{\partial g}{\partial a_j}(\mathbf{a}^k) &\quad \text{if } \frac{\partial g}{\partial a_j}(\mathbf{a}^k) \ge 0\\ 1 + \mu \frac{\partial g}{\partial a_j}(\mathbf{a}^k) \left(\frac{a_j^k}{a_j}\right)^2 &\quad \text{else} \end{cases} $$ Setting this expression to zero and solving for the optimum $a^*_j$ gives $$ a^*_j = \begin{cases} a_j^l &\quad \text{if } \frac{\partial g}{\partial a_j}(\mathbf{a}^k) \ge 0\\ \sqrt{-\mu \frac{\partial g}{\partial a_j}(\mathbf{a}^k)\left(a_j^k\right)^2} &\quad \text{else} \end{cases} $$ where we use the fact that a linear approximation with a positive slope attains its minimum at the lowest admissible value of $a_j$, i.e. $a_j^l$.

In [9]:
# Save intermediate values
a = [a_0]

for k in range(3):
    # Compute the current approximation function:
    g_tilde = CONLIN(g, a[k])

    # Define the Lagrangian
    def lagrangian(a, mu):
        return f(a) + mu * g_tilde(a)

    # Define a_star by minimizing the Lagrangian w. r. t. a analytically
    a_lin = a[k].clone().requires_grad_()
    gradients = torch.autograd.grad(g(a_lin), a_lin)[0]

    def a_star(mu):
        a_hat = torch.zeros_like(gradients)
        pg = gradients >= 0
        ng = gradients < 0
        a_hat[pg] = a_lower[pg]
        a_hat[ng] = torch.sqrt(-mu * gradients[ng] * a[k][ng] ** 2)
        return torch.clamp(a_hat, a_lower, a_upper)

    # Define (-1 times) the dual function
    def dual_function(mu):
        return -lagrangian(a_star(mu), mu)

    # Compute the maximum of the dual function
    mu_k = box_constrained_decent(dual_function, mu_0, mu_lower, mu_upper, eta=10.0)

    # Compute the next a_k from mu_k and append it to a
    a.append(a_star(mu_k))
In [10]:
# Make a plot
plot_contours(
    a_grid,
    f(a_grid),
    paths={"CONLIN analytical": a},
    box=[a_lower, a_upper],
    opti=[a[-1][0], a[-1][1]],
    figsize=(5, 5),
)
plt.contour(a_1, a_2, g(a_grid), [0], colors="k", linewidths=3)
plt.contourf(a_1, a_2, g(a_grid), [0, 1], colors="gray", alpha=0.5)
plt.show()

Task 3 - MMA¶

a) Implement a function named MMA(func, a_k, L_k, U_k) that computes an MMA approximation of the function func at position a_k with lower asymptotes L_k and upper asymptotes U_k. MMA should return an approximation function that can be evaluated at any point a.
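For reference, the MMA approximation used below reads $$\tilde{f}(\mathbf{a}) = r^k + \sum_j \left(\frac{p_j^k}{U_j^k - a_j} + \frac{q_j^k}{a_j - L_j^k}\right)$$ with $$p_j^k = \begin{cases} \left(U_j^k - a_j^k\right)^2 \frac{\partial f}{\partial a_j}(\mathbf{a}^k) &\quad \text{if } \frac{\partial f}{\partial a_j}(\mathbf{a}^k) \ge 0\\ 0 &\quad \text{else} \end{cases} \qquad q_j^k = \begin{cases} 0 &\quad \text{if } \frac{\partial f}{\partial a_j}(\mathbf{a}^k) \ge 0\\ -\left(a_j^k - L_j^k\right)^2 \frac{\partial f}{\partial a_j}(\mathbf{a}^k) &\quad \text{else} \end{cases}$$ and $$r^k = f(\mathbf{a}^k) - \sum_j \left(\frac{p_j^k}{U_j^k - a_j^k} + \frac{q_j^k}{a_j^k - L_j^k}\right).$$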

In [11]:
def MMA(func, a_k, L_k, U_k):
    # Evaluate the gradient of func at the expansion point a_k via autograd
    a_lin = a_k.clone().requires_grad_()
    grads = torch.autograd.grad(func(a_lin), a_lin)[0]
    pg = grads >= 0
    ng = grads < 0.0

    def approximation(a):
        # p_j: coefficients of the upper-asymptote terms (non-negative gradients)
        p = torch.zeros_like(grads)
        p[pg] = (U_k[pg] - a_k[pg]) ** 2 * grads[pg]
        # q_j: coefficients of the lower-asymptote terms (negative gradients)
        q = torch.zeros_like(grads)
        q[ng] = -((a_k[ng] - L_k[ng]) ** 2) * grads[ng]
        return (
            func(a_k)
            - torch.sum(p / (U_k - a_k) + q / (a_k - L_k))
            + torch.sum(p / (U_k - a) + q / (a - L_k))
        )

    return approximation

b) Test your MMA approximation with the following code. Does the plot match your expectations?

In [12]:
def test_function(x):
    return 5.0 / x + 2.0 * x


x = torch.linspace(0, 10, 100)[:, None]
x_0 = torch.tensor([1.0])
L_0 = torch.tensor([0.1])
U_0 = torch.tensor([8.0])
x_test = torch.linspace(L_0.item(), U_0.item(), 100)[:, None]
test_approximation = MMA(test_function, x_0, L_0, U_0)
plt.plot(x, test_function(x), label="f(x)")
plt.plot(x_test, [test_approximation(x_i) for x_i in x_test], label=f"MMA at x={x_0.item()}")
plt.axvline(x_0, color="tab:orange", linestyle="--")
plt.axvline(L_0, color="black", linestyle="--")
plt.axvline(U_0, color="black", linestyle="--")
plt.legend()
plt.xlabel("x")
plt.ylabel("f(x)")
plt.xlim(0, 10)
plt.ylim(-5, 25)
plt.show()

c) Solve the problem with sequential MMA approximations starting from $\mathbf{a}^0 = (2,1)^\top$ with the dual method. Record all intermediate points $\mathbf{a}^0, \mathbf{a}^1, \mathbf{a}^2, ...$ in a list called a for the asymptote updates and later plotting.
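The asymptotes are updated with the common heuristic used below: for the first two iterations they are placed at a fixed distance from the current point, $$L_j^k = a_j^k - s\,(a_j^u - a_j^l), \qquad U_j^k = a_j^k + s\,(a_j^u - a_j^l),$$ and for $k \ge 2$ $$L_j^k = a_j^k - \gamma_j\,(a_j^{k-1} - L_j^{k-1}), \qquad U_j^k = a_j^k + \gamma_j\,(U_j^{k-1} - a_j^{k-1}),$$ where $\gamma_j = s$ if the variable oscillates, i.e. $(a_j^k - a_j^{k-1})(a_j^{k-1} - a_j^{k-2}) < 0$, and $\gamma_j = 1/\sqrt{s}$ otherwise. In addition, move limits keep each subproblem strictly inside the asymptotes.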

In [13]:
# Define the initial values, lower bound, and upper bound of "a"
a_0 = torch.tensor([2.0, 1.0], requires_grad=True)
a_lower = torch.tensor([0.2, 0.2])
a_upper = torch.tensor([2.5, 2.5])

# Define the initial value, lower bound, and upper bound of "mu"
mu_0 = torch.tensor([10.0])
mu_lower = torch.tensor([1e-10])
mu_upper = None

# Save intermediate values
a = [a_0]
L = []
U = []

# Define factor s for shrinkage and growth of asymptotes
s = 0.7

for k in range(5):
    # Update asymptotes with heuristic procedure
    if k <= 1:
        L.append(a[k] - s * (a_upper - a_lower))
        U.append(a[k] + s * (a_upper - a_lower))
    else:
        L_k = torch.zeros_like(L[k - 1])
        U_k = torch.zeros_like(U[k - 1])
        # Shrink all oscillating asymptotes
        osci = (a[k] - a[k - 1]) * (a[k - 1] - a[k - 2]) < 0.0
        L_k[osci] = a[k][osci] - s * (a[k - 1][osci] - L[k - 1][osci])
        U_k[osci] = a[k][osci] + s * (U[k - 1][osci] - a[k - 1][osci])
        # Expand all non-oscillating asymptotes
        L_k[~osci] = a[k][~osci] - 1.0 / sqrt(s) * (a[k - 1][~osci] - L[k - 1][~osci])
        U_k[~osci] = a[k][~osci] + 1.0 / sqrt(s) * (U[k - 1][~osci] - a[k - 1][~osci])
        L.append(L_k)
        U.append(U_k)

    # Compute the move limits (lower and upper) for this step
    a_lower_k = torch.max(a_lower, 0.9 * L[k] + 0.1 * a[k])
    a_upper_k = torch.min(a_upper, 0.9 * U[k] + 0.1 * a[k])

    # Compute the current approximation function:
    g_tilde = MMA(g, a[k], L[k], U[k])

    # Define the Lagrangian
    def lagrangian(a, mu):
        return f(a) + mu * g_tilde(a)

    # Define a_star by minimizing the Lagrangian w. r. t. a
    def a_star(mu):
        return box_constrained_decent(lagrangian, a[k], a_lower_k, a_upper_k, mu=mu)

    # Define (-1 times) the dual function
    def dual_function(mu):
        return -lagrangian(a_star(mu), mu)

    # Compute the maximum of the dual function
    mu_k = box_constrained_decent(dual_function, mu_0, mu_lower, mu_upper, eta=10.0)

    # Compute the next a_k from mu_k and append it to a
    a.append(a_star(mu_k))
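
As a quick check (a sketch, not part of the exercise), the final design should be approximately feasible, i.e. $g(\mathbf{a}) \le 0$ up to numerical tolerance:

# Check the final MMA design (sketch)
a_final = a[-1]
print(f"a = {a_final.detach().numpy()}")
print(f"f(a) = {f(a_final):.4f}, g(a) = {g(a_final):.4f}")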

d) Test your optimization with the following code. Does the plot match your expectations?

In [14]:
# Plotting domain
a_1 = torch.linspace(0.1, 3.0, 200)
a_2 = torch.linspace(0.1, 3.0, 200)
a_grid = torch.stack(torch.meshgrid(a_1, a_2, indexing="xy"), dim=2)

# Make a plot
plot_contours(
    a_grid,
    f(a_grid),
    paths={"MMA": a},
    box=[a_lower, a_upper],
    opti=[a[-1][0], a[-1][1]],
    figsize=(5, 5),
)
plt.contour(a_1, a_2, g(a_grid), [0], colors="k", linewidths=3)
plt.contourf(a_1, a_2, g(a_grid), [0, 1], colors="gray", alpha=0.5)
plt.show()

e) This implementation is relatively slow. How could it be accelerated?

As for CONLIN, the separability of the approximation allows us to solve the inner minimization of the Lagrangian over $\mathbf{a}$ analytically instead of by a nested gradient descent. The following example gives the same result, but runs much faster.

The approximated function is separable, i.e. we can formulate the stationarity condition as $$ \frac{\partial \mathcal{L}}{\partial a_j} = \begin{cases} 1 + \mu \left(\frac{U_j^k-a_j^k}{U_j^k-a_j}\right)^2\frac{\partial g}{\partial a_j}(\mathbf{a}^k) &\quad \text{if } \frac{\partial g}{\partial a_j}(\mathbf{a}^k) \ge 0\\ 1 + \mu \left(\frac{a_j^k-L_j^k}{a_j-L_j^k}\right)^2\frac{\partial g}{\partial a_j}(\mathbf{a}^k) &\quad \text{else} \end{cases} $$ Setting this to zero and solving for $a^*_j$ gives $$ a^*_j = \begin{cases} a_j^l &\quad \text{if } \frac{\partial g}{\partial a_j}(\mathbf{a}^k) \ge 0\\ L_j^k+\sqrt{-\mu(a_j^k-L_j^k)^2\frac{\partial g}{\partial a_j}(\mathbf{a}^k)} &\quad \text{else} \end{cases} $$ where we use the fact that for a non-negative gradient the Lagrangian is monotonically increasing in $a_j$, so its minimum lies at the lowest admissible value $a_j^l$.

In [15]:
# Save intermediate values
a = [a_0]
L = []
U = []

# Define factor s for shrinkage and growth of asymptotes
s = 0.7

for k in range(5):
    # Update asymptotes with heuristic procedure
    if k <= 1:
        L.append(a[k] - s * (a_upper - a_lower))
        U.append(a[k] + s * (a_upper - a_lower))
    else:
        L_k = torch.zeros_like(L[k - 1])
        U_k = torch.zeros_like(U[k - 1])
        # Shrink oscillating asymptotes
        osci = (a[k] - a[k - 1]) * (a[k - 1] - a[k - 2]) < 0.0
        L_k[osci] = a[k][osci] - s * (a[k - 1][osci] - L[k - 1][osci])
        U_k[osci] = a[k][osci] + s * (U[k - 1][osci] - a[k - 1][osci])
        # Expand non-oscillating asymptotes
        L_k[~osci] = a[k][~osci] - 1.0 / sqrt(s) * (a[k - 1][~osci] - L[k - 1][~osci])
        U_k[~osci] = a[k][~osci] + 1.0 / sqrt(s) * (U[k - 1][~osci] - a[k - 1][~osci])
        L.append(L_k)
        U.append(U_k)

    # Compute the move limits (lower and upper) for this step
    a_lower_k = torch.max(a_lower, 0.9 * L[k] + 0.1 * a[k])
    a_upper_k = torch.min(a_upper, 0.9 * U[k] + 0.1 * a[k])

    # Compute the current approximation function:
    g_tilde = MMA(g, a[k], L[k], U[k])

    # Define the Lagrangian
    def lagrangian(a, mu):
        return f(a) + mu * g_tilde(a)

    # Define a_star by minimizing the Lagrangian w. r. t. a analytically
    a_lin = a[k].clone().requires_grad_()
    gradients = torch.autograd.grad(g(a_lin), a_lin)[0]

    def a_star(mu):
        a_hat = torch.zeros_like(gradients)
        pg = gradients >= 0
        ng = gradients < 0
        a_hat[pg] = a_lower[pg]
        a_hat[ng] = L[k][ng] + torch.sqrt(
            -mu * (a[k][ng] - L[k][ng]) ** 2 * gradients[ng]
        )

        return torch.clamp(a_hat, a_lower_k, a_upper_k)

    # Define (-1 times) the dual function
    def dual_function(mu):
        return -lagrangian(a_star(mu), mu)

    # Compute the maximum of the dual function
    mu_k = box_constrained_decent(dual_function, mu_0, mu_lower, mu_upper, eta=10.0)

    # Compute the next a_k from mu_k and append it to a
    a.append(a_star(mu_k))
In [16]:
# Make a plot
plot_contours(
    a_grid,
    f(a_grid),
    paths={"MMA analytical": a},
    box=[a_lower, a_upper],
    opti=[a[-1][0], a[-1][1]],
    figsize=(5, 5),
)
plt.contour(a_1, a_2, g(a_grid), [0], colors="k", linewidths=3)
plt.contourf(a_1, a_2, g(a_grid), [0, 1], colors="gray", alpha=0.5)
plt.show()