"""
Low-level Python bindings to the HAPC C++ core.
These functions are 1:1 with the C++ kernels exposed by :mod:`hapc_core`
(``pchal_des``, ``ridge_call``, ``mkernel_call``, ``kernel_cross_call``,
``pcghal_call``, ``pcghal_classi_call``, ``fast_pchal_call``). They are the
Python counterparts of the R C-level entry points (``pchal_des``,
``ridge_call``, ``mkernel_call``, ``kernel_cross_call``, ``pcghal_call``,
``pcghal_classi_call``, ``fast_pchal_call``) and produce numerically identical
output for the same inputs.
For high-level model fitting use :func:`hapc.single.hapc` and
:func:`hapc.cv.cv_hapc` instead.
"""
from typing import NamedTuple
import numpy as np
from . import hapc_core
class DesignOutput(NamedTuple):
"""Output of :func:`design_hapc`.
Attributes
----------
H : np.ndarray, shape (n, B)
HAL design matrix (sparse 0/1 indicators) — only stored for small
problems; large problems may set this to an empty array.
U : np.ndarray, shape (n, npcs)
Left singular vectors (principal components) of ``H``.
d : np.ndarray, shape (npcs,)
Singular values of ``H``.
V : np.ndarray, shape (B, npcs)
Right singular vectors of ``H``.
"""
H: np.ndarray
U: np.ndarray
d: np.ndarray
V: np.ndarray
class OptimizerOutput(NamedTuple):
"""Output of :func:`pcghal` and :func:`pcghal_classification`.
Attributes
----------
alpha : np.ndarray, shape (npcs,)
Coefficients on the principal-component basis.
alphaiters : np.ndarray
History of α iterates (rows = iterations).
beta : np.ndarray
Coefficients on the original HAL basis (``β = V α``).
risk : float
Empirical risk at the final iterate.
iter : int
Number of iterations executed.
"""
alpha: np.ndarray
alphaiters: np.ndarray
beta: np.ndarray
risk: float
iter: int
def _C(x: np.ndarray, dtype=np.float64) -> np.ndarray:
"""Return a C-contiguous, ``dtype``-typed copy / view of ``x``."""
a = np.asarray(x, dtype=dtype)
return a if a.flags["C_CONTIGUOUS"] else np.ascontiguousarray(a)
[docs]
def design_hapc(X: np.ndarray, max_degree: int, npcs: int,
center: bool = True) -> DesignOutput:
"""Compute the PC-HAL design ingredients ``(H, U, d, V)``.
Counterpart to R ``design.hapc()``.
Parameters
----------
X : np.ndarray, shape (n, p)
Feature matrix.
max_degree : int
Maximum interaction order for the HAL basis.
npcs : int
Number of principal components to keep.
center : bool, default True
Whether to centre ``H`` before SVD.
Returns
-------
DesignOutput
Named tuple ``(H, U, d, V)``.
Examples
--------
>>> import numpy as np
>>> from hapc import design_hapc
>>> X = np.random.randn(50, 3)
>>> des = design_hapc(X, max_degree=2, npcs=20)
>>> des.U.shape
(50, 20)
"""
X = _C(X)
n = X.shape[0]
cap = n - 1 if center else n
npcs = max(1, min(int(npcs), cap))
out = hapc_core.pchal_des(X, int(max_degree), int(npcs), bool(center))
return DesignOutput(out.H, out.U, out.d, out.V)
[docs]
def ridge_regression(Y: np.ndarray, U: np.ndarray, D2: np.ndarray,
lambda_: float) -> np.ndarray:
"""Closed-form ridge in the rotated basis: ``α = (D² + λ)⁻¹ D Uᵀ Y``.
Counterpart to R ``ridge_call()``.
Parameters
----------
Y : np.ndarray, shape (n,)
Response.
U : np.ndarray, shape (n, k)
Left singular vectors.
D2 : np.ndarray, shape (k,)
Squared singular values.
lambda_ : float
Regularisation parameter.
Returns
-------
np.ndarray, shape (k,)
Ridge coefficients on the PC basis.
"""
return hapc_core.ridge_call(_C(Y), _C(U), _C(D2), float(lambda_))
[docs]
def fast_pchal(U: np.ndarray, D2: np.ndarray, Y: np.ndarray,
lambda_: float) -> np.ndarray:
"""Closed-form LASSO in the rotated basis (soft-thresholding).
Counterpart to R ``fast_pchal_call()``.
Parameters
----------
U : np.ndarray, shape (n, k)
Left singular vectors.
D2 : np.ndarray, shape (k,)
Squared singular values.
Y : np.ndarray, shape (n,)
Response.
lambda_ : float
Regularisation parameter.
Returns
-------
np.ndarray, shape (k,)
LASSO coefficients on the PC basis.
"""
return hapc_core.fast_pchal_call(_C(U), _C(D2), _C(Y), float(lambda_))
[docs]
def kernel_hapc(X: np.ndarray, max_degree: int = 1,
center: bool = True) -> np.ndarray:
"""HAL kernel matrix ``K = H Hᵀ`` (computed implicitly).
Counterpart to R ``kernel.hapc()``.
Parameters
----------
X : np.ndarray, shape (n, p)
Feature matrix.
max_degree : int, default 1
Maximum interaction order.
center : bool, default True
Whether to centre ``H``.
Returns
-------
np.ndarray, shape (n, n)
Kernel matrix.
"""
return hapc_core.mkernel_call(_C(X), int(max_degree), bool(center))
[docs]
def cross_kernel_hapc(X: np.ndarray, Xte: np.ndarray, max_degree: int = 1,
center: bool = True) -> np.ndarray:
"""Cross-kernel ``K = H_te H_trᵀ`` for prediction.
Counterpart to R ``cross_kernel.hapc()``.
Parameters
----------
X : np.ndarray, shape (n_tr, p)
Training features (defines the basis).
Xte : np.ndarray, shape (n_te, p)
Test features.
max_degree : int, default 1
Maximum interaction order.
center : bool, default True
Whether to apply the same column centring as in training.
Returns
-------
np.ndarray, shape (n_te, n_tr)
Cross-kernel matrix.
"""
return hapc_core.kernel_cross_call(_C(X), _C(Xte), int(max_degree),
bool(center))
def pcghal(Y: np.ndarray, Xtilde: np.ndarray, ENn: np.ndarray,
alpha0: np.ndarray, max_iter: int = 5000, tol: float = 1e-3,
step_factor: float = 0.8, verbose: bool = False,
crit: str = "grad") -> OptimizerOutput:
"""Projected gradient descent for ``norm="sv"``, gaussian loss.
Counterpart to R ``pcghal_call()`` (called by ``hapc(norm="sv")``).
Parameters
----------
Y : np.ndarray, shape (n,)
Centred response.
Xtilde : np.ndarray, shape (n, k)
``U @ diag(d)``.
ENn : np.ndarray, shape (B, k)
Right singular vectors ``V``.
alpha0 : np.ndarray, shape (k,)
Initial α (e.g. from :func:`fast_pchal` or :func:`ridge_regression`).
max_iter : int, default 5000
Maximum number of PGD iterations.
tol : float, default 1e-3
Stopping tolerance.
step_factor : float, default 0.8
Step-size factor ``δ = step_factor / max_j |h*(j)|``.
verbose : bool, default False
Print iteration progress.
crit : str, default "grad"
Stopping criterion: ``"grad"`` (gradient inf-norm) or ``"risk"``
(relative risk decrease).
Returns
-------
OptimizerOutput
"""
res = hapc_core.pcghal_call(_C(Y), _C(Xtilde), _C(ENn), _C(alpha0),
int(max_iter), float(tol),
float(step_factor), bool(verbose), str(crit))
return OptimizerOutput(res.alpha, res.alphaiters, res.beta,
res.risk, res.iter)
def pcghal_classification(Y: np.ndarray, Xtilde: np.ndarray, ENn: np.ndarray,
alpha0: np.ndarray, max_iter: int = 5000,
tol: float = 1e-3, step_factor: float = 0.8,
verbose: bool = False) -> OptimizerOutput:
"""Projected gradient descent for ``norm="sv"``, logistic loss.
Counterpart to R ``pcghal_classi_call()`` /
``pc_hal_classi_cpp()``.
Parameters
----------
Y : np.ndarray, shape (n,)
Response in {-1, +1}.
Xtilde : np.ndarray, shape (n, k)
``U @ diag(d)``.
ENn : np.ndarray, shape (B, k)
Right singular vectors ``V``.
alpha0 : np.ndarray, shape (k,)
Initial α (typically logistic ridge).
max_iter, tol, step_factor, verbose
See :func:`pcghal`.
Returns
-------
OptimizerOutput
"""
res = hapc_core.pcghal_classi_call(_C(Y), _C(Xtilde), _C(ENn), _C(alpha0),
int(max_iter), float(tol),
float(step_factor), bool(verbose))
return OptimizerOutput(res.alpha, res.alphaiters, res.beta,
res.risk, res.iter)
def pc_hal_classi(Y: np.ndarray, Xtilde: np.ndarray, ENn: np.ndarray,
alpha: np.ndarray, **kwargs) -> OptimizerOutput:
"""Alias of :func:`pcghal_classification` (R name ``pc_hal_classi_cpp``).
Parameters match ``pcghal_classification`` with ``alpha`` as the initial
coefficient vector (same role as ``alpha0`` in :func:`pcghal_classification`).
"""
return pcghal_classification(Y, Xtilde, ENn, alpha, **kwargs)
# --- Backward-compatible aliases (pre-0.3.0 names) -------------------------
pchal_design = design_hapc
mkernel = kernel_hapc
kernel_cross = cross_kernel_hapc
__all__ = [
"DesignOutput",
"OptimizerOutput",
"design_hapc",
"kernel_hapc",
"cross_kernel_hapc",
"ridge_regression",
"fast_pchal",
"pcghal",
"pcghal_classification",
"pc_hal_classi",
# aliases
"pchal_design",
"mkernel",
"kernel_cross",
]