Source code for moments.LD.Inference

import numpy as np
import math
import os, sys

from moments.LD import Numerics
from moments.LD import Util

import copy
import moments

# from moments.Misc import delayed_flush
from .. import Misc
from moments.LD.LDstats_mod import LDstats

from scipy.special import gammaln
import scipy.optimize

_counter = 0


[docs]def sigmaD2(y, normalization=0):
    """
    Compute the :math:`\\sigma_D^2` statistics normalizing by the heterozygosities
    in a given population.

    :param y: The input data.
    :type y: :class:`LDstats` object
    :param normalization: The index of the normalizing population
        (normalized by pi2_i_i_i_i and H_i_i), default set to 0.
    :type normalization: int, optional
    """
    if normalization >= y.num_pops or normalization < 0:
        raise ValueError("Normalization index must be for a present population")

    out = LDstats(copy.deepcopy(y[:]), num_pops=y.num_pops, pop_ids=y.pop_ids)

    for i in range(len(y))[:-1]:
        out[i] /= y[i][y.names()[0].index("pi2_{0}_{0}_{0}_{0}".format(normalization))]
    out[-1] /= y[-1][y.names()[1].index("H_{0}_{0}".format(normalization))]

    return out


[docs]def bin_stats(model_func, params, rho=[], theta=0.001, spread=None, kwargs={}):
    """
    Computes LD statist for a given model function over bins defined by ``rho``.
    Here, ``rho`` gives the bin edges, and we assume no spaces between bins. That
    is, if the length of the input recombination rates is :math:`l`, the number of
    bins is :math:`l-1`.

    :param model_func: The model function that takes parameters in the form
        ``model_func(params, rho=rho, theta=theta, **kwargs)``.
    :param params: The parameters to evaluate the model at.
    :type params: list of floats
    :param rho: The scaled recombination rate bin edges.
    :type rho: list of floats
    :param theta: The mutation rate
    :type theta: float, optional
    :param spread: A list of length rho-1 (number of bins), where each entry is an
        array of length rho+1 (number of bins plus amount outside bin range to each
        side). Each array must sum to one.
    :type spread: list of arrays
    :param kwargs: Extra keyword arguments to pass to ``model_func``.
    """
    if len(rho) < 2:
        raise ValueError(
            "number of recombination rates (bin edges) must be greater than one"
        )
    rho_mids = (np.array(rho[:-1]) + np.array(rho[1:])) / 2
    y_edges = model_func(params, rho=rho, theta=theta, **kwargs)
    y_mids = model_func(params, rho=rho_mids, theta=theta, **kwargs)
    y = [
        1.0 / 6 * (y_edges[i] + y_edges[i + 1] + 4 * y_mids[i])
        for i in range(len(rho_mids))
    ]
    if spread is None:
        y.append(y_edges[-1])
        return LDstats(y, num_pops=y_edges.num_pops, pop_ids=y_edges.pop_ids)
    else:
        if len(spread) != len(rho) - 1:
            raise ValueError("spread must be length of bins")
        y_spread = []
        for distr in spread:
            if len(distr) != len(rho) + 1:
                raise ValueError(
                    "spread distr is not the correct length (len(bins) + 2)"
                )
            if not np.isclose(np.sum(distr), 1):
                raise ValueError("spread distributions must sum to one")
            y_spread.append(
                (distr[0] * y_edges[0] + distr[1:-1].dot(y) + distr[-1] * y_edges[-2])
            )
        y_spread.append(y_edges[-1])
        return LDstats(y_spread, num_pops=y_edges.num_pops, pop_ids=y_edges.pop_ids)


[docs]def remove_normalized_lds(y, normalization=0):
    """
    Returns LD statistics with the normalizing statistic removed.

    :param y: An LDstats object that has been normalized to get
        :math:`\\sigma_D^2`-formatted statistics.
    :type y: :class:`LDstats` object
    :param normalization: The index of the normalizing population.
    :type normalization: int
    """
    to_delete_ld = y.names()[0].index("pi2_{0}_{0}_{0}_{0}".format(normalization))
    to_delete_h = y.names()[1].index("H_{0}_{0}".format(normalization))
    for i in range(len(y) - 1):
        y[i] = np.delete(y[i], to_delete_ld)
    y[-1] = np.delete(y[-1], to_delete_h)
    return y


[docs]def remove_normalized_data(
    means, varcovs, normalization=0, num_pops=1, statistics=None
):
    """
    Returns data means and covariance matrices with the normalizing
    statistics removed.

    :param means: List of means normalized statistics, where each entry is the
        full set of statistics for a given recombination distance.
    :type means: list of arrays
    :param varcovs: List of the corresponding variance covariance matrices.
    :type varcovs: list of arrays
    :param normalization: The index of the normalizing population.
    :type normalization: int
    :param num_pops: The number of populations in the data set.
    :type num_pops: int
    """
    if len(means) != len(varcovs):
        raise ValueError("Different lengths of means and covariances")
    if statistics is None:
        stats = Util.moment_names(num_pops)
    else:
        stats = copy.copy(statistics)
    to_delete_ld = stats[0].index("pi2_{0}_{0}_{0}_{0}".format(normalization))
    to_delete_h = stats[1].index("H_{0}_{0}".format(normalization))
    ms = []
    vcs = []
    for i in range(len(means) - 1):
        if (
            len(means[i]) != len(stats[0])
            or varcovs[i].shape[0] != len(stats[0])
            or varcovs[i].shape[1] != len(stats[0])
        ):
            raise ValueError(
                "Data and statistics mismatch. Some statistics are missing "
                "or the incorrect number of populations was given."
            )
        ms.append(np.delete(means[i], to_delete_ld))
        vcs.append(
            np.delete(np.delete(varcovs[i], to_delete_ld, axis=0), to_delete_ld, axis=1)
        )
    ms.append(np.delete(means[-1], to_delete_h))
    vcs.append(
        np.delete(np.delete(varcovs[-1], to_delete_h, axis=0), to_delete_h, axis=1)
    )
    if statistics is None:
        return ms, vcs
    else:
        stats[0].pop(to_delete_ld)
        stats[1].pop(to_delete_h)
        return ms, vcs, stats


[docs]def remove_nonpresent_statistics(y, statistics=[[], []]):
    """
    Removes data not found in the given set of statistics.

    :param y: LD statistics.
    :type y: :class:`LDstats` object.
    :param statistics: A list of lists for two and one locus statistics to keep.
    """
    to_delete = [[], []]
    for j in range(2):
        for i, s in enumerate(y.names()[j]):
            if s not in statistics[j]:
                to_delete[j].append(i)
    for i in range(len(y) - 1):
        y[i] = np.delete(y[i], to_delete[0])
    y[-1] = np.delete(y[-1], to_delete[1])
    return y


def _multivariate_normal_pdf(x, mu, Sigma):
    p = len(x)
    return np.sqrt(np.linalg.det(Sigma) / (2 * math.pi) ** p) * np.exp(
        -1.0 / 2 * np.dot(np.dot((x - mu).transpose(), np.linalg.inv(Sigma)), x - mu)
    )


def _ll(x, mu, Sigma_inv):
    """
    x = data
    mu = model function output
    Sigma_inv = inverse of the variance-covariance matrix
    """
    if len(x) == 0:
        return 0
    else:
        return -1.0 / 2 * np.dot(np.dot((x - mu).transpose(), Sigma_inv), x - mu)
        # - len(x)*np.pi - 1./2*np.log(np.linalg.det(Sigma))


_varcov_inv_cache = {}


[docs]def ll_over_bins(xs, mus, Sigmas):
    """
    Compute the composite log-likelihood over LD and heterozygosity statistics, given
    data and expectations. Inputs must be in the same order, and we assume each bin
    is independent, so we sum _ll(x, mu, Sigma) over each bin.

    :param xs: A list of data arrays.
    :param mus: A list of model function output arrays, same length as ``xs``.
    :param Sigmas: A list of var-cov matrices, same length as ``xs``.
    """
    it = iter([xs, mus, Sigmas])
    the_len = len(next(it))
    if not all(len(l) == the_len for l in it):
        raise ValueError(
            "Lists of data, means, and varcov matrices must be the same length"
        )
    ll_vals = []
    for ii in range(len(xs)):
        # get var-cov inverse from cache dictionary, or compute it
        if ii in _varcov_inv_cache:
            if np.all(_varcov_inv_cache[ii]["data"] == Sigmas[ii]):
                Sigma_inv = _varcov_inv_cache[ii]["inv"]
            else:
                _varcov_inv_cache[ii]["data"] = Sigmas[ii]
                Sigma_inv = np.linalg.inv(Sigmas[ii])
                _varcov_inv_cache[ii]["inv"] = Sigma_inv
        else:
            _varcov_inv_cache[ii] = {}
            _varcov_inv_cache[ii]["data"] = Sigmas[ii]
            Sigma_inv = np.linalg.inv(Sigmas[ii])
            _varcov_inv_cache[ii]["inv"] = Sigma_inv
        # append log-likelihood for this bin
        ll_vals.append(_ll(xs[ii], mus[ii], Sigma_inv))
    # sum over bins to get composite log-likelihood
    ll_val = np.sum(ll_vals)
    return ll_val


_out_of_bounds_val = -1e12


def _object_func(
    params,
    model_func,
    means,
    varcovs,
    fs=None,
    rs=None,
    theta=None,
    u=None,
    Ne=None,
    lower_bound=None,
    upper_bound=None,
    verbose=0,
    flush_delay=0,
    normalization=0,
    func_args=[],
    func_kwargs={},
    fixed_params=None,
    use_afs=False,
    Leff=None,
    multinom=True,
    ns=None,
    statistics=None,
    pass_Ne=False,
    spread=None,
    output_stream=sys.stdout,
):
    global _counter
    _counter += 1

    # Deal with fixed parameters
    params_up = _project_params_up(params, fixed_params)

    # Check our parameter bounds
    if lower_bound is not None:
        for pval, bound in zip(params_up, lower_bound):
            if bound is not None and pval < bound:
                return -_out_of_bounds_val
    if upper_bound is not None:
        for pval, bound in zip(params_up, upper_bound):
            if bound is not None and pval > bound:
                return -_out_of_bounds_val

    all_args = [params_up] + list(func_args)

    if theta is None:
        if Ne is None:
            Ne = params_up[-1]
            theta = 4 * Ne * u
            rhos = [4 * Ne * r for r in rs]
            if pass_Ne == False:
                all_args = [all_args[0][:-1]]
            else:
                all_args = [all_args[0][:]]
        else:
            theta = 4 * Ne * u
            rhos = [4 * Ne * r for r in rs]
    else:
        if Ne is not None:
            rhos = [4 * Ne * r for r in rs]

    ## first get ll of afs
    if use_afs == True:
        if Leff is None:
            model = theta * model_func[1](all_args[0], ns)
        else:
            model = Leff * theta * model_func[1](all_args[0], ns)
        if fs.folded:
            model = model.fold()
        if multinom == True:
            ll_afs = moments.Inference.ll_multinom(model, fs)
        else:
            ll_afs = moments.Inference.ll(model, fs)

    ## next get ll for LD stats
    func_kwargs = {"theta": theta, "rho": rhos, "spread": spread}
    stats = bin_stats(model_func[0], *all_args, **func_kwargs)
    stats = sigmaD2(stats, normalization=normalization)
    if statistics == None:
        stats = remove_normalized_lds(stats, normalization=normalization)
    else:
        stats = remove_nonpresent_statistics(stats, statistics=statistics)
    simp_stats = stats[:-1]
    het_stats = stats[-1]

    if use_afs == False:
        simp_stats.append(het_stats)

    ## resulting ll from afs (if used) plus ll from rho bins
    if use_afs == True:
        result = ll_afs + ll_over_bins(means, simp_stats, varcovs)
    else:
        result = ll_over_bins(means, simp_stats, varcovs)

    # Bad result
    if np.isnan(result):
        print("got bad results...")
        result = _out_of_bounds_val

    if (verbose > 0) and (_counter % verbose == 0):
        param_str = "array([%s])" % (", ".join(["%- 12g" % v for v in params_up]))
        output_stream.write(
            "%-8i, %-12g, %s%s" % (_counter, result, param_str, os.linesep)
        )
        Misc.delayed_flush(delay=flush_delay)

    return -result


def _object_func_log(log_params, *args, **kwargs):
    return _object_func(np.exp(log_params), *args, **kwargs)


[docs]def optimize_log_fmin(
    p0,
    data,
    model_func,
    rs=None,
    theta=None,
    u=2e-8,
    Ne=None,
    lower_bound=None,
    upper_bound=None,
    verbose=0,
    flush_delay=0.5,
    normalization=0,
    func_args=[],
    func_kwargs={},
    fixed_params=None,
    use_afs=False,
    Leff=None,
    multinom=False,
    ns=None,
    statistics=None,
    pass_Ne=False,
    spread=None,
    maxiter=None,
    maxfun=None,
):
    """
    Optimize (using the log of) the parameters using a downhill simplex
    algorithm. Initial parameters ``p0``, the data ``[means, varcovs]``,
    the demographic ``model_func``, and ``rs`` to specify recombination
    bin edges are required. ``Ne`` must either be specified as a keyword
    argument or is included as the *last* parameter in ``p0``.

    :param p0: The initial guess for demographic parameters,
        demography parameters plus (optionally) Ne.
    :type p0: list
    :param data: The parsed data[means, varcovs, fs]. The frequency spectrum
        fs is optional, and used only if use_afs=True.

        - Means: The list of mean statistics within each bin
          (has length ``len(rs)`` or ``len(rs) - 1`` if using AFS). If we are
          not using the AFS, which is typical, the heterozygosity statistics
          come last.
        - varcovs: The list of varcov matrices matching the data in ``means``.

    :type data: list
    :param model_func: The demographic model to compute statistics
        for a given rho. If we are using AFS, it's a list of the two models
        [LD func, AFS func]. If we're using LD stats alone, we pass a single LD
        model  as a list: [LD func].
    :type model_func: list
    :param rs: The list of raw recombination rates defining bin edges.
    :type rs: list
    :param theta: The population scaled per base mutation rate
        (4*Ne*mu, not 4*Ne*mu*L).
    :type theta: float, optional
    :param u: The raw per base mutation rate.
        Cannot be used with ``theta``.
    :type u: float, optional
    :param Ne: The fixed effective population size to scale
        u and r. If ``Ne`` is a parameter to fit, it should be the last parameter
        in ``p0``.
    :type Ne: float, optional
    :param lower_bound: Defaults to ``None``. Constraints on the
        lower bounds during optimization. These are given as lists of the same
        length of the parameters.
    :type lower_bound: list, optional
    :param upper_bound: Defaults to ``None``. Constraints on the
        upper bounds during optimization. These are given as lists of the same
        length of the parameters.
    :type upper_bound: list, optional
    :param verbose: If an integer greater than 0, prints updates
        of the optimization procedure at intervals given by that spacing.
    :type verbose: int, optional
    :param func_args: Additional arguments to be passed
        to ``model_func``.
    :type func_args: list, optional
    :param func_kwargs: Additional keyword arguments to be
        passed to ``model_func``.
    :type func_kwargs: dict, optional
    :param fixed_params: Defaults to ``None``. To fix some
        parameters, this should be a list of equal length as ``p0``, with
        ``None`` for parameters to be fit and fixed values at corresponding
        indexes.
    :type fixed_params: list, optional
    :param use_afs: Defaults to ``False``. We can pass a model
        to compute the frequency spectrum and use
        that instead of heterozygosity statistics for single-locus data.
    :type use_afs: bool, optional
    :param Leff: The effective length of genome from which
        the fs was generated (only used if fitting to afs).
    :type Leff: float, optional
    :param multinom: Only used if we are fitting the AFS.
        If ``True``, the likelihood is computed for an optimally rescaled FS.
        If ``False``, the likelihood is computed for a fixed scaling of the FS
        found by theta=4*Ne*u and Leff
    :type multinom: bool, optional
    :param ns: The sample size, which is only needed
        if we are using the frequency spectrum, as the sample size does not
        affect mean LD statistics.
    :type ns: list of ints, optional
    :param statistics: Defaults to ``None``, which assumes that
        all statistics are present and in the conventional default order. If
        the data is missing some statistics, we must specify which statistics
        are present using the subset of statistic names given by
        ``moments.LD.Util.moment_names(num_pops)``.
    :type statistics: list, optional
    :param pass_Ne: Defaults to ``False``. If ``True``, the
        demographic model includes ``Ne`` as a parameter (in the final position
        of input parameters).
    :type pass_Ne: bool, optional
    :param maxiter: Defaults to None. Maximum number of iterations to perform.
    :type maxiter: int
    :param maxfun: Defaults to None. Maximum number of function evaluations to make.
    :type maxfun: int
    """
    output_stream = sys.stdout

    means = data[0]
    varcovs = data[1]
    if use_afs is True:
        try:
            fs = data[2]
        except IndexError:
            raise ValueError(
                "if use_afs=True, need to pass frequency spectrum, "
                "as data=[means,varcovs,fs]"
            )

        if ns is None:
            raise ValueError("need to set ns if we are fitting frequency spectrum")

    else:
        fs = None

    if rs is None:
        raise ValueError("need to pass rs as bin edges")

    # get num_pops
    if Ne is None:
        if not pass_Ne:
            y = model_func[0](p0[:-1])
        else:
            y = model_func[0](p0[:])
    else:
        y = model_func[0](p0)
    num_pops = y.num_pops

    # remove normalized statisticsd
    ms = copy.copy(means)
    vcs = copy.copy(varcovs)
    if statistics is None:
        # if statistics is not None, assume we already filtered out the data
        ms, vcs = remove_normalized_data(
            ms, vcs, normalization=normalization, num_pops=num_pops
        )

    args = (
        model_func,
        ms,
        vcs,
        fs,
        rs,
        theta,
        u,
        Ne,
        lower_bound,
        upper_bound,
        verbose,
        flush_delay,
        normalization,
        func_args,
        func_kwargs,
        fixed_params,
        use_afs,
        Leff,
        multinom,
        ns,
        statistics,
        pass_Ne,
        spread,
        output_stream,
    )

    p0 = _project_params_down(p0, fixed_params)
    outputs = scipy.optimize.fmin(
        _object_func_log,
        np.log(p0),
        args=args,
        full_output=True,
        disp=False,
        maxiter=maxiter,
        maxfun=maxfun,
    )

    xopt, fopt, iter, funcalls, warnflag = outputs
    xopt = _project_params_up(np.exp(xopt), fixed_params)

    return xopt, fopt


[docs]def optimize_log_powell(
    p0,
    data,
    model_func,
    rs=None,
    theta=None,
    u=2e-8,
    Ne=None,
    lower_bound=None,
    upper_bound=None,
    verbose=0,
    flush_delay=0.5,
    normalization=0,
    func_args=[],
    func_kwargs={},
    fixed_params=None,
    use_afs=False,
    Leff=None,
    multinom=False,
    ns=None,
    statistics=None,
    pass_Ne=False,
    spread=None,
    maxiter=None,
    maxfun=None,
):
    """
    Optimize (using the log of) the parameters using the modified Powell's
    method, which optimizes slices of parameter space sequentially. Initial
    parameters ``p0``, the data ``[means, varcovs]``,
    the demographic ``model_func``, and ``rs`` to specify recombination
    bin edges are required. ``Ne`` must either be specified as a keyword
    argument or is included as the *last* parameter in ``p0``.

    :param p0: The initial guess for demographic parameters,
        demography parameters plus (optionally) Ne.
    :type p0: list
    :param data: The parsed data[means, varcovs, fs]. The frequency spectrum
        fs is optional, and used only if use_afs=True.

        - Means: The list of mean statistics within each bin
          (has length ``len(rs)`` or ``len(rs) - 1`` if using AFS). If we are
          not using the AFS, which is typical, the heterozygosity statistics
          come last.
        - varcovs: The list of varcov matrices matching the data in ``means``.

    :type data: list
    :param model_func: The demographic model to compute statistics
        for a given rho. If we are using AFS, it's a list of the two models
        [LD func, AFS func]. If we're using LD stats alone, we pass a single LD
        model  as a list: [LD func].
    :type model_func: list
    :param rs: The list of raw recombination rates defining bin edges.
    :type rs: list
    :param theta: The population scaled per base mutation rate
        (4*Ne*mu, not 4*Ne*mu*L).
    :type theta: float, optional
    :param u: The raw per base mutation rate.
        Cannot be used with ``theta``.
    :type u: float, optional
    :param Ne: The fixed effective population size to scale
        u and r. If ``Ne`` is a parameter to fit, it should be the last parameter
        in ``p0``.
    :type Ne: float, optional
    :param lower_bound: Defaults to ``None``. Constraints on the
        lower bounds during optimization. These are given as lists of the same
        length of the parameters.
    :type lower_bound: list, optional
    :param upper_bound: Defaults to ``None``. Constraints on the
        upper bounds during optimization. These are given as lists of the same
        length of the parameters.
    :type upper_bound: list, optional
    :param verbose: If an integer greater than 0, prints updates
        of the optimization procedure at intervals given by that spacing.
    :type verbose: int, optional
    :param func_args: Additional arguments to be passed
        to ``model_func``.
    :type func_args: list, optional
    :param func_kwargs: Additional keyword arguments to be
        passed to ``model_func``.
    :type func_kwargs: dict, optional
    :param fixed_params: Defaults to ``None``. To fix some
        parameters, this should be a list of equal length as ``p0``, with
        ``None`` for parameters to be fit and fixed values at corresponding
        indexes.
    :type fixed_params: list, optional
    :param use_afs: Defaults to ``False``. We can pass a model
        to compute the frequency spectrum and use
        that instead of heterozygosity statistics for single-locus data.
    :type use_afs: bool, optional
    :param Leff: The effective length of genome from which
        the fs was generated (only used if fitting to afs).
    :type Leff: float, optional
    :param multinom: Only used if we are fitting the AFS.
        If ``True``, the likelihood is computed for an optimally rescaled FS.
        If ``False``, the likelihood is computed for a fixed scaling of the FS
        found by theta=4*Ne*u and Leff
    :type multinom: bool, optional
    :param ns: The sample size, which is only needed
        if we are using the frequency spectrum, as the sample size does not
        affect mean LD statistics.
    :type ns: list of ints, optional
    :param statistics: Defaults to ``None``, which assumes that
        all statistics are present and in the conventional default order. If
        the data is missing some statistics, we must specify which statistics
        are present using the subset of statistic names given by
        ``moments.LD.Util.moment_names(num_pops)``.
    :type statistics: list, optional
    :param pass_Ne: Defaults to ``False``. If ``True``, the
        demographic model includes ``Ne`` as a parameter (in the final position
        of input parameters).
    :type pass_Ne: bool, optional
    :param maxiter: Defaults to None. Maximum number of iterations to perform.
    :type maxiter: int
    :param maxfun: Defaults to None. Maximum number of function evaluations to make.
    :type maxfun: int
    """
    output_stream = sys.stdout

    means = data[0]
    varcovs = data[1]
    if use_afs:
        try:
            fs = data[2]
        except IndexError:
            raise ValueError(
                "if use_afs=True, need to pass frequency spectrum in data=[means,varcovs,fs]"
            )

        if ns is None:
            raise ValueError("need to set ns if we are fitting frequency spectrum")

    else:
        fs = None

    if rs is None:
        raise ValueError("need to pass rs as bin edges")

    # get num_pops
    if Ne is None:
        if not pass_Ne:
            y = model_func[0](p0[:-1])
        else:
            y = model_func[0](p0[:])
    else:
        y = model_func[0](p0)
    num_pops = y.num_pops

    # remove normalized statistics
    ms = copy.copy(means)
    vcs = copy.copy(varcovs)
    if statistics is None:
        # if statistics is not None, assume we already filtered out the data
        ms, vcs = remove_normalized_data(
            ms, vcs, normalization=normalization, num_pops=num_pops
        )

    args = (
        model_func,
        ms,
        vcs,
        fs,
        rs,
        theta,
        u,
        Ne,
        lower_bound,
        upper_bound,
        verbose,
        flush_delay,
        normalization,
        func_args,
        func_kwargs,
        fixed_params,
        use_afs,
        Leff,
        multinom,
        ns,
        statistics,
        pass_Ne,
        spread,
        output_stream,
    )

    p0 = _project_params_down(p0, fixed_params)
    outputs = scipy.optimize.fmin_powell(
        _object_func_log,
        np.log(p0),
        args=args,
        full_output=True,
        disp=False,
        maxiter=maxiter,
        maxfun=maxfun,
    )

    xopt, fopt, direc, iter, funcalls, warnflag = outputs
    xopt = _project_params_up(np.exp(xopt), fixed_params)

    return xopt, fopt


[docs]def optimize_log_lbfgsb(
    p0,
    data,
    model_func,
    rs=None,
    theta=None,
    u=2e-8,
    Ne=None,
    lower_bound=None,
    upper_bound=None,
    verbose=0,
    flush_delay=0.5,
    normalization=0,
    func_args=[],
    func_kwargs={},
    fixed_params=None,
    use_afs=False,
    Leff=None,
    multinom=False,
    ns=None,
    statistics=None,
    pass_Ne=False,
    spread=None,
    maxiter=40000,
    epsilon=1e-3,
    pgtol=1e-5,
):
    """
    Optimize (using the log of) the parameters using the modified Powell's
    method, which optimizes slices of parameter space sequentially. Initial
    parameters ``p0``, the data ``[means, varcovs]``,
    the demographic ``model_func``, and ``rs`` to specify recombination
    bin edges are required. ``Ne`` must either be specified as a keyword
    argument or is included as the *last* parameter in ``p0``.

    It is best at burrowing down a single minimum. This method is
    better than optimize_log if the optimum lies at one or more of the
    parameter bounds. However, if your optimum is not on the bounds, this
    method may be much slower.

    Because this works in log(params), it cannot explore values of params < 0.
    It should also perform better when parameters range over scales.

    The L-BFGS-B method was developed by Ciyou Zhu, Richard Byrd, and Jorge
    Nocedal. The algorithm is described in:

    - R. H. Byrd, P. Lu and J. Nocedal. A Limited Memory Algorithm for Bound
      Constrained Optimization, (1995), SIAM Journal on Scientific and
      Statistical Computing , 16, 5, pp. 1190-1208.
    - C. Zhu, R. H. Byrd and J. Nocedal. L-BFGS-B: Algorithm 778: L-BFGS-B,
      FORTRAN routines for large scale bound constrained optimization (1997),
      ACM Transactions on Mathematical Software, Vol 23, Num. 4, pp. 550-560.

    :param p0: The initial guess for demographic parameters,
        demography parameters plus (optionally) Ne.
    :type p0: list
    :param data: The parsed data[means, varcovs, fs]. The frequency spectrum
        fs is optional, and used only if use_afs=True.

        - Means: The list of mean statistics within each bin
          (has length ``len(rs)`` or ``len(rs) - 1`` if using AFS). If we are
          not using the AFS, which is typical, the heterozygosity statistics
          come last.
        - varcovs: The list of varcov matrices matching the data in ``means``.

    :type data: list
    :param model_func: The demographic model to compute statistics
        for a given rho. If we are using AFS, it's a list of the two models
        [LD func, AFS func]. If we're using LD stats alone, we pass a single LD
        model  as a list: [LD func].
    :type model_func: list
    :param rs: The list of raw recombination rates defining bin edges.
    :type rs: list
    :param theta: The population scaled per base mutation rate
        (4*Ne*mu, not 4*Ne*mu*L).
    :type theta: float, optional
    :param u: The raw per base mutation rate.
        Cannot be used with ``theta``.
    :type u: float, optional
    :param Ne: The fixed effective population size to scale
        u and r. If ``Ne`` is a parameter to fit, it should be the last parameter
        in ``p0``.
    :type Ne: float, optional
    :param lower_bound: Defaults to ``None``. Constraints on the
        lower bounds during optimization. These are given as lists of the same
        length of the parameters.
    :type lower_bound: list, optional
    :param upper_bound: Defaults to ``None``. Constraints on the
        upper bounds during optimization. These are given as lists of the same
        length of the parameters.
    :type upper_bound: list, optional
    :param verbose: If an integer greater than 0, prints updates
        of the optimization procedure at intervals given by that spacing.
    :type verbose: int, optional
    :param func_args: Additional arguments to be passed
        to ``model_func``.
    :type func_args: list, optional
    :param func_kwargs: Additional keyword arguments to be
        passed to ``model_func``.
    :type func_kwargs: dict, optional
    :param fixed_params: Defaults to ``None``. To fix some
        parameters, this should be a list of equal length as ``p0``, with
        ``None`` for parameters to be fit and fixed values at corresponding
        indexes.
    :type fixed_params: list, optional
    :param use_afs: Defaults to ``False``. We can pass a model
        to compute the frequency spectrum and use
        that instead of heterozygosity statistics for single-locus data.
    :type use_afs: bool, optional
    :param Leff: The effective length of genome from which
        the fs was generated (only used if fitting to afs).
    :type Leff: float, optional
    :param multinom: Only used if we are fitting the AFS.
        If ``True``, the likelihood is computed for an optimally rescaled FS.
        If ``False``, the likelihood is computed for a fixed scaling of the FS
        found by theta=4*Ne*u and Leff
    :type multinom: bool, optional
    :param ns: The sample size, which is only needed
        if we are using the frequency spectrum, as the sample size does not
        affect mean LD statistics.
    :type ns: list of ints, optional
    :param statistics: Defaults to ``None``, which assumes that
        all statistics are present and in the conventional default order. If
        the data is missing some statistics, we must specify which statistics
        are present using the subset of statistic names given by
        ``moments.LD.Util.moment_names(num_pops)``.
    :type statistics: list, optional
    :param pass_Ne: Defaults to ``False``. If ``True``, the
        demographic model includes ``Ne`` as a parameter (in the final position
        of input parameters).
    :type pass_Ne: bool, optional
    :param maxiter: Defaults to 40,000. Maximum number of iterations to perform.
    :type maxiter: int
    :param epsilon: Step-size to use for finite-difference derivatives.
    :type pgtol: float
    :param pgtol: Convergence criterion for optimization. For more info,
        see help(scipy.optimize.fmin_l_bfgs_b)
    :type pgtol: float
    """
    output_stream = sys.stdout

    means = data[0]
    varcovs = data[1]
    if use_afs:
        try:
            fs = data[2]
        except IndexError:
            raise ValueError(
                "if use_afs=True, need to pass frequency spectrum in data=[means,varcovs,fs]"
            )

        if ns is None:
            raise ValueError("need to set ns if we are fitting frequency spectrum")

    else:
        fs = None

    if rs is None:
        raise ValueError("need to pass rs as bin edges")

    # get num_pops
    if Ne is None:
        if not pass_Ne:
            y = model_func[0](p0[:-1])
        else:
            y = model_func[0](p0[:])
    else:
        y = model_func[0](p0)
    num_pops = y.num_pops

    # remove normalized statistics
    ms = copy.copy(means)
    vcs = copy.copy(varcovs)
    if statistics is None:
        # if statistics is not None, assume we already filtered out the data
        ms, vcs = remove_normalized_data(
            ms, vcs, normalization=normalization, num_pops=num_pops
        )

    args = (
        model_func,
        ms,
        vcs,
        fs,
        rs,
        theta,
        u,
        Ne,
        lower_bound,
        upper_bound,
        verbose,
        flush_delay,
        normalization,
        func_args,
        func_kwargs,
        fixed_params,
        use_afs,
        Leff,
        multinom,
        ns,
        statistics,
        pass_Ne,
        spread,
        output_stream,
    )

    # Make bounds list. For this method it needs to be in terms of log params.
    if lower_bound is None:
        lower_bound = [None] * len(p0)
    else:
        lower_bound = np.log(lower_bound)
        lower_bound[np.isnan(lower_bound)] = None
    lower_bound = _project_params_down(lower_bound, fixed_params)
    if upper_bound is None:
        upper_bound = [None] * len(p0)
    else:
        upper_bound = np.log(upper_bound)
        upper_bound[np.isnan(upper_bound)] = None
    upper_bound = _project_params_down(upper_bound, fixed_params)
    bounds = list(zip(lower_bound, upper_bound))

    p0 = _project_params_down(p0, fixed_params)
    outputs = scipy.optimize.fmin_l_bfgs_b(
        _object_func_log,
        np.log(p0),
        bounds=bounds,
        epsilon=epsilon,
        args=args,
        iprint=-1,
        pgtol=pgtol,
        maxiter=maxiter,
        approx_grad=True,
    )

    xopt, fopt, info_dict = outputs
    xopt = _project_params_up(np.exp(xopt), fixed_params)

    return xopt, fopt


def _project_params_down(pin, fixed_params):
    """
    Eliminate fixed parameters from pin.
    """
    if fixed_params is None:
        return pin

    if len(pin) != len(fixed_params):
        raise ValueError(
            "fixed_params list must have same length as input " "parameter array."
        )

    pout = []
    for ii, (curr_val, fixed_val) in enumerate(zip(pin, fixed_params)):
        if fixed_val is None:
            pout.append(curr_val)

    return np.array(pout)


def _project_params_up(pin, fixed_params):
    """
    Fold fixed parameters into pin.
    """
    if fixed_params is None:
        return pin

    if np.isscalar(pin):
        pin = [pin]

    pout = np.zeros(len(fixed_params))
    orig_ii = 0
    for out_ii, val in enumerate(fixed_params):
        if val is None:
            pout[out_ii] = pin[orig_ii]
            orig_ii += 1
        else:
            pout[out_ii] = fixed_params[out_ii]
    return pout