← Back to team overview

larry-discuss team mailing list archive

groupmean reduce

 

Here is a simple implementation of a reduce option in groupmean;
essentially, it is two functions in one.

See https://blueprints.launchpad.net/larry/+spec/group-method-design
As a standalone function, it could also be plugged into other larry
methods, e.g. larry.mean.

Only tested on the example in the file.

Josef
# -*- coding: utf-8 -*-
"""
Created on Mon May 03 15:33:26 2010

Author: josef-pktd
"""

import numpy as np

from la import larry
from la.afunc import unique_group



def group_mean(x, groups, axis=0, reduce=False):
    """
    Mean with groups along an axis, ignoring NaNs.

    Parameters
    ----------
    x : array_like
        Input data.
    groups : list
        Group membership of each element of `x` along `axis`.
    axis : int, {default: 0}
        Axis along which the mean is calculated.
    reduce : bool, {default: False}
        If False, an array with the same shape as `x` is returned in which
        every element is replaced by the mean of its group along `axis`.
        If True, one mean per unique group is returned along `axis`,
        together with the list of unique groups.

    Returns
    -------
    xmean : ndarray
        If `reduce` is False, an array with the same shape as `x` where
        every element is replaced by its group mean along `axis`.
        If `reduce` is True, an array whose length along `axis` equals the
        number of unique groups; position ``i`` along `axis` holds the mean
        of group ``ugroups[i]``.
        Entries for which no non-NaN value is available are NaN.
    ugroups : list
        The unique groups as returned by `unique_group`. Only returned when
        `reduce` is True.

    """

    # Find the unique groups before `groups` is converted to an array
    ugroups = unique_group(groups)

    x = np.asarray(x)
    groups = np.asarray(groups)

    # Allocate the NaN-filled output: same shape as the input, unless the
    # grouping axis is collapsed to one entry per unique group
    outshape = list(x.shape)
    if reduce:
        outshape[axis] = len(ugroups)
    xmean = np.empty(outshape)
    xmean.fill(np.nan)

    for gidx, group in enumerate(ugroups):
        member = groups == group
        if not np.any(member):
            # No element belongs to this group; leave the output as NaN
            continue

        # Select the members of this group along `axis`. The index must be
        # a tuple: NumPy does not accept a list as a multidimensional index.
        src = [slice(None)] * x.ndim
        src[axis] = member
        xgroup = x[tuple(src)]

        # Number of non-NaN observations entering each mean
        count = 1.0 * (~np.isnan(xgroup)).sum(axis)
        # A slice without any non-NaN value gives 0/0 (or NaN/0); the NaN
        # result is intended, so only the floating point warning is silenced
        with np.errstate(invalid='ignore'):
            gmean = np.nansum(xgroup, axis=axis) / count

        if reduce:
            # Store the mean at this group's position along `axis`
            dst = [slice(None)] * x.ndim
            dst[axis] = gidx
            xmean[tuple(dst)] = gmean
        else:
            # Broadcast the mean back onto every member of the group
            xmean[tuple(src)] = np.expand_dims(gmean, axis)

    if reduce:
        return xmean, ugroups
    return xmean
    

if __name__ == '__main__':
    # Example run of group_mean on a 2d array and on a 3d array, with and
    # without reduce, wrapping the reduced results in a larry.
    # print is always called with a single parenthesized argument so that
    # the output is the same on Python 2 and the script also parses on
    # Python 3.

    nan = np.nan

    x2d = np.array([[9.0, 3.0, nan, nan, 9.0, nan],
                    [1.0, 1.0, 1.0, nan, nan, nan],
                    [2.0, 2.0, 0.1, nan, 1.0, nan],  # 0.0 kills geometric mean
                    [3.0, 9.0, 2.0, nan, nan, nan],
                    [4.0, 4.0, 3.0, 9.0, 2.0, nan],
                    [5.0, 5.0, 4.0, 4.0, nan, nan]])
    sectors = ['a', 'b', 'a', 'b', 'a', 'c']

    # 3d test case: two copies of x2d stacked, with the stacking axis
    # moved to the front
    x3 = np.dstack((x2d, x2d))
    x = np.rollaxis(x3, 2).copy()
    lar2d = larry(x2d)
    lar = larry(x)

    # 2d case, groups along axis 0
    axis = 0
    print(group_mean(x2d, sectors, axis=axis))
    reduced = group_mean(x2d, sectors, axis=axis, reduce=True)
    print(reduced)
    xg, ug = reduced

    # create larry from the reduced result: the label along the grouping
    # axis is replaced by the unique groups
    lab = lar2d.copylabel()
    lab[axis] = ug
    lagr = larry(xg, lab)
    print(lagr)

    # 3d case, groups along axis 1
    axis = 1
    print(group_mean(x, sectors, axis=axis))
    reduced = group_mean(x, sectors, axis=axis, reduce=True)
    print(reduced)
    xg, ug = reduced

    # create larry with return
    lab = lar.copylabel()
    lab[axis] = ug
    lagr = larry(xg, lab)
    print(lagr)







Follow ups