larry-discuss team mailing list archive
-
larry-discuss team
-
Mailing list archive
-
Message #00144
groupmean reduce
Here is a simple implementation of a reduce option in groupmean;
essentially, it is two functions in one.
See https://blueprints.launchpad.net/larry/+spec/group-method-design
As a standalone function, it could also be plugged into other larry
methods, e.g. larry.mean.
Only tested on the example in the file.
Josef
# -*- coding: utf-8 -*-
"""
Created on Mon May 03 15:33:26 2010
Author: josef-pktd
"""
import numpy as np
from la import larry
from la.afunc import unique_group
def group_mean(x, groups, axis=0, reduce=False):
    """
    Mean with groups along an axis, ignoring NaNs.

    Parameters
    ----------
    x : ndarray
        Input data.
    groups : list
        List of group membership of each element along the axis.
    axis : int, {default: 0}
        Axis along which the mean is calculated.
    reduce : boolean
        If reduce is false (default), an array of group means with the
        same shape as the input array is returned.
        If reduce is true, one mean per unique group is returned, together
        with the unique groups.

    Returns
    -------
    xmean : ndarray
        If reduce is false, an array with the same shape as the input array
        where every element is replaced by the mean of its group along the
        given axis. Positions whose group is not among the unique groups
        stay NaN.
        If reduce is true, an array whose length along `axis` equals the
        number of unique groups; position ``i`` along `axis` holds the mean
        of ``ugroups[i]``.
    ugroups : list
        Unique groups, as returned by ``unique_group``. Only returned when
        reduce is true.
    """
    # Find set of unique groups
    ugroups = unique_group(groups)

    # Convert groups to a numpy array so that `==` gives a boolean mask
    groups = np.asarray(groups)

    # The output is NaN-filled up front; only slots of non-empty groups are
    # overwritten below.
    if reduce:
        out_shape = list(x.shape)
        out_shape[axis] = len(ugroups)
    else:
        out_shape = x.shape
    xmean = np.nan * np.zeros(out_shape)

    for gidx, group in enumerate(ugroups):
        member = groups == group
        if member.sum() == 0:
            continue

        # Multidimensional indices must be tuples; a list of slices is not
        # accepted as such by current NumPy.
        src = [slice(None)] * x.ndim
        src[axis] = member
        src = tuple(src)

        selected = x[src]
        # Number of non-NaN observations per position along the axis
        norm = 1.0 * (~np.isnan(selected)).sum(axis)
        # A slice that is entirely NaN has norm == 0; the division then
        # yields NaN, so only the accompanying warning is silenced.
        with np.errstate(divide='ignore', invalid='ignore'):
            gmean = np.nansum(selected, axis=axis) / norm

        if reduce:
            dst = [slice(None)] * x.ndim
            dst[axis] = gidx
            xmean[tuple(dst)] = gmean
        else:
            # Restore the reduced axis so the mean broadcasts over all
            # members of the group.
            xmean[src] = np.expand_dims(gmean, axis)

    if reduce:
        return xmean, ugroups
    return xmean
if __name__ == '__main__':
    # Demo: run group_mean on a small 2d example and on a 3d stack of it,
    # then wrap the reduced results in larrys labelled by unique group.
    nan = np.nan
    x2d = np.array([[9.0, 3.0, nan, nan, 9.0, nan],
                    [1.0, 1.0, 1.0, nan, nan, nan],
                    [2.0, 2.0, 0.1, nan, 1.0, nan],  # 0.0 kills geometric mean
                    [3.0, 9.0, 2.0, nan, nan, nan],
                    [4.0, 4.0, 3.0, 9.0, 2.0, nan],
                    [5.0, 5.0, 4.0, 4.0, nan, nan]])
    sectors = ['a', 'b', 'a', 'b', 'a', 'c']
    # lasectors and lar3 are not used further in this script
    lasectors = larry(np.array(sectors, dtype=object))
    x3 = np.dstack((x2d, x2d))
    # Move the stacking axis to the front, so the group axis becomes axis 1
    x = np.rollaxis(x3, 2).copy()
    lar2d = larry(x2d)
    lar3 = larry(x3)
    lar = larry(x)

    # Single-argument print(...) gives the same output on Python 2 and 3.
    axis = 0
    print(group_mean(x2d, sectors, axis=axis))
    xg, ug = group_mean(x2d, sectors, axis=axis, reduce=True)
    print((xg, ug))
    # create larry with return: the label along `axis` is the unique groups
    lab = lar2d.copylabel()
    lab[axis] = ug
    lagr = larry(xg, lab)
    print(lagr)

    axis = 1
    print(group_mean(x, sectors, axis=axis))
    xg, ug = group_mean(x, sectors, axis=axis, reduce=True)
    print((xg, ug))
    # create larry with return
    lab = lar.copylabel()
    lab[axis] = ug
    lagr = larry(xg, lab)
    print(lagr)
Follow ups