import numpy as np


def encode_names(values, sorted_names):
    """Map each entry of *values* to its index in *sorted_names*.

    Args:
        values: 1-D array of names to encode. It does not need to be sorted
            and may contain repeated names.
        sorted_names: 1-D array of distinct names in ascending order (the
            vocabulary).

    Returns:
        Integer array ``codes`` of the same length as *values* such that
        ``sorted_names[codes[i]] == values[i]`` for every ``i``.

    Raises:
        ValueError: if at least one entry of *values* is missing from
            *sorted_names*.
    """
    values = np.asarray(values)
    sorted_names = np.asarray(sorted_names)

    if values.size == 0:
        return np.zeros(0, dtype=int)
    if sorted_names.size == 0:
        # Nothing can match an empty vocabulary.
        raise ValueError("Some names in bank are not in 'names'")

    # Binary search gives the insertion point of every value; for a value
    # that is present this is exactly its index in the vocabulary.
    codes = np.searchsorted(sorted_names, values)

    # A value greater than every known name gets index len(sorted_names);
    # clip so the membership check below cannot index out of bounds.
    clipped = np.minimum(codes, sorted_names.shape[0] - 1)
    if not np.array_equal(sorted_names[clipped], values):
        raise ValueError("Some names in bank are not in 'names'")

    return clipped.astype(int, copy=False)


def summarize_by_code(codes, amounts, n_codes):
    """Sum *amounts* per code.

    Args:
        codes: integer array of indices in ``range(n_codes)``.
        amounts: integer array, same length as *codes*.
        n_codes: number of distinct codes (length of the result).

    Returns:
        Integer array ``totals`` of length *n_codes* where ``totals[k]`` is
        the sum of ``amounts[i]`` over all ``i`` with ``codes[i] == k``
        (0 for codes that never occur).
    """
    totals = np.zeros(n_codes, dtype=int)
    # np.add.at accumulates correctly when the same index appears several
    # times, unlike ``totals[codes] += amounts``.
    np.add.at(totals, codes, amounts)
    return totals


# Sorted array of all the possible (distinct) values of the field 'name'.
names = np.array(["jacques", "john", "pete", "robert"], dtype="|S8")

# Record array 'bank': for the people identified by their name, the money
# they have.
#   * Not every name from 'names' is represented in 'bank'.
#   * Every name encountered in 'bank' must be found in 'names'.
#   * A name from 'names' can appear more than once in 'bank'.
#   * 'bank' is sorted by name.
bank_descriptor = [('name', '|S8'), ('money', int)]
bank = np.array([("jacques", 100),
                 ("jacques", 200),
                 ("jacques", 550),
                 # (no line for john)
                 ("pete", 50),
                 ("pete", 240),
                 ("robert", 1000)], dtype=bank_descriptor)

# Integer encoding of the 'name' field, such that
#     bank['name'][i] == names[bank_encoded[i]]
# With this array the 'name' field can be dropped, and summarizing or
# merging data only needs arrays of integers.
bank_encoded = encode_names(bank['name'], names)

# Easy summarization of bank: total money per entry of 'names'.
bank_summarized = summarize_by_code(bank_encoded, bank['money'],
                                    names.shape[0])
print(bank_summarized)