我喜歡用 zip 和 map 來處理這類問題:
from collections import Counter
# for test, import random:
import random
# define class
class User(object):
    """Minimal record holding two independent boolean flags, ``a`` and ``b``."""

    def __init__(self, a, b):
        """Store the two flags unchanged on the instance."""
        self.a = a  # Always a bool
        self.b = b  # Always a bool
# create an arbitrary set
# One random draw in [0, 100] per user: `a` is True for even draws and `b` is
# True for multiples of 3. `range` is used instead of `xrange` so the snippet
# runs on both Python 2 and Python 3.
users = [User(r % 2 == 0, r % 3 == 0)
         for r in (random.randint(0, 100) for _ in range(100))]
# and... count
# zip(*pairs) transposes the (a, b) pairs into one column per attribute; each
# column is then tallied by its own Counter (keys are True / False).
aCounter, bCounter = map(Counter, zip(*((u.a, u.b) for u in users)))
更新:在樣本規模較小時,map(sum, zip(*tuples)) 略快於 for 循環;但對於較大的樣本量,for 循環的擴展性更好。與其他方法不同,for 循環在改爲處理元組列表後並沒有獲得太多的性能提升,可能是因爲它本來就已經相當高效了。
collections.Counter 仍然很慢。
import random
import itertools
import time
from collections import Counter
# define class
class User(object):
    """Minimal record holding two independent boolean flags, ``a`` and ``b``."""

    def __init__(self, a, b):
        """Store the two flags unchanged on the instance."""
        self.a = a  # Always a bool
        self.b = b  # Always a bool
# create an arbitrary sample
# One random draw in [0, 100] per user: `a` is True for even draws and `b` is
# True for multiples of 3. `range` replaces `xrange` so this also runs on
# Python 3.
users = [User(r % 2 == 0, r % 3 == 0)
         for r in (random.randint(0, 100) for _ in range(100))]
# create a list of tuples of the arbitrary sample
# Same data as `users`, flattened to plain (a, b) tuples so the "*2" benchmarks
# avoid per-item attribute lookups.
users2 = [(u.a, u.b) for u in users]
# useful function-timer decorator
def timer(times=1):
    """Build a decorator that calls the wrapped function repeatedly and reports timing.

    Args:
        times: How many times each call of the decorated function invokes the
            original function.

    Returns:
        A decorator. The decorated function forwards its arguments to the
        original on every repetition, prints one summary line (name, number
        of repetitions, elapsed seconds, calls per second) and returns the
        result of the last repetition, or ``None`` when ``times`` is 0.
    """
    # Local import: the file's import block does not provide functools.
    import functools

    def outer(fn):
        @functools.wraps(fn)  # keep fn's __name__/__doc__ on the wrapper
        def wrapper(*args, **kwargs):
            r = None  # defined even if the loop body never runs (times == 0)
            t0 = time.time()
            for _ in range(times):
                r = fn(*args, **kwargs)
            dt = time.time() - t0
            # A coarse clock can report zero elapsed time for very fast
            # functions; avoid ZeroDivisionError in that case.
            rate = times / dt if dt > 0 else float('inf')
            # Call-style print with a single argument behaves identically on
            # Python 2 and Python 3.
            print('{} ran {} times in {} seconds with {:f} ops/sec'.format(
                fn.__name__, times, dt, rate))
            return r
        return wrapper
    return outer
# now create the timeable functions
# Repetition count passed as @timer(times=n) to every benchmark defined below.
n=10000
@timer(times=n)
def time_sum():
    """Count the True ``a`` and ``b`` flags of ``users`` via zip + sum.

    The (a, b) pairs are transposed into one column per attribute and each
    column is summed (True counts as 1).

    Returns:
        A two-element list ``[true_a_count, true_b_count]``.
    """
    # list(...) forces evaluation: on Python 3 `map` is lazy, so without it
    # the benchmark would measure nothing and the result could not be indexed.
    return list(map(sum, zip(*((u.a, u.b) for u in users))))
@timer(times=n)
def time_counter():
    """Tally the ``a`` and ``b`` flags of ``users`` with one Counter per attribute.

    Returns:
        A two-element list ``[Counter of a values, Counter of b values]``.
    """
    # list(...) forces evaluation: on Python 3 `map` is lazy, so without it
    # no Counter would actually be built inside the timed call.
    return list(map(Counter, zip(*((u.a, u.b) for u in users))))
@timer(times=n)
def time_for():
    """Count the True ``a`` and ``b`` flags of ``users`` with a plain for loop.

    Returns:
        A tuple ``(true_a_count, true_b_count)``.
    """
    a, b = 0, 0
    for u in users:
        # Identity test kept from the original benchmark: only the literal
        # True is counted (the flags are documented as always being bools).
        if u.a is True:
            a += 1
        if u.b is True:
            b += 1
    return a, b
# itertools.imap / itertools.izip exist only on Python 2. On Python 3 the
# built-in map / zip are already lazy, so fall back to those.
_imap = getattr(itertools, 'imap', map)
_izip = getattr(itertools, 'izip', zip)


@timer(times=n)
def time_itermapzip():
    """Count the True ``a`` and ``b`` flags of ``users`` via lazy map/zip + sum.

    Returns:
        A two-element list ``[true_a_count, true_b_count]``.
    """
    return list(_imap(sum, _izip(*((u.a, u.b) for u in users))))
@timer(times=n)
def time_sum2():
    """Count the True flags in the pre-built tuple list ``users2`` via zip + sum.

    Returns:
        A two-element list ``[true_a_count, true_b_count]``.
    """
    # list(...) forces evaluation: on Python 3 `map` is lazy, so without it
    # the benchmark would measure nothing and the result could not be indexed.
    return list(map(sum, zip(*users2)))
@timer(times=n)
def time_counter2():
    """Tally the pre-built tuple list ``users2`` with one Counter per column.

    Returns:
        A two-element list ``[Counter of a values, Counter of b values]``.
    """
    # list(...) forces evaluation: on Python 3 `map` is lazy, so without it
    # no Counter would actually be built inside the timed call.
    return list(map(Counter, zip(*users2)))
@timer(times=n)
def time_for2():
    """Count the True flags in the pre-built tuple list ``users2`` with a for loop.

    Returns:
        A tuple ``(true_a_count, true_b_count)``.
    """
    a, b = 0, 0
    for _a, _b in users2:
        # Identity test kept from the original benchmark: only the literal
        # True is counted.
        if _a is True:
            a += 1
        if _b is True:
            b += 1
    return a, b
# itertools.imap / itertools.izip exist only on Python 2. On Python 3 the
# built-in map / zip are already lazy, so fall back to those.
_imap = getattr(itertools, 'imap', map)
_izip = getattr(itertools, 'izip', zip)


@timer(times=n)
def time_itermapzip2():
    """Count the True flags in ``users2`` via lazy map/zip + sum.

    Returns:
        A two-element list ``[true_a_count, true_b_count]``.
    """
    return list(_imap(sum, _izip(*users2)))
# Run every benchmark in the original order. Each call prints its own timing
# line, and `v` ends up holding the result of the last one (time_itermapzip2).
for bench in (time_sum, time_counter, time_for, time_itermapzip,
              time_sum2, time_counter2, time_for2, time_itermapzip2):
    v = bench()
# time_sum ran 10000 times in 0.446894168854 seconds with 22376.662523 ops/sec
# time_counter ran 10000 times in 1.29836297035 seconds with 7702.006471 ops/sec
# time_for ran 10000 times in 0.267076015472 seconds with 37442.523554 ops/sec
# time_itermapzip ran 10000 times in 0.459508895874 seconds with 21762.364319 ops/sec
# time_sum2 ran 10000 times in 0.174293994904 seconds with 57374.323226 ops/sec
# time_counter2 ran 10000 times in 0.989939928055 seconds with 10101.623055 ops/sec
# time_for2 ran 10000 times in 0.183295965195 seconds with 54556.574605 ops/sec
# time_itermapzip2 ran 10000 times in 0.193426847458 seconds with 51699.131384 ops/sec
# Summarise the True/False split. `v` holds [true_a_count, true_b_count] from
# the last benchmark, and the False counts are the remainder of len(users).
# Call-style print with a single argument works on Python 2 and Python 3.
print("True a's: {}\t False a's: {}\nTrue b's: {}\t False b's:{}".format(
    v[0], len(users) - v[0], v[1], len(users) - v[1]))
# True a's: 53 False a's: 47
# True b's: 31 False b's:69
v
# [53, 31]
相同的代碼,樣本規模爲 1000 時的結果:
# time_sum ran 10000 times in 9.30428719521 seconds with 1074.773359 ops/sec
# time_counter ran 10000 times in 16.7009849548 seconds with 598.767080 ops/sec
# time_for ran 10000 times in 2.61371207237 seconds with 3825.976130 ops/sec
# time_itermapzip ran 10000 times in 9.40824103355 seconds with 1062.897939 ops/sec
# time_sum2 ran 10000 times in 5.70988488197 seconds with 1751.348794 ops/sec
# time_counter2 ran 10000 times in 13.4643371105 seconds with 742.702735 ops/sec
# time_for2 ran 10000 times in 2.49017906189 seconds with 4015.775473 ops/sec
# time_itermapzip2 ran 10000 times in 6.10926699638 seconds with 1636.857581 ops/sec
感謝大家的建議。我用 timeit 把所有解決方案各運行了 100000 次,並會把結果以評論的形式寫在每個答案下面。最好的是凱爾斯特蘭德的方案:只用 2 個計數器,然後用列表長度減去計數值。一般來說,任何使用 collections.Counter() 的方案都非常慢。作爲對比,我上面的兩種解決方案用 timeit 運行的耗時(以秒爲單位):Counter() 解決方案:'5.78';使用 4 個計數變量的循環解決方案:'1.16' – Chad 2013-03-22 02:33:22