辞書のリストのリストで重複する要素を合計するにはどうすればよいですか?
サンプルリスト:
# Sample input: a list of groups, each holding user/rating records in which
# the same user may appear more than once.
data = [
    [
        {'user': 1, 'rating': 0},
        {'user': 2, 'rating': 10},
        {'user': 1, 'rating': 20},
        {'user': 3, 'rating': 10},
    ],
    [
        {'user': 4, 'rating': 4},
        {'user': 2, 'rating': 80},
        {'user': 1, 'rating': 20},
        {'user': 1, 'rating': 10},
    ],
]
期待される出力:
# Expected result: within each group every user appears once, carrying the
# sum of that user's ratings.
op = [
    [
        {'user': 1, 'rating': 20},
        {'user': 2, 'rating': 10},
        {'user': 3, 'rating': 10},
    ],
    [
        {'user': 4, 'rating': 4},
        {'user': 2, 'rating': 80},
        {'user': 1, 'rating': 30},
    ],
]
# Collapse repeated users inside every group by adding up their ratings.
op = []
for group in data:
    totals = {}
    for record in group:
        uid = record['user']
        totals[uid] = totals.get(uid, 0) + record['rating']
    # Emit one dict per user, in the order the totals mapping yields them.
    op.append([{'user': uid, 'rating': total} for uid, total in totals.items()])
N.B.: Python 3.7 以降では辞書が公式に挿入順序を保持するため、ユーザーの順序も維持されます。
これはうまくいくはずです:
from collections import defaultdict
# Sum ratings per user within each group; defaultdict(int) starts every
# unseen user at 0 so += works on first sight.
data_without_duplicates = []
for group in data:
    totals = defaultdict(int)
    for record in group:
        totals[record["user"]] += record["rating"]
    merged = [{"user": uid, "rating": total} for uid, total in totals.items()]
    data_without_duplicates.append(merged)
Pythonリスト内包表記:
from collections import Counter
def _rating_totals(group):
    """Return a Counter mapping each user in *group* to the sum of its ratings."""
    totals = Counter()
    for record in group:
        # += accumulates repeated users; building the Counter from a dict
        # comprehension would keep only the last rating seen for each user
        # (e.g. 10 instead of 30 for user 1 in the second sample group).
        totals[record['user']] += record['rating']
    return totals


# most_common() orders each group's users by total rating, highest first.
x = [
    [
        {'user': user, 'rating': rating}
        for user, rating in _rating_totals(group).most_common()
    ]
    for group in data
]
出力:
[
[
{
"rating": 20,
"user": 1
},
{
"rating": 10,
"user": 2
},
{
"rating": 10,
"user": 3
}
],
[
{
"rating": 80,
"user": 2
},
{
"rating": 10,
"user": 1
},
{
"rating": 4,
"user": 4
}
]
]
data = [
    [
        {'user': 1, 'rating': 0},
        {'user': 2, 'rating': 10},
        {'user': 1, 'rating': 20},
        {'user': 3, 'rating': 10},
    ],
    [
        {'user': 4, 'rating': 4},
        {'user': 2, 'rating': 80},
        {'user': 1, 'rating': 20},
        {'user': 1, 'rating': 10},
    ],
]
keyname = "user"


def sum_by_key(rows, key):
    """Merge dicts that share the same *key* value within each inner list.

    Every field other than *key* is added up across the dicts being merged;
    a field missing from earlier dicts starts from 0.

    Args:
        rows: Iterable of iterables of dicts; each dict must contain *key*.
        key: Name of the field that identifies duplicates. Its values must be
            hashable because they are used as dictionary keys.

    Returns:
        A new list with one list per input row. Each inner list holds one
        merged dict per distinct key value, in first-seen order. The input
        dicts are not modified.
    """
    merged_rows = []
    for row in rows:
        # Index merged dicts by key value so each record is an O(1) lookup
        # instead of a scan over everything merged so far.
        merged = {}
        for record in row:
            target = merged.setdefault(record[key], {key: record[key]})
            for field, value in record.items():
                if field != key:
                    target[field] = target.get(field, 0) + value
        merged_rows.append(list(merged.values()))
    return merged_rows


# Named `summed_rows` rather than `all` so the builtin all() stays usable.
summed_rows = sum_by_key(data, keyname)
print(summed_rows)
出力結果:
[[{'user': 1, 'rating': 20}, {'user': 2, 'rating': 10}, {'user': 3, 'rating': 10}], [{'user': 4, 'rating': 4}, {'user': 2, 'rating': 80}, {'user': 1, 'rating': 30}]]
各アイテムは1回のパスで処理できるため、並べ替えは避けてください。ハッシュベースのテクニックはもっと良いはずです。
これは、高価なsort/groupbyまたはpandasの代わりにdefaultdictを使用する代替ソリューションです。
from collections import defaultdict
from functools import reduce
def reduce_func(state, item):
    """Fold one record into the per-user running totals.

    Args:
        state: Mapping from user to a dict with a "rating" entry. Looking up
            a user that is not present yet must yield a starting entry, e.g.
            a defaultdict whose factory returns {"rating": 0}.
        item: Dict with "user" and "rating" keys.

    Returns:
        The same *state* object, with the entry for item["user"] replaced by
        a new dict holding the user and the updated rating total.
    """
    user = item["user"]
    # Store a fresh dict instead of mutating the one returned by the lookup.
    new_obj = {
        "user": user,
        "rating": state[user]["rating"] + item["rating"],
    }
    state[user] = new_obj
    return state
# Fold every group into a per-user mapping with reduce_func, then keep only
# the merged dicts.
output = []
for elem in data:
    folded = reduce(reduce_func, elem, defaultdict(lambda: {"rating": 0}))
    output.append(list(folded.values()))
import pprint
data = [
    [
        {'user': 1, 'rating': 0},
        {'user': 2, 'rating': 10},
        {'user': 1, 'rating': 20},
        {'user': 3, 'rating': 10},
    ],
    [
        {'user': 4, 'rating': 4},
        {'user': 2, 'rating': 80},
        {'user': 1, 'rating': 20},
        {'user': 1, 'rating': 10},
    ],
]


def find(user, l):
    """Return the index of the first dict in *l* whose 'user' equals *user*.

    Returns -1 when no dict in *l* matches.
    """
    for i, d in enumerate(l):
        if user == d['user']:
            return i
    return -1


# Build, for each group, a list with one dict per user and the summed rating.
data_sum = []
for group in data:
    list_sum = []
    for record in group:
        idx = find(record['user'], list_sum)
        if idx == -1:
            # Append a copy: the += below would otherwise change the dicts
            # that still live inside `data`.
            list_sum.append(dict(record))
        else:
            list_sum[idx]['rating'] += record['rating']
    data_sum.append(list_sum)
pprint.pprint(data_sum)