I have an input file that is sorted alphabetically:
Asburn 9
Ashley 10
Ashley 11
Bojourn 12
I want the output to look like this:
Asburn 9
Ashley 21
Bojourn 12
The code I have used:
# Sum the counts per word read from standard input ("<word> <count>" per line).
counts = defaultdict(int)
for line in sys.stdin:
    word, count = line.split()
    # Accumulate rather than assign: a word may occur on several lines,
    # and plain assignment would keep only the last count seen.
    counts[word] += int(count)
for wrd, count in counts.items():
    print("{}\t{}".format(wrd, count))
but I cannot use a dictionary. How should I implement it?
CodePudding user response:
With some help from Python's standard library:
from sys import stdin
from itertools import groupby
from operator import itemgetter
# Lazily split every input line into its whitespace-separated fields.
rows = (line.split() for line in stdin)
# groupby only merges *adjacent* rows with an equal key, so this relies on
# the input already being sorted by word.
for name, run in groupby(rows, key=itemgetter(0)):
    total = sum(int(amount) for _, amount in run)
    print(name, total)
CodePudding user response:
Try this:
# Sum the counts of consecutive lines that share the same word, without a
# dictionary. Only the current run is kept in memory, so equal words must
# be adjacent in the input (i.e. the input must be sorted).
prev_word = None
prev_count = 0
for line in sys.stdin:
    word, count = line.split()
    count = int(count)
    if word != prev_word:
        # A new word starts: emit the finished run first. On the very
        # first line there is no finished run yet, hence the None check.
        if prev_word is not None:
            print(prev_word, prev_count)
        prev_count = count
        prev_word = word
    else:
        # Same word as the previous line: add to the running total
        # instead of overwriting it.
        prev_count += count
# Emit the last run; prev_word is still None only when the input was empty.
if prev_word is not None:
    print(prev_word, prev_count)
Quick test: the following code returns the expected output
# Self-contained check of the run-summing loop on a hard-coded, sorted sample.
prev_word = None
prev_count = 0
for line in ['ashutosh 9', 'ashutosh 10', 'check 11', 'check 11', 'zebra 11']:
    word, count = line.split()
    count = int(count)
    if word != prev_word:
        # A new word starts: emit the finished run first. On the very
        # first line there is no finished run yet, hence the None check.
        if prev_word is not None:
            print(prev_word, prev_count)
        prev_count = count
        prev_word = word
    else:
        # Same word as the previous line: add to the running total
        # instead of overwriting it.
        prev_count += count
# Emit the last run; prev_word is still None only when there was no input.
if prev_word is not None:
    print(prev_word, prev_count)
CodePudding user response:
This uses a regex to extract the key-value pairs, groups them by key, and then separates the keys from the values of each group with zip.
import re
import itertools as it
text = """Asburn 9
Ashley 10
Ashley 11
Bojourn 12"""
# One (name, digits) tuple per line. re.M makes ^ match at the start of
# every line; the quantifiers (+? and +) are required for names and numbers
# longer than a single character.
pairs = re.findall(r'^(.+?)\s(\d+)\n*', text, re.M)
# groupby merges adjacent pairs with the same name (the input is sorted);
# zip(*grp) transposes each run into a tuple of names and a tuple of values.
groups = [zip(*grp) for _, grp in it.groupby(pairs, lambda p: p[0])]
for keys, values in groups:
    print(keys[0], sum(map(int, values)))