zip_longest for the left list always

Solutions

Chaining the repeated fillvalue behind the iterables other than the first:

from itertools import chain, repeat

def zip_first(first, *rest, fillvalue=None):
    return zip(first, *map(chain, rest, repeat(repeat(fillvalue))))

Or using zip_longest and trim it with a compress and zip trick:

def zip_first(first, *rest, fillvalue=None):
    a, b = tee(first)
    return compress(zip_longest(b, *rest, fillvalue=fillvalue), zip(a))

Just like zip and zip_longest, these take any number (well, at least one) of any kind of iterables (including infinite ones) and return an iterator (convert to list if needed).

Benchmark results

Benchmarks with other equally general solutions (all code is at the end of the answer):

10 iterables of 10,000 to 90,000 elements, first has 50,000:
────────────────────────────────────────────────────────────
 2.2 ms   2.2 ms   2.3 ms  limit_cheat
 2.6 ms   2.6 ms   2.6 ms  Kelly_Bundy_chain
 3.3 ms   3.3 ms   3.3 ms  Kelly_Bundy_compress
50.2 ms  50.6 ms  50.7 ms  CrazyChucky
54.7 ms  55.0 ms  55.0 ms  Sven_Marnach
74.8 ms  74.9 ms  75.0 ms  Mad_Physicist
 5.4 ms   5.4 ms   5.4 ms  Kelly_Bundy_3
 5.9 ms   6.0 ms   6.0 ms  Kelly_Bundy_4
 4.6 ms   4.7 ms   4.7 ms  Kelly_Bundy_5

10,000 iterables of 0 to 100 elements, first has 50:
────────────────────────────────────────────────────
 4.6 ms   4.7 ms   4.8 ms  limit_cheat
 4.8 ms   4.8 ms   4.8 ms  Kelly_Bundy_compress
 8.4 ms   8.4 ms   8.4 ms  Kelly_Bundy_chain
27.1 ms  27.3 ms  27.5 ms  CrazyChucky
38.3 ms  38.5 ms  38.7 ms  Sven_Marnach
73.0 ms  73.0 ms  73.1 ms  Mad_Physicist
 4.9 ms   4.9 ms   5.0 ms  Kelly_Bundy_3
 4.9 ms   4.9 ms   5.0 ms  Kelly_Bundy_4
 5.0 ms   5.0 ms   5.0 ms  Kelly_Bundy_5

The first one is a cheat that knows the length, included to show what’s probably a limit for how fast we can get.

Explanations

A little explanation of the above two solutions:

The first solution, if used with for example three iterables, is equivalent to this:

def zip_first(first, second, third, fillvalue=None):
    filler = repeat(fillvalue)
    return zip(first,
               chain(second, filler),
               chain(third, filler))

The second solution basically lets zip_longest do the job. The only problem with that is that it doesn’t stop when the first iterable is done. So I duplicate the first iterable (with tee) and then use one for its elements and the other for its length. The zip(a) wraps every element in a 1-tuple, and non-empty tuples are true. So compress gives me all tuples produced by zip_longest, as many as there are elements in the first iterable.

Benchmark code (Try it online!)

def limit_cheat(*iterables, fillvalue=None):
    return islice(zip_longest(*iterables, fillvalue=fillvalue), cheat_length)

def Kelly_Bundy_chain(first, *rest, fillvalue=None):
    return zip(first, *map(chain, rest, repeat(repeat(fillvalue))))

def Kelly_Bundy_compress(first, *rest, fillvalue=None):
    a, b = tee(first)
    return compress(zip_longest(b, *rest, fillvalue=fillvalue), zip(a))

def CrazyChucky(*iterables, fillvalue=None):
    SENTINEL = object()
    
    for first, *others in zip_longest(*iterables, fillvalue=SENTINEL):
        if first is SENTINEL:
            return
        others = [i if i is not SENTINEL else fillvalue for i in others]
        yield (first, *others)

def Sven_Marnach(first, *rest, fillvalue=None):
    rest = [iter(r) for r in rest]
    for x in first:
        yield x, *(next(r, fillvalue) for r in rest)

def Mad_Physicist(*args, fillvalue=None):
    # zip_by_first('ABCD', 'xy', fillvalue="-") --> Ax By C- D-
    # zip_by_first('ABC', 'xyzw', fillvalue="-") --> Ax By Cz
    if not args:
        return
    iterators = [iter(it) for it in args]
    while True:
        values = []
        for i, it in enumerate(iterators):
            try:
                value = next(it)
            except StopIteration:
                if i == 0:
                    return
                iterators[i] = repeat(fillvalue)
                value = fillvalue
            values.append(value)
        yield tuple(values)

def Kelly_Bundy_3(first, *rest, fillvalue=None):
    a, b = tee(first)
    return map(itemgetter(1), zip(a, zip_longest(b, *rest, fillvalue=fillvalue)))

def Kelly_Bundy_4(first, *rest, fillvalue=None):
    sentinel = object()
    for z in zip_longest(chain(first, [sentinel]), *rest, fillvalue=fillvalue):
        if z[0] is sentinel:
            break
        yield z

def Kelly_Bundy_5(first, *rest, fillvalue=None):
    stopped = False
    def stop():
        nonlocal stopped
        stopped = True
        return
        yield
    for z in zip_longest(chain(first, stop()), *rest, fillvalue=fillvalue):
        if stopped:
            break
        yield z


import timeit
from itertools import chain, repeat, zip_longest, islice, tee, compress
from operator import itemgetter
from collections import deque

funcs = [
    limit_cheat,
    Kelly_Bundy_chain,
    Kelly_Bundy_compress,
    CrazyChucky,
    Sven_Marnach,
    Mad_Physicist,
    Kelly_Bundy_3,
    Kelly_Bundy_4,
    Kelly_Bundy_5,
]

def test(args_creator):

    # Correctness
    expect = list(funcs[0](*args_creator()))
    for func in funcs:
        result = list(func(*args_creator()))
        print(result == expect, func.__name__)
    
    # Speed
    tss = [[] for _ in funcs]
    for _ in range(5):
        print()
        print(args_creator.__name__)
        for func, ts in zip(funcs, tss):
            t = min(timeit.repeat(lambda: deque(func(*args_creator()), 0), number=1))
            ts.append(t)
            print(*('%4.1f ms ' % (t * 1e3) for t in sorted(ts)[:3]), func.__name__)

def args_few_but_long_iterables():
    global cheat_length
    cheat_length = 50_000
    first = repeat(0, 50_000)
    rest = [repeat(i, 10_000 * i) for i in range(1, 10)]
    return first, *rest

def args_many_but_short_iterables():
    global cheat_length
    cheat_length = 50
    first = repeat(0, 50)
    rest = [repeat(i, i % 101) for i in range(1, 10_000)]
    return first, *rest

test(args_few_but_long_iterables)
funcs[1:3] = funcs[1:3][::-1]
test(args_many_but_short_iterables)

Leave a Comment

tech