pairlist

pairlist

Generates the pair list of atoms that are closer to each other than the given threshold under the periodic boundary conditions.

version 0.5.1.1

Usage

See pairlist.h for the function definition and pairlist-test.c for usage.

Python API is served in pairlist.py. The API document is here.

To find the neighbors in a face-centered cubic lattice of size 10x10x10 on a MacBook Air 2021 (Apple Silicon),

$ python benchmark.py
INFO crude: Neighboring pair list by a crude double loop.
INFO crude: 18024 ms
INFO crude: end.
24000 pairs
INFO numpyish: Neighboring pair list by numpy fancy array.
INFO numpyish: 741 ms
INFO numpyish: end.
24000.0 pairs
INFO pairlist_py: Neighboring pair list by pairlist in pure python.
INFO pairlist_py: 125 ms
INFO pairlist_py: end.
24000 pairs
INFO pairlist_c: Neighboring pair list by pairlist in c.
INFO pairlist_c: end.
INFO pairlist_c: 16 ms
24000 pairs
import pairlist as pl
from fcc import FaceCenteredCubic
from logging import getLogger, basicConfig, INFO, DEBUG
from decorator import timeit, banner
import numpy as np
from pairlist import pairs_py, pairs2_py


basicConfig(level=INFO, format="%(levelname)s %(message)s")
logger = getLogger()
logger.debug("Debug mode.")


@banner
@timeit
def crude(lattice, cell, rc=1.1):
    "Neighboring pair list by a crude double loop."
    rc2 = rc**2
    count = 0
    for i in range(len(lattice)):
        for j in range(i):
            d = lattice[i] - lattice[j]
            d -= np.floor(d + 0.5)
            d = d @ cell
            if d @ d < rc2:
                count += 1
    return count


@banner
@timeit
def numpyish(lattice, cell, rc=1.1):
    "Neighboring pair list by numpy fancy array."
    # cross-differences
    M = lattice[:, None, :] - lattice[None, :, :]
    # wrap
    M -= np.floor(M + 0.5)
    # in absolute coordinate
    M = M @ cell
    d = (M * M).sum(2)
    return d[(d < rc**2) & (0 < d)].shape[0] / 2


@banner
@timeit
def pairlist_py(lattice, cell, rc=1.1):
    "Neighboring pair list by pairlist in pure python."
    count = 0
    for i, j, d in pl.pairs_iter(
        lattice, maxdist=rc, cell=cell, _engine=(pairs_py, pairs2_py)
    ):
        count += 1
    return count


@timeit
@banner
def pairlist_c(lattice, cell, rc=1.1):
    "Neighboring pair list by pairlist in c."
    count = 0
    for i, j, d in pl.pairs_iter(lattice, maxdist=rc, cell=cell):
        count += 1
    return count


lattice, cell = FaceCenteredCubic(10)

print(crude(lattice, cell), "pairs")
print(numpyish(lattice, cell), "pairs")
print(pairlist_py(lattice, cell), "pairs")
print(pairlist_c(lattice, cell), "pairs")

benchmark

Algorithm

A simple cell division algorithm is implemented.

Demo

It requires GenIce to make the test data.

% make test

Requirements

  • python
  • numpy

Bugs

  1#!/usr/bin/env python
  2# -*- coding: utf-8 -*-
  3# even: stable; odd: develop
  4"""
  5.. include:: ./README.md
  6"""
  7
  8import math
  9import itertools as it
 10import numpy as np
 11from logging import getLogger
 12from cpairlist import pairs, pairs2
 13from typing import Generator
 14
 15__all__ = ["pairs_iter"]
 16
 17
 18def Address(pos, grid):
 19    # residents in each grid cell
 20    mol = pos % 1 % 1  # avoid cancellation
 21    return tuple((mol * grid).astype(int))
 22
 23
 24def ArrangeAddress(xyz, grid):
 25    # residents in each grid cell
 26    residents = dict()
 27    for i in range(len(xyz)):
 28        address = Address(xyz[i], grid)
 29        if address not in residents:
 30            residents[address] = set()
 31        residents[address].add(i)
 32    return residents
 33
 34
 35def pairs_py(xyz, GX, GY, GZ):
 36    grid = np.array([GX, GY, GZ])
 37    logger = getLogger()
 38    logger.debug("START Arrange")
 39    residents = ArrangeAddress(xyz, grid)
 40    logger.debug("END Arrange")
 41
 42    # key-value pairs in the dictionary
 43    donecellpair = set()
 44    pairs = []
 45    for address in residents:
 46        members = residents[address]
 47        # neighbor cells
 48        npa = np.array(address)
 49        k = np.array([(npa + j + grid) % grid for j in range(-1, 2)])
 50        for a2 in it.product(k[:, 0], k[:, 1], k[:, 2]):
 51            if address == a2:
 52                if frozenset((address, a2)) not in donecellpair:
 53                    donecellpair.add(frozenset((address, a2)))
 54                    for a, b in it.combinations(members, 2):
 55                        pairs.append((a, b))
 56            else:
 57                if a2 in residents:
 58                    if frozenset((address, a2)) not in donecellpair:
 59                        donecellpair.add(frozenset((address, a2)))
 60                        for a in members:
 61                            for b in residents[a2]:
 62                                pairs.append((a, b))
 63    return np.array(pairs)
 64
 65
 66def pairs2_py(xyz, xyz2, GX, GY, GZ):
 67    grid = np.array([GX, GY, GZ])
 68    return _pairs_hetero(xyz, xyz2, grid)
 69
 70
 71def _pairs_hetero(xyz, xyz2, grid):
 72    logger = getLogger()
 73    logger.debug("START Arrange")
 74    residents = ArrangeAddress(xyz, grid)
 75    residents2 = ArrangeAddress(xyz2, grid)
 76    logger.debug("END Arrange")
 77
 78    # key-value pairs in the dictionary
 79    donecellpair = set()
 80    pairs = []
 81    for address in residents:
 82        members = residents[address]
 83        ix, iy, iz = address
 84        # neighbor cells
 85        npa = np.array(address)
 86        k = np.array([(npa + j + grid) % grid for j in range(-1, 2)])
 87        for a2 in it.product(k[:, 0], k[:, 1], k[:, 2]):
 88            if a2 in residents2:
 89                if not ((address, a2) in donecellpair):
 90                    donecellpair.add((address, a2))
 91                    for a in members:
 92                        for b in residents2[a2]:
 93                            pairs.append((a, b))
 94    return np.array(pairs)
 95
 96
 97# assume xyz and box are numpy.array
 98def pairs_fine_slow(xyz, rc, cell, grid=None, distance=True):
 99    logger = getLogger()
100    if grid is None:
101        grid = determine_grid(cell, rc)
102    for i, j in pairs(xyz, *grid):
103        moli = xyz[i]
104        molj = xyz[j]
105        d = moli - molj
106        d -= np.floor(d + 0.5)
107        d = np.dot(d, cell)
108        rr = np.dot(d, d)
109        if rr < rc**2:
110            if distance:
111                yield i, j, math.sqrt(rr)
112            else:
113                yield i, j
114
115
116def pairs_nopbc_iter(pos, maxdist=None, pos2=None, distance=False):
117    """
118    Iterator to find pairs in an open space.
119    """
120    # assert pos2 is None, "Pairs for hetero group without PBC is not implemented yet."
121
122    def assign_to_bins(atoms):
123        # occupants[x] is a list of atom labels in a bin x.
124        occupants = dict()
125        for i, pos in enumerate(atoms):
126            # from position to bin
127            ipos = tuple(np.floor(pos / maxdist).astype(int))
128            # insert the atom in the bin
129            if ipos not in occupants:
130                occupants[ipos] = []
131            occupants[ipos].append(i)
132        return occupants
133
134    def nearby(A, hetero=False):
135        """
136        Obtain the list of atoms near A.
137
138        A: Label of the target atom
139        """
140        # bin of A
141        p, q, r = np.floor(pos[A] / maxdist).astype(int)
142
143        # adjacent bins
144        for ix in range(p - 1, p + 2):
145            for iy in range(q - 1, q + 2):
146                for iz in range(r - 1, r + 2):
147                    if (ix, iy, iz) in occupants:
148                        # candidates for the neighbors
149                        for B in occupants[ix, iy, iz]:
150                            # avoid double counts.
151                            if hetero:
152                                # relative vector
153                                d = pos[A] - pos2[B]
154                                d2 = d @ d
155                                # if the distance is shorter than the threshold,
156                                if d2 < maxdist**2:
157                                    yield B, d2**0.5
158                            elif A < B:
159                                # relative vector
160                                d = pos[A] - pos[B]
161                                d2 = d @ d
162                                # if the distance is shorter than the threshold,
163                                if d2 < maxdist**2:
164                                    yield B, d2**0.5
165
166    if pos2 is None:
167        # homo pairs
168        occupants = assign_to_bins(pos)
169        for i in range(len(pos)):
170            for j, L in nearby(i):
171                if distance:
172                    yield i, j, L
173                else:
174                    yield i, j
175
176    else:
177        # hetero pairs
178        occupants = assign_to_bins(pos2)
179        for i in range(len(pos)):
180            for j, L in nearby(i, hetero=True):
181                if distance:
182                    yield i, j, L
183                else:
184                    yield i, j
185
186
187# wrapper
188def pairs_iter(
189    pos: np.ndarray,
190    maxdist: float = None,
191    cell: np.ndarray[3, 3] = None,
192    fractional: bool = True,
193    pos2: np.ndarray = None,
194    rc: float = None,
195    distance: bool = True,
196    _raw: bool = False,
197    _engine=(pairs, pairs2),
198) -> Generator[tuple, None, None]:
199    """Yields pairs within the specified distance.
200
201    Args:
202        pos (np.ndarray): Positions of the atoms. (in fractional or absolute coordinate)
203        maxdist (formerly rc) (float, optional): Upper limit of the pair distance (in the same unit as the cell). Defaults to None.
204        cell (np.ndarray[3, 3], optional): Shape of the cell in a 3x3 numpy array. [[ax,ay,az],[bx,by,bz],[cx,cy,cz]]. Defaults to None.
205        fractional (bool, optional): If true, pos must be given in fractional coordinate. Defaults to True.
206        pos2 (np.ndarray, optional): Specify when you want to find the pairs between two sets of positions. Defaults to None.
207        distance (bool, optional): If true, distance between two positions are also returned. Defaults to True.
208
209    Yields:
210        Generator[tuple, None, None]: indices of a pair | indices and the distance between the pair (distance=True)
211    """
212    logger = getLogger()
213    if rc is not None:
214        logger.warning("rc is deprecated. Use maxdist instead.")
215        assert maxdist is None, "rc and maxdist are specified at a time."
216        maxdist = rc
217    if cell is None:
218        return pairs_nopbc_iter(pos, maxdist=maxdist, pos2=pos2, distance=distance)
219    grid = None
220    if fractional:
221        rpos = pos
222        rpos2 = pos2
223    else:
224        celli = np.linalg.inv(cell)
225        rpos = pos @ celli
226        if pos2 is None:
227            rpos2 = None
228        else:
229            rpos2 = pos2 @ celli
230
231    if rpos2 is None:
232        return pairs_fine(
233            rpos,
234            maxdist,
235            cell,
236            grid=grid,
237            distance=distance,
238            raw=_raw,
239            engine=_engine[0],
240        )
241    else:
242        return pairs_fine_hetero(
243            rpos,
244            rpos2,
245            maxdist,
246            cell,
247            grid=grid,
248            distance=distance,
249            raw=_raw,
250            engine=_engine[1],
251        )
252
253
254# fully numpy style
255def pairs_fine(xyz, rc, cell, grid=None, distance=True, raw=False, engine=pairs):
256    logger = getLogger()
257    if grid is None:
258        grid = determine_grid(cell, rc)
259    p = engine(xyz, *grid)
260    idx0 = p[:, 0]
261    # for i in range(idx0.shape[0]):
262    #    if idx0[i] > 1000:
263    #        print(i,idx0[i])
264    idx1 = p[:, 1]
265    p0 = xyz[idx0]
266    p1 = xyz[idx1]
267    d = p0 - p1
268    d -= np.floor(d + 0.5)
269    a = np.dot(d, cell)
270    L = np.linalg.norm(a, axis=1)
271    cond = L < rc
272    # pickup elements satisfying the condition.
273    j0 = np.compress(cond, idx0)
274    j1 = np.compress(cond, idx1)
275    if raw:
276        if not distance:
277            return j0, j1
278        else:
279            Ls = np.compress(cond, L)
280            return j0, j1, Ls  # no zipping
281    else:
282        if not distance:
283            return np.column_stack((j0, j1))
284        else:
285            Ls = np.compress(cond, L)
286            # return np.column_stack(j0, j1) all the values becomes float...
287            return zip(j0, j1, Ls)  # list of tuples
288
289
290def pairs_crude(xyz, rc, cell, distance=True):
291    logger = getLogger()
292    # logger.debug(xyz)
293    logger.debug(rc)
294    logger.debug(cell)
295    logger.debug(distance)
296    for i, j in it.combinations(range(len(xyz)), 2):
297        moli = xyz[i]
298        molj = xyz[j]
299        d = moli - molj
300        d -= np.floor(d + 0.5)
301        r = np.dot(d, cell)
302        rr = np.dot(r, r)
303
304        if rr < rc**2:
305            # logger.debug((d,r,rr,rc**2))
306            if distance:
307                yield i, j, math.sqrt(rr)
308            else:
309                yield i, j
310
311
312# assume xyz and box are numpy.array
313def pairs_fine_hetero_slow(xyz, xyz2, rc, cell, grid=None, distance=True):
314    if grid is None:
315        grid = determine_grid(cell, rc)
316    for i, j in pairs2(xyz, xyz2, *grid):
317        moli = xyz[i]
318        molj = xyz2[j]
319        d = moli - molj
320        d -= np.floor(d + 0.5)
321        d = np.dot(d, cell)
322        rr = np.dot(d, d)
323
324        if rr < rc**2:
325            if distance:
326                yield i, j, math.sqrt(rr)
327            else:
328                yield i, j
329
330
331def pairs_fine_hetero(
332    xyz, xyz2, rc, cell, grid=None, distance=True, raw=False, engine=pairs2
333):
334    logger = getLogger()
335    if grid is None:
336        grid = determine_grid(cell, rc)
337    p = engine(xyz, xyz2, *grid)
338    idx0 = p[:, 0]
339    idx1 = p[:, 1]
340    p0 = xyz[idx0]
341    p1 = xyz2[idx1]
342    d = p0 - p1
343    d -= np.floor(d + 0.5)
344    a = np.dot(d, cell)
345    L = np.linalg.norm(a, axis=1)
346    cond = L < rc
347    # pickup elements satisfying the condition.
348    j0 = np.compress(cond, idx0)
349    j1 = np.compress(cond, idx1)
350    if raw:
351        if not distance:
352            return j0, j1
353        else:
354            Ls = np.compress(cond, L)
355            return j0, j1, Ls  # no zipping
356    else:
357        if not distance:
358            return np.column_stack((j0, j1))
359        else:
360            Ls = np.compress(cond, L)
361            # return np.column_stack(j0, j1) all the values becomes float...
362            return zip(j0, j1, Ls)  # list of tuples
363
364
365# @lru_cache(maxsize=None)
366def determine_grid(cell, radius):
367    """
368    Determine grid division based on the cutoff radius.
369    """
370    logger = getLogger()
371    ct = cell  # .transpose()
372    # Cell vectors
373    a = ct[0]
374    b = ct[1]
375    c = ct[2]
376    logger.debug("cell a {0}".format(a))
377    logger.debug("cell b {0}".format(b))
378    logger.debug("cell c {0}".format(c))
379    # Edge lengths
380    al = np.linalg.norm(a)  # vector length
381    bl = np.linalg.norm(b)
382    cl = np.linalg.norm(c)
383    # Unit vectors of the axes.
384    ae = a / al  # unit vectors
385    be = b / bl
386    ce = c / cl
387    # Distance between the parallel faces
388    an = np.dot(a, np.cross(be, ce))  # distance to the bc plane
389    bn = np.dot(b, np.cross(ce, ae))
390    cn = np.dot(c, np.cross(ae, be))
391    # required number of grid cells
392    gf = np.array([an / radius, bn / radius, cn / radius])
393    if gf[0] < 1.0:
394        gf[0] = 1.0
395    if gf[1] < 1.0:
396        gf[1] = 1.0
397    if gf[2] < 1.0:
398        gf[2] = 1.0
399    # Check the lengths of four diagonals.
400    logger.debug("Grid divisions: {0}".format(np.floor(gf)))
401    # print(cell,radius,gf)
402    # import sys
403    # sys.exit(1)
404    return np.floor(gf).astype(int)
405
406
407def main():
408    xyz = []
409    for x in range(2):
410        for y in range(2):
411            for z in range(2):
412                xyz.append(
413                    np.array((x / 100.0 + 1, y / 100.0 + 1, z / 100.0 + 1)) / 4.0
414                )
415    xyz2 = []
416    for x in range(2):
417        for y in range(2):
418            for z in range(2):
419                xyz2.append(
420                    np.array((x / 100.0 + 2, y / 100.0 + 1, z / 100.0 + 1)) / 4.0
421                )
422    xyz = np.array(xyz)
423    xyz2 = np.array(xyz2)
424    box = np.diag((4, 4, 4))
425    rc = 1.000000001
426    for i, j, l in pairs_fine_hetero(xyz, xyz2, rc, box):
427        print(i, j, l)
428
429
430if __name__ == "__main__":
431    main()
def pairs_iter( pos: numpy.ndarray, maxdist: float = None, cell: numpy.ndarray[3, 3] = None, fractional: bool = True, pos2: numpy.ndarray = None, rc: float = None, distance: bool = True, _raw: bool = False, _engine=(<built-in function pairs>, <built-in function pairs2>)) -> Generator[tuple, NoneType, NoneType]:
189def pairs_iter(
190    pos: np.ndarray,
191    maxdist: float = None,
192    cell: np.ndarray[3, 3] = None,
193    fractional: bool = True,
194    pos2: np.ndarray = None,
195    rc: float = None,
196    distance: bool = True,
197    _raw: bool = False,
198    _engine=(pairs, pairs2),
199) -> Generator[tuple, None, None]:
200    """Yields pairs within the specified distance.
201
202    Args:
203        pos (np.ndarray): Positions of the atoms. (in fractional or absolute coordinate)
204        maxdist (formerly rc) (float, optional): Upper limit of the pair distance (in the same unit as the cell). Defaults to None.
205        cell (np.ndarray[3, 3], optional): Shape of the cell in a 3x3 numpy array. [[ax,ay,az],[bx,by,bz],[cx,cy,cz]]. Defaults to None.
206        fractional (bool, optional): If true, pos must be given in fractional coordinate. Defaults to True.
207        pos2 (np.ndarray, optional): Specify when you want to find the pairs between two sets of positions. Defaults to None.
208        distance (bool, optional): If true, distance between two positions are also returned. Defaults to True.
209
210    Yields:
211        Generator[tuple, None, None]: indices of a pair | indices and the distance between the pair (distance=True)
212    """
213    logger = getLogger()
214    if rc is not None:
215        logger.warning("rc is deprecated. Use maxdist instead.")
216        assert maxdist is None, "rc and maxdist are specified at a time."
217        maxdist = rc
218    if cell is None:
219        return pairs_nopbc_iter(pos, maxdist=maxdist, pos2=pos2, distance=distance)
220    grid = None
221    if fractional:
222        rpos = pos
223        rpos2 = pos2
224    else:
225        celli = np.linalg.inv(cell)
226        rpos = pos @ celli
227        if pos2 is None:
228            rpos2 = None
229        else:
230            rpos2 = pos2 @ celli
231
232    if rpos2 is None:
233        return pairs_fine(
234            rpos,
235            maxdist,
236            cell,
237            grid=grid,
238            distance=distance,
239            raw=_raw,
240            engine=_engine[0],
241        )
242    else:
243        return pairs_fine_hetero(
244            rpos,
245            rpos2,
246            maxdist,
247            cell,
248            grid=grid,
249            distance=distance,
250            raw=_raw,
251            engine=_engine[1],
252        )

Yields pairs within the specified distance.

Arguments:
  • pos (np.ndarray): Positions of the atoms. (in fractional or absolute coordinate)
  • maxdist (formerly rc) (float, optional): Upper limit of the pair distance (in the same unit as the cell). Defaults to None.
  • cell (np.ndarray[3, 3], optional): Shape of the cell in a 3x3 numpy array. [[ax,ay,az],[bx,by,bz],[cx,cy,cz]]. Defaults to None.
  • fractional (bool, optional): If true, pos must be given in fractional coordinate. Defaults to True.
  • pos2 (np.ndarray, optional): Specify when you want to find the pairs between two sets of positions. Defaults to None.
  • distance (bool, optional): If true, distance between two positions are also returned. Defaults to True.
Yields:

Generator[tuple, None, None]: indices of a pair | indices and the distance between the pair (distance=True)