diff --git a/src/geometry.cpp b/src/geometry.cpp
index 85c369c4ade..485419267f3 100644
--- a/src/geometry.cpp
+++ b/src/geometry.cpp
@@ -83,23 +83,48 @@ find_cell_inner(Particle& p, const NeighborList* neighbor_list)
 {
   for (int64_t i = 0; i < NEIGHBOR_SIZE ; i++) {
-    // Perform an atomic read of the neighbor list
-
-    // TODO: The below is the correct version.
-    // Note: Gives a runtime JIT compiler error (can't find the flush function or something)
-    //#pragma omp atomic read seq_cst
-
-    // TODO: The below is the workaround
-    // Note: gives a link time error
-    /*
-    __sync_synchronize();
-    #pragma omp atomic read
-    i_cell = neighbor_list->list_[i];
-    __sync_synchronize();
-    */
-
-    // TODO: The below is the workaround for the workaround
-    #pragma omp atomic read
+    // Perform a read of the neighbor list element. Note that this
+    // operation may be executed by this thread concurrently with another
+    // thread writing to the same element. The write operation occurs using
+    // an atomic compare-and-swap operation. However, this read operation
+    // is left unprotected. This is not strictly permitted and the results
+    // of the read could be undefined. However, in practice, since this
+    // is a 32-bit read, there are certain practical guarantees that can
+    // be relied on for all known computer architectures. The concerns,
+    // and their solutions, are:
+    //
+    // 1) The element begins as "-1". It gets written concurrently
+    // to "42" at the same time this thread reads it. If this were a
+    // 64-bit operation, a split write could in theory occur, resulting
+    // in decomposition of the store into two 32-bit operations, the
+    // second of which could be preempted by this thread. In that case,
+    // we could theoretically read a value that was neither -1 nor 42,
+    // but some other garbage value, which could result in a segmentation
+    // fault when we try to check against that cell ID. However, since
+    // the element is a 32-bit integer, load and store operations will
+    // be implicitly atomic, so a split write is not possible.
+    //
+    // 2) The element begins as "-1". It gets written concurrently to
+    // "42" at the same time this thread reads it. As some caches on
+    // GPUs are not cache coherent, a read operation that
+    // occurs after the element is set to 42 could result in a -1 being
+    // read. Thankfully, our use case allows for this to happen. In this
+    // event, if 42 is indeed the correct cell neighbor, then the thread
+    // will later attempt to append the correct neighbor onto the list. The
+    // atomic CAS operations there will ensure that a duplicate is not
+    // appended. Thus, while an unnecessary CSG check will be performed
+    // in this case, the program will still behave correctly.
+    //
+    // 3) Atomic read operations can result in stricter memory
+    // fencing operations (e.g., flushing of cache lines) to enforce
+    // cache coherency. This can mean that the below read operation can
+    // become very costly if using OpenMP atomics, even if no write
+    // conflicts ever occur. As it is expected that all neighbors will be
+    // found very rapidly, it is likely not worth paying the cost of the
+    // stricter memory policy; instead, we allow situation (2) to occur
+    // a few times during the first batch of the simulation.
+    i_cell = neighbor_list->list_[i];

     // If the neighbor list item is -1, this means the end of the list has been found
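
The new comment block describes a lock-free pattern: readers perform cheap, unprotected 32-bit loads from the neighbor list, while writers append entries with an atomic compare-and-swap so that duplicates can never be stored and a stale read only costs one redundant CSG check. Below is a minimal, standalone C++ sketch of that pattern, separate from the patch itself. It is not OpenMC's actual NeighborList: the type TinyNeighborList, the helper push_back_unique, and the capacity value are hypothetical, and std::atomic with relaxed loads stands in for the OpenMP/CAS atomics the real code uses, since a relaxed 32-bit load compiles to the same plain load the comment relies on.

// Illustrative sketch only -- not the OpenMC NeighborList implementation.
#include <atomic>
#include <cstdint>
#include <thread>

constexpr int NEIGHBOR_SIZE = 64; // hypothetical fixed capacity

struct TinyNeighborList {
  std::atomic<int32_t> list_[NEIGHBOR_SIZE];

  TinyNeighborList() {
    for (auto& e : list_) e.store(-1, std::memory_order_relaxed);
  }

  // Reader: analogous to the unprotected read in the patch. A relaxed load
  // is a plain 32-bit load; it may observe a stale -1, but it can never
  // observe a torn value, because 32-bit loads and stores are single
  // machine operations.
  int32_t read(int i) const {
    return list_[i].load(std::memory_order_relaxed);
  }

  // Writer: append i_cell unless it is already present. The CAS only
  // succeeds on a slot that still holds -1, so two threads racing to add
  // the same cell cannot create a duplicate entry.
  void push_back_unique(int32_t i_cell) {
    for (int i = 0; i < NEIGHBOR_SIZE; i++) {
      int32_t expected = -1;
      if (list_[i].compare_exchange_strong(expected, i_cell)) {
        return; // appended into an empty slot
      }
      if (expected == i_cell) {
        return; // another thread already appended this cell
      }
      // Slot holds a different cell; try the next one.
    }
  }
};

int main() {
  TinyNeighborList nl;
  // Two threads race to record the same neighbor; only one copy is stored,
  // mirroring the duplicate-suppression argument in point (2) above.
  std::thread t1([&] { nl.push_back_unique(42); });
  std::thread t2([&] { nl.push_back_unique(42); });
  t1.join();
  t2.join();
  return (nl.read(0) == 42 && nl.read(1) == -1) ? 0 : 1;
}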