Circular 2D crystal with strain

This notebook computes the scattering from a circular crystal with a strain gradient from the center of the circle : $$ I(H,K) = \left | \sum_{n=0}^{N-1} \sum_{n=0}^{N-1} \Omega \left (n,m \right ) \exp \left \{ 2 \pi i \left [ H \left (n + \Delta n\right )+ K \left (m + \Delta m\right ) \right ] \right \} \right | ^2$$ $\Omega$ is the support function (1 inside the crystal, 0 outside). $\Delta n$ and $\Delta m$ are unit-cell displacements along each axis of the crystal.

First we load the external libraries.

In [1]:
import os
import math
import cmath
import numpy as np
import numba as nb
import numexpr as ne
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm
n_cpu = os.cpu_count()

Then we define the size of the crystal (N) and the HK reflection to be computed. A complete reciprocal space unit-cell is rendered with a resolution defined by the sampling rate. For this particular example we use a radial lattice variation from the center of the particle.

In [2]:
H = 0 #Miller index of reflection
K = 4 #Miller index of reflection
N = 100 #Number of units cells per direction
oversampling = 6 #Defines how much points are needed to describe a single Laue fringe (2 = Nyquist frequency)
e0 = 0.01 #Maximum strain at surface
w = 20. #Width of the strain profile below the surface
#Generate real and reciprocal space coordinates
n = np.arange(N)
m = np.arange(N)
h = np.arange(H-0.5, H+0.5, 1./(oversampling*N))
k = np.arange(K-0.5, K+0.5, 1./(oversampling*N))

Pure python implementation

In [3]:
def Circ_python(n, m, h, k, e0, w):
    result = [[0 for x in range(len(h))] for y in range(len(k))]
    N = len(n)
    for i_h, v_h in enumerate(h): #loop over the reciprocal space coordinates
        for i_k, v_k in enumerate(k):
            tmp = 0.0
            for i_n in n:#loop and sum over unit-cells
                for i_m in m:
                    radius = math.sqrt((i_n - N/2)** 2 + (i_m - N/2)** 2)
                    if radius <= (N/2):
                        strain = e0 * (1 + math.tanh((radius-N/2)/w))
                        tmp += cmath.exp(2j*cmath.pi*(v_h*(i_n+strain*(i_n-N/2)) + v_k*(i_m+strain*(i_m-N/2))))
            result[i_h][i_k] = abs(tmp)**2
    return np.array(result)

%time ref_intensity = Circ_python(n, m, h, k, e0, w)
CPU times: user 8h 58min 26s, sys: 10.3 s, total: 8h 58min 36s
Wall time: 8h 58min 36s
In [4]:
%matplotlib notebook
plt.figure(1)
plt.imshow(ref_intensity.T, extent=(h.min(), h.max(), k.min(), k.max()), norm=LogNorm(), origin = 'lower')
plt.xlabel('H')
plt.ylabel('K')
plt.show()

Pure Numpy implementation

In [5]:
def Circ_numpy(n, m, h, k, e0, w):
    N = len(n)
    h = h[:, np.newaxis, np.newaxis, np.newaxis]
    k = k[np.newaxis, :, np.newaxis, np.newaxis]
    n = n[np.newaxis, np.newaxis, :, np.newaxis]
    m = m[np.newaxis, np.newaxis, np.newaxis, :]
    radius = np.sqrt((n - N/2)**2 + (m - N/2)**2)
    support = np.where(radius > N/2, 0, 1)
    strain = e0 * (1 + np.tanh((radius-N/2) / w))
    tmp = (support * np.exp(2j*np.pi*(h*(n+strain*(n-N/2)) + k*(m+strain*(m-N/2)))))
    return np.abs(tmp.sum(axis=(2,3)))**2

%time intensity = Circ_numpy(n, m, h, k, e0, w)
%timeit Circ_numpy(n, m, h, k, e0, w)
print("Error:", abs(intensity-ref_intensity).max())
CPU times: user 3min 15s, sys: 1min 21s, total: 4min 36s
Wall time: 4min 36s
1 loop, best of 3: 5min 9s per loop
Error: 1.39698386192e-07

Numpy + for loop

In [6]:
def Circ_numpy_hybrid(n, m, h, k, e0, w):
    result = np.zeros((h.size, k.size))
    N = len(n)
    n=n[:,np.newaxis]
    m=m[np.newaxis,:]
    n_c = (n - N/2)
    m_c = (m - N/2)
    radius = np.sqrt(n_c**2 + m_c**2)
    strain = e0 * (1 + np.tanh((radius-N/2) / w))
    support = np.where(radius > N/2, 0, 1)
    
    n_s = n + strain * n_c
    m_s = m + strain * m_c
    #loop over the reciprocal space coordinates
    for i_h, v_h in enumerate(h):
        for i_k, v_k in enumerate(k): 
            tmp = (support * np.exp(2j*np.pi*(v_h*n_s + v_k*m_s))).sum()
            result[i_h, i_k] = abs(tmp)**2
    return result

%time intensity = Circ_numpy_hybrid(n, m, h, k, e0, w)
%timeit Circ_numpy_hybrid(n, m, h, k, e0, w)
print("Error:", abs(intensity-ref_intensity).max())
CPU times: user 3min 7s, sys: 7.96 ms, total: 3min 7s
Wall time: 3min 7s
1 loop, best of 3: 3min 5s per loop
Error: 1.39698386192e-07

NumExpr implementation

In [7]:
def Circ_numexpr(n, m, h, k, e0, w):
    N = len(n)
    h = h[:, np.newaxis, np.newaxis, np.newaxis]
    k = k[np.newaxis, :, np.newaxis, np.newaxis]
    n = n[np.newaxis, np.newaxis, :, np.newaxis]
    m = m[np.newaxis, np.newaxis, np.newaxis, :]
    ne.set_num_threads(1)
    radius = ne.evaluate("sqrt((n - N/2)**2 + (m - N/2)**2)")
    strain = ne.evaluate("e0 * (1 + tanh((radius-N/2) / w))")
    j2pi = np.pi*2j
    tmp = ne.evaluate("where(radius > N/2, 0, exp(j2pi*(h*(n+strain*(n-N/2)) + k*(m+strain*(m-N/2)))))")
    tmp.shape = k.size, h.size, -1
    result = abs(tmp.sum(axis=-1))**2
    return result

%time  intensity = Circ_numexpr(n, m, h, k, e0, w)
%timeit Circ_numexpr(n, m, h, k, e0, w)
print("Error:", abs(intensity-ref_intensity).max())
CPU times: user 3min 32s, sys: 22.8 s, total: 3min 55s
Wall time: 3min 55s
1 loop, best of 3: 3min 48s per loop
Error: 1.39698386192e-07
In [8]:
def Circ_numexpr_hybrid(n, m, h, k, e0, w):
    result = np.zeros((h.size, k.size))
    N = len(n)
    n=np.atleast_2d(n).T
    m=np.atleast_2d(m)
    n_c = (n - N/2)
    m_c = (m - N/2)
    radius = np.sqrt(n_c**2 + m_c**2)
    strain = e0 * (1 + np.tanh((radius-N/2) / w))
    support = np.where(radius > N/2, 0, 1)
    j2pi = np.pi*2j
    n_s = n + strain * (n_c)
    m_s = m + strain * (m_c)
    #loop over the reciprocal space coordinates
    for i_h, v_h in enumerate(h): 
        for i_k, v_k in enumerate(k):
            tmp = ne.evaluate("where(radius > N/2, 0, exp(j2pi*(v_h*n_s + v_k*m_s)))").sum()
            result[i_h, i_k] = tmp.real*tmp.real + tmp.imag*tmp.imag
    return result

%time intensity = Circ_numexpr_hybrid(n, m, h, k, e0, w)
%timeit Circ_numexpr_hybrid(n, m, h, k, e0, w)
print("Error:", abs(intensity-ref_intensity).max())
CPU times: user 3min 9s, sys: 89 µs, total: 3min 9s
Wall time: 3min 9s
1 loop, best of 3: 3min 3s per loop
Error: 1.39698386192e-07

NumExpr (multi-threaded) implementation

In [9]:
def Circ_numexpr_parallel(n, m, h, k, e0, w):
    N = len(n)
    h = h[:, np.newaxis, np.newaxis, np.newaxis]
    k = k[np.newaxis, :, np.newaxis, np.newaxis]
    n = n[np.newaxis, np.newaxis, :, np.newaxis]
    m = m[np.newaxis, np.newaxis, np.newaxis, :]
    ne.set_num_threads(n_cpu)
    radius = ne.evaluate("sqrt((n - N/2)**2 + (m - N/2)**2)")
    strain = ne.evaluate("e0 * (1 + tanh((radius-N/2) / w))")
    j2pi = np.pi*2j
    #loop over the reciprocal space coordinates
    tmp = ne.evaluate("where(radius > N/2, 0, exp(j2pi*(h*(n+strain*(n-N/2)) + k*(m+strain*(m-N/2)))))")
    tmp.shape = k.size, h.size, -1
    result = abs(tmp.sum(axis=-1))**2
    return result

%time  intensity = Circ_numexpr_parallel(n, m, h, k, e0, w)
%timeit Circ_numexpr_parallel(n, m, h, k, e0, w)
print("Error:", abs(intensity-ref_intensity).max())
CPU times: user 8min 4s, sys: 42.5 s, total: 8min 47s
Wall time: 21.7 s
1 loop, best of 3: 26.5 s per loop
Error: 1.39698386192e-07

Numba (single thread) implementation

In [10]:
@nb.jit(nb.float64[:,:](nb.int64[:],nb.int64[:],nb.float64[:],nb.float64[:],nb.float64, nb.float64), 
        nopython=True)
def Circ_numba(n, m, h, k, e0, w):
    result = np.zeros(len(h)*len(k), dtype=nb.float64)
    N = len(n)
    for i in range(len(h)*len(k)): #loop over the reciprocal space coordinates
        tmp = 0j
        for i_n in n:#loop and sum over unit-cells
            for i_m in m:
                radius = math.sqrt((i_n - N/2)** 2 + (i_m - N/2)** 2)
                if radius <= (N/2):
                    strain = e0 * (1 + math.tanh((radius-N/2)/w))
                    tmp +=  cmath.exp(2j*cmath.pi*(h[i//len(h)]*(i_n+strain*(i_n-N/2)) + k[i%len(h)]*(i_m+strain*(i_m-N/2)))) #cmath library works better with numba than numpy math
        result[i] = abs(tmp)**2
    return result.reshape(len(h),len(k))

%time intensity = Circ_numba(n, m, h, k, e0, w)
%timeit Circ_numba(n, m, h, k, e0, w)
print("Error:", abs(intensity-ref_intensity).max())
CPU times: user 4min 57s, sys: 584 ms, total: 4min 58s
Wall time: 4min 57s
1 loop, best of 3: 4min 51s per loop
Error: 0.0

Numba (multi-threaded / parallel) implementation

In [11]:
@nb.jit(nb.float64[:,:](nb.int64[:],nb.int64[:],nb.float64[:],nb.float64[:],nb.float64, nb.float64), 
        nopython=True, parallel=True, fastmath=True)
def Circ_numba_parallel(n, m, h, k, e0, w):
    result = np.zeros(len(h)*len(k), dtype=nb.float64)
    N = len(n)
    for i in nb.prange(len(h)*len(k)): #loop over the reciprocal space coordinates
        tmp = 0j
        for i_n in n:#loop and sum over unit-cells
            for i_m in m:
                support = 1.0
                radius = math.sqrt((i_n - N/2)** 2 + (i_m - N/2)** 2)
                if (radius > (N/2.)):
                    support = 0.0
                    #continue #Numba isn't working using the same continue pattern as below
                strain = e0 * (1 + math.tanh((radius-N/2)/w))
                tmp +=  support * cmath.exp(2j*cmath.pi*(h[i//len(h)]*(i_n+strain*(i_n-N/2)) + k[i%len(h)]*(i_m+strain*(i_m-N/2)))) #cmath library works better with numba than numpy math
        result[i] = abs(tmp)**2
    return result.reshape((len(h),len(k)))

%time intensity = Circ_numba_parallel(n, m, h, k, e0, w)
%timeit Circ_numba_parallel(n, m, h, k, e0, w)
print("Error:", abs(intensity-ref_intensity).max())
CPU times: user 9min 58s, sys: 0 ns, total: 9min 58s
Wall time: 7.74 s
1 loop, best of 3: 7.74 s per loop
Error: 0.0

Pythran (single thread) implementation

In [12]:
%load_ext pythran.magic
In [13]:
%%pythran -fopenmp

#pythran export Circ_pythran(int[] or float[], int[] or float[], float[], float[], float, float or int)
import numpy as np

def Circ_pythran(n, m, h, k, e0, w):
    result = np.zeros((len(k),len(h)), dtype=np.float64)
    N = len(n)
    #"omp parallel for"
    for i_h, v_h in enumerate(h): #loop over the reciprocal space coordinates
        for i_k, v_k in enumerate(k):
            tmp = 0j
            for i_n in n:
                for i_m in m:
                    radius = np.sqrt((i_n - N/2.)** 2 + (i_m - N/2.)** 2)
                    if (radius > (N/2.)):
                        continue
                    strain = e0 * (1 + np.tanh((radius-N/2.)/w))
                    tmp += np.exp(2j*np.pi*(v_h*(i_n+strain*(i_n-N/2.)) + v_k*(i_m+strain*(i_m-N/2.))))
            result[i_h, i_k] = abs(tmp)**2
    return result
In [14]:
%time intensity = Circ_pythran(n, m, h, k, e0, w)
%timeit Circ_pythran(n, m, h, k, e0, w)
print("Error:", abs(intensity-ref_intensity).max())
CPU times: user 4min 29s, sys: 4.13 ms, total: 4min 29s
Wall time: 4min 29s
1 loop, best of 3: 4min 29s per loop
Error: 3.72529029846e-09

Pythran multi-threaded implementation

In [15]:
%%pythran -fopenmp

#pythran export Circ_pythran_parallel(int[] or float[], int[] or float[], float[], float[], float, float or int)
import numpy as np

def Circ_pythran_parallel(n, m, h, k, e0, w):
    result = np.zeros((len(k),len(h)), dtype=np.float64)
    N = len(n)
    "omp parallel for"
    for i_h, v_h in enumerate(h): #loop over the reciprocal space coordinates
        for i_k, v_k in enumerate(k):
            tmp = 0j
            for i_n in n:
                for i_m in m:
                    radius = np.sqrt((i_n - N/2.)** 2 + (i_m - N/2.)** 2)
                    if (radius > (N/2.)):
                        continue
                    strain = e0 * (1 + np.tanh((radius-N/2.)/w))
                    tmp += np.exp(2j*np.pi*(v_h*(i_n+strain*(i_n-N/2.)) + v_k*(i_m+strain*(i_m-N/2.))))
            result[i_h, i_k] = abs(tmp)**2
    return result
In [16]:
%time intensity = Circ_pythran_parallel(n, m, h, k, e0, w)
%timeit Circ_pythran_parallel(n, m, h, k, e0, w)
print("Error:", abs(intensity-ref_intensity).max())
CPU times: user 7min 50s, sys: 52.8 ms, total: 7min 50s
Wall time: 6.47 s
1 loop, best of 3: 6.32 s per loop
Error: 3.72529029846e-09

Cython (single thread) implementation

In [17]:
%load_ext Cython
In [18]:
%%cython -a --compile-args=-fopenmp --link-args=-fopenmp
import numpy as np
from cython.parallel import prange
import cython

from libc.math cimport sqrt, tanh

cdef extern from "complex.h" nogil:
    double complex cexp(double complex)
    double cabs(double complex)
    
@cython.wraparound(False)
@cython.boundscheck(False)
@cython.cdivision(True)
def Circ_cython(long[::1] n, long[::1] m, double[::1] h, 
                double[::1] k, double e0, int w):
    cdef:
        double[:, ::1] result
        double v_h, v_k, radius, strain
        double complex tmp, two_j_pi
        int i_h, i_k, i_m, i_n, size_h, size_k, N
        
    two_j_pi = np.pi*2j
    size_h = h.size
    size_k = k.size
    N = n.size
    result = np.zeros((size_k, size_h))
    #loop over the reciprocal space coordinates
    for i_k in range(size_k):
        v_k = k[i_k]
        for i_h in range(size_h): 
            v_h = h[i_h]
            tmp = 0
            #loop and sum over unit-cells
            for i_n in range(N):
                for i_m in range(N):
                    radius = sqrt((i_n - N/2.)** 2 + (i_m - N/2.)** 2)
                    if (radius > (N/2.)):
                        continue
                    strain = e0 * (1 + tanh((radius-N/2.)/w))
                    tmp = tmp + cexp(two_j_pi*(v_h*(i_n+strain*(i_n-N/2.)) + v_k*(i_m+strain*(i_m-N/2.))))
            result[i_h, i_k] += cabs(tmp)**2

    return result
Out[18]:
Cython: _cython_magic_6fdcce99eeb6f0482f97a60e7174311c.pyx

Generated by Cython 0.26.1

Yellow lines hint at Python interaction.
Click on a line that starts with a "+" to see the C code that Cython generated for it.

+01: import numpy as np
  __pyx_t_1 = __Pyx_Import(__pyx_n_s_numpy, 0, 0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 1, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  if (PyDict_SetItem(__pyx_d, __pyx_n_s_np, __pyx_t_1) < 0) __PYX_ERR(0, 1, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
/* … */
  __pyx_t_1 = PyDict_New(); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 1, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  if (PyDict_SetItem(__pyx_d, __pyx_n_s_test, __pyx_t_1) < 0) __PYX_ERR(0, 1, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
 02: from cython.parallel import prange
 03: import cython
 04: 
 05: from libc.math cimport sqrt, tanh
 06: 
 07: cdef extern from "complex.h" nogil:
 08:     double complex cexp(double complex)
 09:     double cabs(double complex)
 10: 
 11: @cython.wraparound(False)
 12: @cython.boundscheck(False)
 13: @cython.cdivision(True)
+14: def Circ_cython(long[::1] n, long[::1] m, double[::1] h,
/* Python wrapper */
static PyObject *__pyx_pw_46_cython_magic_6fdcce99eeb6f0482f97a60e7174311c_1Circ_cython(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
static PyMethodDef __pyx_mdef_46_cython_magic_6fdcce99eeb6f0482f97a60e7174311c_1Circ_cython = {"Circ_cython", (PyCFunction)__pyx_pw_46_cython_magic_6fdcce99eeb6f0482f97a60e7174311c_1Circ_cython, METH_VARARGS|METH_KEYWORDS, 0};
static PyObject *__pyx_pw_46_cython_magic_6fdcce99eeb6f0482f97a60e7174311c_1Circ_cython(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) {
  __Pyx_memviewslice __pyx_v_n = { 0, 0, { 0 }, { 0 }, { 0 } };
  CYTHON_UNUSED __Pyx_memviewslice __pyx_v_m = { 0, 0, { 0 }, { 0 }, { 0 } };
  __Pyx_memviewslice __pyx_v_h = { 0, 0, { 0 }, { 0 }, { 0 } };
  __Pyx_memviewslice __pyx_v_k = { 0, 0, { 0 }, { 0 }, { 0 } };
  double __pyx_v_e0;
  int __pyx_v_w;
  PyObject *__pyx_r = 0;
  __Pyx_RefNannyDeclarations
  __Pyx_RefNannySetupContext("Circ_cython (wrapper)", 0);
  {
    static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_n,&__pyx_n_s_m,&__pyx_n_s_h,&__pyx_n_s_k,&__pyx_n_s_e0,&__pyx_n_s_w,0};
    PyObject* values[6] = {0,0,0,0,0,0};
    if (unlikely(__pyx_kwds)) {
      Py_ssize_t kw_args;
      const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args);
      switch (pos_args) {
        case  6: values[5] = PyTuple_GET_ITEM(__pyx_args, 5);
        CYTHON_FALLTHROUGH;
        case  5: values[4] = PyTuple_GET_ITEM(__pyx_args, 4);
        CYTHON_FALLTHROUGH;
        case  4: values[3] = PyTuple_GET_ITEM(__pyx_args, 3);
        CYTHON_FALLTHROUGH;
        case  3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2);
        CYTHON_FALLTHROUGH;
        case  2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
        CYTHON_FALLTHROUGH;
        case  1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
        CYTHON_FALLTHROUGH;
        case  0: break;
        default: goto __pyx_L5_argtuple_error;
      }
      kw_args = PyDict_Size(__pyx_kwds);
      switch (pos_args) {
        case  0:
        if (likely((values[0] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_n)) != 0)) kw_args--;
        else goto __pyx_L5_argtuple_error;
        CYTHON_FALLTHROUGH;
        case  1:
        if (likely((values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_m)) != 0)) kw_args--;
        else {
          __Pyx_RaiseArgtupleInvalid("Circ_cython", 1, 6, 6, 1); __PYX_ERR(0, 14, __pyx_L3_error)
        }
        CYTHON_FALLTHROUGH;
        case  2:
        if (likely((values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_h)) != 0)) kw_args--;
        else {
          __Pyx_RaiseArgtupleInvalid("Circ_cython", 1, 6, 6, 2); __PYX_ERR(0, 14, __pyx_L3_error)
        }
        CYTHON_FALLTHROUGH;
        case  3:
        if (likely((values[3] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_k)) != 0)) kw_args--;
        else {
          __Pyx_RaiseArgtupleInvalid("Circ_cython", 1, 6, 6, 3); __PYX_ERR(0, 14, __pyx_L3_error)
        }
        CYTHON_FALLTHROUGH;
        case  4:
        if (likely((values[4] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_e0)) != 0)) kw_args--;
        else {
          __Pyx_RaiseArgtupleInvalid("Circ_cython", 1, 6, 6, 4); __PYX_ERR(0, 14, __pyx_L3_error)
        }
        CYTHON_FALLTHROUGH;
        case  5:
        if (likely((values[5] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_w)) != 0)) kw_args--;
        else {
          __Pyx_RaiseArgtupleInvalid("Circ_cython", 1, 6, 6, 5); __PYX_ERR(0, 14, __pyx_L3_error)
        }
      }
      if (unlikely(kw_args > 0)) {
        if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "Circ_cython") < 0)) __PYX_ERR(0, 14, __pyx_L3_error)
      }
    } else if (PyTuple_GET_SIZE(__pyx_args) != 6) {
      goto __pyx_L5_argtuple_error;
    } else {
      values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
      values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
      values[2] = PyTuple_GET_ITEM(__pyx_args, 2);
      values[3] = PyTuple_GET_ITEM(__pyx_args, 3);
      values[4] = PyTuple_GET_ITEM(__pyx_args, 4);
      values[5] = PyTuple_GET_ITEM(__pyx_args, 5);
    }
    __pyx_v_n = __Pyx_PyObject_to_MemoryviewSlice_dc_long(values[0]); if (unlikely(!__pyx_v_n.memview)) __PYX_ERR(0, 14, __pyx_L3_error)
    __pyx_v_m = __Pyx_PyObject_to_MemoryviewSlice_dc_long(values[1]); if (unlikely(!__pyx_v_m.memview)) __PYX_ERR(0, 14, __pyx_L3_error)
    __pyx_v_h = __Pyx_PyObject_to_MemoryviewSlice_dc_double(values[2]); if (unlikely(!__pyx_v_h.memview)) __PYX_ERR(0, 14, __pyx_L3_error)
    __pyx_v_k = __Pyx_PyObject_to_MemoryviewSlice_dc_double(values[3]); if (unlikely(!__pyx_v_k.memview)) __PYX_ERR(0, 15, __pyx_L3_error)
    __pyx_v_e0 = __pyx_PyFloat_AsDouble(values[4]); if (unlikely((__pyx_v_e0 == (double)-1) && PyErr_Occurred())) __PYX_ERR(0, 15, __pyx_L3_error)
    __pyx_v_w = __Pyx_PyInt_As_int(values[5]); if (unlikely((__pyx_v_w == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 15, __pyx_L3_error)
  }
  goto __pyx_L4_argument_unpacking_done;
  __pyx_L5_argtuple_error:;
  __Pyx_RaiseArgtupleInvalid("Circ_cython", 1, 6, 6, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 14, __pyx_L3_error)
  __pyx_L3_error:;
  __Pyx_AddTraceback("_cython_magic_6fdcce99eeb6f0482f97a60e7174311c.Circ_cython", __pyx_clineno, __pyx_lineno, __pyx_filename);
  __Pyx_RefNannyFinishContext();
  return NULL;
  __pyx_L4_argument_unpacking_done:;
  __pyx_r = __pyx_pf_46_cython_magic_6fdcce99eeb6f0482f97a60e7174311c_Circ_cython(__pyx_self, __pyx_v_n, __pyx_v_m, __pyx_v_h, __pyx_v_k, __pyx_v_e0, __pyx_v_w);

  /* function exit code */
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}

static PyObject *__pyx_pf_46_cython_magic_6fdcce99eeb6f0482f97a60e7174311c_Circ_cython(CYTHON_UNUSED PyObject *__pyx_self, __Pyx_memviewslice __pyx_v_n, CYTHON_UNUSED __Pyx_memviewslice __pyx_v_m, __Pyx_memviewslice __pyx_v_h, __Pyx_memviewslice __pyx_v_k, double __pyx_v_e0, int __pyx_v_w) {
  __Pyx_memviewslice __pyx_v_result = { 0, 0, { 0 }, { 0 }, { 0 } };
  double __pyx_v_v_h;
  double __pyx_v_v_k;
  double __pyx_v_radius;
  double __pyx_v_strain;
  __pyx_t_double_complex __pyx_v_tmp;
  __pyx_t_double_complex __pyx_v_two_j_pi;
  int __pyx_v_i_h;
  int __pyx_v_i_k;
  int __pyx_v_i_m;
  int __pyx_v_i_n;
  int __pyx_v_size_h;
  int __pyx_v_size_k;
  int __pyx_v_N;
  PyObject *__pyx_r = NULL;
  __Pyx_RefNannyDeclarations
  __Pyx_RefNannySetupContext("Circ_cython", 0);
/* … */
  /* function exit code */
  __pyx_L1_error:;
  __Pyx_XDECREF(__pyx_t_1);
  __Pyx_XDECREF(__pyx_t_2);
  __Pyx_XDECREF(__pyx_t_3);
  __Pyx_XDECREF(__pyx_t_6);
  __Pyx_XDECREF(__pyx_t_7);
  __PYX_XDEC_MEMVIEW(&__pyx_t_8, 1);
  __Pyx_AddTraceback("_cython_magic_6fdcce99eeb6f0482f97a60e7174311c.Circ_cython", __pyx_clineno, __pyx_lineno, __pyx_filename);
  __pyx_r = NULL;
  __pyx_L0:;
  __PYX_XDEC_MEMVIEW(&__pyx_v_result, 1);
  __PYX_XDEC_MEMVIEW(&__pyx_v_n, 1);
  __PYX_XDEC_MEMVIEW(&__pyx_v_m, 1);
  __PYX_XDEC_MEMVIEW(&__pyx_v_h, 1);
  __PYX_XDEC_MEMVIEW(&__pyx_v_k, 1);
  __Pyx_XGIVEREF(__pyx_r);
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}
/* … */
  __pyx_tuple__20 = PyTuple_Pack(20, __pyx_n_s_n, __pyx_n_s_m, __pyx_n_s_h, __pyx_n_s_k, __pyx_n_s_e0, __pyx_n_s_w, __pyx_n_s_result, __pyx_n_s_v_h, __pyx_n_s_v_k, __pyx_n_s_radius, __pyx_n_s_strain, __pyx_n_s_tmp, __pyx_n_s_two_j_pi, __pyx_n_s_i_h, __pyx_n_s_i_k, __pyx_n_s_i_m, __pyx_n_s_i_n, __pyx_n_s_size_h, __pyx_n_s_size_k, __pyx_n_s_N); if (unlikely(!__pyx_tuple__20)) __PYX_ERR(0, 14, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_tuple__20);
  __Pyx_GIVEREF(__pyx_tuple__20);
/* … */
  __pyx_t_1 = PyCFunction_NewEx(&__pyx_mdef_46_cython_magic_6fdcce99eeb6f0482f97a60e7174311c_1Circ_cython, NULL, __pyx_n_s_cython_magic_6fdcce99eeb6f0482f); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 14, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  if (PyDict_SetItem(__pyx_d, __pyx_n_s_Circ_cython, __pyx_t_1) < 0) __PYX_ERR(0, 14, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __pyx_codeobj__21 = (PyObject*)__Pyx_PyCode_New(6, 0, 20, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__20, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_home_aboulle_cache_ipython_cyth, __pyx_n_s_Circ_cython, 14, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__21)) __PYX_ERR(0, 14, __pyx_L1_error)
 15:                 double[::1] k, double e0, int w):
 16:     cdef:
 17:         double[:, ::1] result
 18:         double v_h, v_k, radius, strain
 19:         double complex tmp, two_j_pi
 20:         int i_h, i_k, i_m, i_n, size_h, size_k, N
 21: 
+22:     two_j_pi = np.pi*2j
  __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 22, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_pi); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 22, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_2);
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __pyx_t_1 = PyComplex_FromDoubles(0.0, 2.0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 22, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __pyx_t_3 = PyNumber_Multiply(__pyx_t_2, __pyx_t_1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 22, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_3);
  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __pyx_t_4 = __Pyx_PyComplex_As___pyx_t_double_complex(__pyx_t_3); if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 22, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
  __pyx_v_two_j_pi = __pyx_t_4;
+23:     size_h = h.size
  __pyx_t_3 = __pyx_memoryview_fromslice(__pyx_v_h, 1, (PyObject *(*)(char *)) __pyx_memview_get_double, (int (*)(char *, PyObject *)) __pyx_memview_set_double, 0);; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 23, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_3);
  __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_size); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 23, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
  __pyx_t_5 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_5 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 23, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __pyx_v_size_h = __pyx_t_5;
+24:     size_k = k.size
  __pyx_t_1 = __pyx_memoryview_fromslice(__pyx_v_k, 1, (PyObject *(*)(char *)) __pyx_memview_get_double, (int (*)(char *, PyObject *)) __pyx_memview_set_double, 0);; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 24, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_size); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 24, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_3);
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __pyx_t_5 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_5 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 24, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
  __pyx_v_size_k = __pyx_t_5;
+25:     N = n.size
  __pyx_t_3 = __pyx_memoryview_fromslice(__pyx_v_n, 1, (PyObject *(*)(char *)) __pyx_memview_get_long, (int (*)(char *, PyObject *)) __pyx_memview_set_long, 0);; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 25, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_3);
  __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_size); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 25, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
  __pyx_t_5 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_5 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 25, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __pyx_v_N = __pyx_t_5;
+26:     result = np.zeros((size_k, size_h))
  __pyx_t_3 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 26, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_3);
  __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_zeros); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 26, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_2);
  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
  __pyx_t_3 = __Pyx_PyInt_From_int(__pyx_v_size_k); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 26, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_3);
  __pyx_t_6 = __Pyx_PyInt_From_int(__pyx_v_size_h); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 26, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_6);
  __pyx_t_7 = PyTuple_New(2); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 26, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_7);
  __Pyx_GIVEREF(__pyx_t_3);
  PyTuple_SET_ITEM(__pyx_t_7, 0, __pyx_t_3);
  __Pyx_GIVEREF(__pyx_t_6);
  PyTuple_SET_ITEM(__pyx_t_7, 1, __pyx_t_6);
  __pyx_t_3 = 0;
  __pyx_t_6 = 0;
  __pyx_t_6 = NULL;
  if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_2))) {
    __pyx_t_6 = PyMethod_GET_SELF(__pyx_t_2);
    if (likely(__pyx_t_6)) {
      PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_2);
      __Pyx_INCREF(__pyx_t_6);
      __Pyx_INCREF(function);
      __Pyx_DECREF_SET(__pyx_t_2, function);
    }
  }
  if (!__pyx_t_6) {
    __pyx_t_1 = __Pyx_PyObject_CallOneArg(__pyx_t_2, __pyx_t_7); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 26, __pyx_L1_error)
    __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
    __Pyx_GOTREF(__pyx_t_1);
  } else {
    #if CYTHON_FAST_PYCALL
    if (PyFunction_Check(__pyx_t_2)) {
      PyObject *__pyx_temp[2] = {__pyx_t_6, __pyx_t_7};
      __pyx_t_1 = __Pyx_PyFunction_FastCall(__pyx_t_2, __pyx_temp+1-1, 1+1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 26, __pyx_L1_error)
      __Pyx_XDECREF(__pyx_t_6); __pyx_t_6 = 0;
      __Pyx_GOTREF(__pyx_t_1);
      __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
    } else
    #endif
    #if CYTHON_FAST_PYCCALL
    if (__Pyx_PyFastCFunction_Check(__pyx_t_2)) {
      PyObject *__pyx_temp[2] = {__pyx_t_6, __pyx_t_7};
      __pyx_t_1 = __Pyx_PyCFunction_FastCall(__pyx_t_2, __pyx_temp+1-1, 1+1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 26, __pyx_L1_error)
      __Pyx_XDECREF(__pyx_t_6); __pyx_t_6 = 0;
      __Pyx_GOTREF(__pyx_t_1);
      __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
    } else
    #endif
    {
      __pyx_t_3 = PyTuple_New(1+1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 26, __pyx_L1_error)
      __Pyx_GOTREF(__pyx_t_3);
      __Pyx_GIVEREF(__pyx_t_6); PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_t_6); __pyx_t_6 = NULL;
      __Pyx_GIVEREF(__pyx_t_7);
      PyTuple_SET_ITEM(__pyx_t_3, 0+1, __pyx_t_7);
      __pyx_t_7 = 0;
      __pyx_t_1 = __Pyx_PyObject_Call(__pyx_t_2, __pyx_t_3, NULL); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 26, __pyx_L1_error)
      __Pyx_GOTREF(__pyx_t_1);
      __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
    }
  }
  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
  __pyx_t_8 = __Pyx_PyObject_to_MemoryviewSlice_d_dc_double(__pyx_t_1);
  if (unlikely(!__pyx_t_8.memview)) __PYX_ERR(0, 26, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __pyx_v_result = __pyx_t_8;
  __pyx_t_8.memview = NULL;
  __pyx_t_8.data = NULL;
 27:     #loop over the reciprocal space coordinates
+28:     for i_k in range(size_k):
  __pyx_t_5 = __pyx_v_size_k;
  for (__pyx_t_9 = 0; __pyx_t_9 < __pyx_t_5; __pyx_t_9+=1) {
    __pyx_v_i_k = __pyx_t_9;
+29:         v_k = k[i_k]
    __pyx_t_10 = __pyx_v_i_k;
    __pyx_v_v_k = (*((double *) ( /* dim=0 */ ((char *) (((double *) __pyx_v_k.data) + __pyx_t_10)) )));
+30:         for i_h in range(size_h):
    __pyx_t_11 = __pyx_v_size_h;
    for (__pyx_t_12 = 0; __pyx_t_12 < __pyx_t_11; __pyx_t_12+=1) {
      __pyx_v_i_h = __pyx_t_12;
+31:             v_h = h[i_h]
      __pyx_t_13 = __pyx_v_i_h;
      __pyx_v_v_h = (*((double *) ( /* dim=0 */ ((char *) (((double *) __pyx_v_h.data) + __pyx_t_13)) )));
+32:             tmp = 0
      __pyx_v_tmp = __pyx_t_double_complex_from_parts(0, 0);
 33:             #loop and sum over unit-cells
+34:             for i_n in range(N):
      __pyx_t_14 = __pyx_v_N;
      for (__pyx_t_15 = 0; __pyx_t_15 < __pyx_t_14; __pyx_t_15+=1) {
        __pyx_v_i_n = __pyx_t_15;
+35:                 for i_m in range(N):
        __pyx_t_16 = __pyx_v_N;
        for (__pyx_t_17 = 0; __pyx_t_17 < __pyx_t_16; __pyx_t_17+=1) {
          __pyx_v_i_m = __pyx_t_17;
+36:                     radius = sqrt((i_n - N/2.)** 2 + (i_m - N/2.)** 2)
          __pyx_v_radius = sqrt((pow((__pyx_v_i_n - (((double)__pyx_v_N) / 2.)), 2.0) + pow((__pyx_v_i_m - (((double)__pyx_v_N) / 2.)), 2.0)));
+37:                     if (radius > (N/2.)):
          __pyx_t_18 = ((__pyx_v_radius > (((double)__pyx_v_N) / 2.)) != 0);
          if (__pyx_t_18) {
/* … */
          }
+38:                         continue
            goto __pyx_L9_continue;
+39:                     strain = e0 * (1 + tanh((radius-N/2.)/w))
          __pyx_v_strain = (__pyx_v_e0 * (1.0 + tanh(((__pyx_v_radius - (((double)__pyx_v_N) / 2.)) / ((double)__pyx_v_w)))));
+40:                     tmp = tmp + cexp(two_j_pi*(v_h*(i_n+strain*(i_n-N/2.)) + v_k*(i_m+strain*(i_m-N/2.))))
          __pyx_v_tmp = __Pyx_c_sum_double(__pyx_v_tmp, cexp(__Pyx_c_prod_double(__pyx_v_two_j_pi, __pyx_t_double_complex_from_parts(((__pyx_v_v_h * (__pyx_v_i_n + (__pyx_v_strain * (__pyx_v_i_n - (((double)__pyx_v_N) / 2.))))) + (__pyx_v_v_k * (__pyx_v_i_m + (__pyx_v_strain * (__pyx_v_i_m - (((double)__pyx_v_N) / 2.)))))), 0))));
          __pyx_L9_continue:;
        }
      }
+41:             result[i_h, i_k] += cabs(tmp)**2
      __pyx_t_19 = __pyx_v_i_h;
      __pyx_t_20 = __pyx_v_i_k;
      *((double *) ( /* dim=1 */ ((char *) (((double *) ( /* dim=0 */ (__pyx_v_result.data + __pyx_t_19 * __pyx_v_result.strides[0]) )) + __pyx_t_20)) )) += pow(cabs(__pyx_v_tmp), 2.0);
    }
  }
 42: 
+43:     return result
  __Pyx_XDECREF(__pyx_r);
  __pyx_t_1 = __pyx_memoryview_fromslice(__pyx_v_result, 2, (PyObject *(*)(char *)) __pyx_memview_get_double, (int (*)(char *, PyObject *)) __pyx_memview_set_double, 0);; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 43, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __pyx_r = __pyx_t_1;
  __pyx_t_1 = 0;
  goto __pyx_L0;
In [19]:
%time intensity = Circ_cython(n, m, h, k, e0, w)
%timeit Circ_cython(n, m, h, k, e0, w)
print("Error:", abs(intensity-ref_intensity).max())
CPU times: user 4min 43s, sys: 1.44 s, total: 4min 44s
Wall time: 4min 42s
1 loop, best of 3: 4min 35s per loop
Error: 0.0

Cython (multi-threaded) implementation

In [20]:
%%cython -a --compile-args=-fopenmp --link-args=-fopenmp
import numpy as np
from cython.parallel import prange
import cython

from libc.math cimport sqrt, tanh

cdef extern from "complex.h" nogil:
    double complex cexp(double complex)
    double cabs(double complex)
      
@cython.wraparound(False)
@cython.boundscheck(False)
@cython.cdivision(True)
def Circ_cython_parallel(long[::1] n, 
                long[::1] m, 
                double[::1] h, 
                double[::1] k,
                double e0,
                int w):
    cdef:
        double[:, ::1] result
        double r_part, i_part, v_h, v_k, radius, strain
        double complex tmp, two_j_pi
        int i_h, i_k, i_m, i_n, size_h, size_k, N
        
    two_j_pi = np.pi*2j
    size_h = h.size
    size_k = k.size
    N = n.size
    result = np.zeros((size_k, size_h))
    #loop over the reciprocal space coordinates
    for i_k in prange(size_k, nogil=True):
        v_k = k[i_k]
        for i_h in range(size_h): 
            v_h = h[i_h]
            tmp = 0
            #loop and sum over unit-cells
            for i_n in range(N):
                for i_m in range(N):
                    radius = sqrt((i_n - N/2.)** 2 + (i_m - N/2.)** 2)
                    if (radius > (N/2.)):
                        continue
                    strain = e0 * (1 + tanh((radius-N/2.)/w))
                    tmp = tmp + cexp(two_j_pi*(v_h*(i_n+strain*(i_n-N/2.)) + v_k*(i_m+strain*(i_m-N/2.))))
            result[i_h, i_k] += cabs(tmp)**2

    return result
Out[20]:
Cython: _cython_magic_db50bd1c6df80b1361475bdd7f5daad4.pyx

Generated by Cython 0.26.1

Yellow lines hint at Python interaction.
Click on a line that starts with a "+" to see the C code that Cython generated for it.

+01: import numpy as np
  __pyx_t_1 = __Pyx_Import(__pyx_n_s_numpy, 0, 0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 1, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  if (PyDict_SetItem(__pyx_d, __pyx_n_s_np, __pyx_t_1) < 0) __PYX_ERR(0, 1, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
/* … */
  __pyx_t_1 = PyDict_New(); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 1, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  if (PyDict_SetItem(__pyx_d, __pyx_n_s_test, __pyx_t_1) < 0) __PYX_ERR(0, 1, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
 02: from cython.parallel import prange
 03: import cython
 04: 
 05: from libc.math cimport sqrt, tanh
 06: 
 07: cdef extern from "complex.h" nogil:
 08:     double complex cexp(double complex)
 09:     double cabs(double complex)
 10: 
 11: @cython.wraparound(False)
 12: @cython.boundscheck(False)
 13: @cython.cdivision(True)
+14: def Circ_cython_parallel(long[::1] n,
/* Python wrapper */
static PyObject *__pyx_pw_46_cython_magic_db50bd1c6df80b1361475bdd7f5daad4_1Circ_cython_parallel(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
static PyMethodDef __pyx_mdef_46_cython_magic_db50bd1c6df80b1361475bdd7f5daad4_1Circ_cython_parallel = {"Circ_cython_parallel", (PyCFunction)__pyx_pw_46_cython_magic_db50bd1c6df80b1361475bdd7f5daad4_1Circ_cython_parallel, METH_VARARGS|METH_KEYWORDS, 0};
static PyObject *__pyx_pw_46_cython_magic_db50bd1c6df80b1361475bdd7f5daad4_1Circ_cython_parallel(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) {
  __Pyx_memviewslice __pyx_v_n = { 0, 0, { 0 }, { 0 }, { 0 } };
  CYTHON_UNUSED __Pyx_memviewslice __pyx_v_m = { 0, 0, { 0 }, { 0 }, { 0 } };
  __Pyx_memviewslice __pyx_v_h = { 0, 0, { 0 }, { 0 }, { 0 } };
  __Pyx_memviewslice __pyx_v_k = { 0, 0, { 0 }, { 0 }, { 0 } };
  double __pyx_v_e0;
  int __pyx_v_w;
  PyObject *__pyx_r = 0;
  __Pyx_RefNannyDeclarations
  __Pyx_RefNannySetupContext("Circ_cython_parallel (wrapper)", 0);
  {
    static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_n,&__pyx_n_s_m,&__pyx_n_s_h,&__pyx_n_s_k,&__pyx_n_s_e0,&__pyx_n_s_w,0};
    PyObject* values[6] = {0,0,0,0,0,0};
    if (unlikely(__pyx_kwds)) {
      Py_ssize_t kw_args;
      const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args);
      switch (pos_args) {
        case  6: values[5] = PyTuple_GET_ITEM(__pyx_args, 5);
        CYTHON_FALLTHROUGH;
        case  5: values[4] = PyTuple_GET_ITEM(__pyx_args, 4);
        CYTHON_FALLTHROUGH;
        case  4: values[3] = PyTuple_GET_ITEM(__pyx_args, 3);
        CYTHON_FALLTHROUGH;
        case  3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2);
        CYTHON_FALLTHROUGH;
        case  2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
        CYTHON_FALLTHROUGH;
        case  1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
        CYTHON_FALLTHROUGH;
        case  0: break;
        default: goto __pyx_L5_argtuple_error;
      }
      kw_args = PyDict_Size(__pyx_kwds);
      switch (pos_args) {
        case  0:
        if (likely((values[0] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_n)) != 0)) kw_args--;
        else goto __pyx_L5_argtuple_error;
        CYTHON_FALLTHROUGH;
        case  1:
        if (likely((values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_m)) != 0)) kw_args--;
        else {
          __Pyx_RaiseArgtupleInvalid("Circ_cython_parallel", 1, 6, 6, 1); __PYX_ERR(0, 14, __pyx_L3_error)
        }
        CYTHON_FALLTHROUGH;
        case  2:
        if (likely((values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_h)) != 0)) kw_args--;
        else {
          __Pyx_RaiseArgtupleInvalid("Circ_cython_parallel", 1, 6, 6, 2); __PYX_ERR(0, 14, __pyx_L3_error)
        }
        CYTHON_FALLTHROUGH;
        case  3:
        if (likely((values[3] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_k)) != 0)) kw_args--;
        else {
          __Pyx_RaiseArgtupleInvalid("Circ_cython_parallel", 1, 6, 6, 3); __PYX_ERR(0, 14, __pyx_L3_error)
        }
        CYTHON_FALLTHROUGH;
        case  4:
        if (likely((values[4] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_e0)) != 0)) kw_args--;
        else {
          __Pyx_RaiseArgtupleInvalid("Circ_cython_parallel", 1, 6, 6, 4); __PYX_ERR(0, 14, __pyx_L3_error)
        }
        CYTHON_FALLTHROUGH;
        case  5:
        if (likely((values[5] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_w)) != 0)) kw_args--;
        else {
          __Pyx_RaiseArgtupleInvalid("Circ_cython_parallel", 1, 6, 6, 5); __PYX_ERR(0, 14, __pyx_L3_error)
        }
      }
      if (unlikely(kw_args > 0)) {
        if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "Circ_cython_parallel") < 0)) __PYX_ERR(0, 14, __pyx_L3_error)
      }
    } else if (PyTuple_GET_SIZE(__pyx_args) != 6) {
      goto __pyx_L5_argtuple_error;
    } else {
      values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
      values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
      values[2] = PyTuple_GET_ITEM(__pyx_args, 2);
      values[3] = PyTuple_GET_ITEM(__pyx_args, 3);
      values[4] = PyTuple_GET_ITEM(__pyx_args, 4);
      values[5] = PyTuple_GET_ITEM(__pyx_args, 5);
    }
    __pyx_v_n = __Pyx_PyObject_to_MemoryviewSlice_dc_long(values[0]); if (unlikely(!__pyx_v_n.memview)) __PYX_ERR(0, 14, __pyx_L3_error)
    __pyx_v_m = __Pyx_PyObject_to_MemoryviewSlice_dc_long(values[1]); if (unlikely(!__pyx_v_m.memview)) __PYX_ERR(0, 15, __pyx_L3_error)
    __pyx_v_h = __Pyx_PyObject_to_MemoryviewSlice_dc_double(values[2]); if (unlikely(!__pyx_v_h.memview)) __PYX_ERR(0, 16, __pyx_L3_error)
    __pyx_v_k = __Pyx_PyObject_to_MemoryviewSlice_dc_double(values[3]); if (unlikely(!__pyx_v_k.memview)) __PYX_ERR(0, 17, __pyx_L3_error)
    __pyx_v_e0 = __pyx_PyFloat_AsDouble(values[4]); if (unlikely((__pyx_v_e0 == (double)-1) && PyErr_Occurred())) __PYX_ERR(0, 18, __pyx_L3_error)
    __pyx_v_w = __Pyx_PyInt_As_int(values[5]); if (unlikely((__pyx_v_w == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 19, __pyx_L3_error)
  }
  goto __pyx_L4_argument_unpacking_done;
  __pyx_L5_argtuple_error:;
  __Pyx_RaiseArgtupleInvalid("Circ_cython_parallel", 1, 6, 6, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 14, __pyx_L3_error)
  __pyx_L3_error:;
  __Pyx_AddTraceback("_cython_magic_db50bd1c6df80b1361475bdd7f5daad4.Circ_cython_parallel", __pyx_clineno, __pyx_lineno, __pyx_filename);
  __Pyx_RefNannyFinishContext();
  return NULL;
  __pyx_L4_argument_unpacking_done:;
  __pyx_r = __pyx_pf_46_cython_magic_db50bd1c6df80b1361475bdd7f5daad4_Circ_cython_parallel(__pyx_self, __pyx_v_n, __pyx_v_m, __pyx_v_h, __pyx_v_k, __pyx_v_e0, __pyx_v_w);

  /* function exit code */
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}

static PyObject *__pyx_pf_46_cython_magic_db50bd1c6df80b1361475bdd7f5daad4_Circ_cython_parallel(CYTHON_UNUSED PyObject *__pyx_self, __Pyx_memviewslice __pyx_v_n, CYTHON_UNUSED __Pyx_memviewslice __pyx_v_m, __Pyx_memviewslice __pyx_v_h, __Pyx_memviewslice __pyx_v_k, double __pyx_v_e0, int __pyx_v_w) {
  __Pyx_memviewslice __pyx_v_result = { 0, 0, { 0 }, { 0 }, { 0 } };
  double __pyx_v_v_h;
  double __pyx_v_v_k;
  double __pyx_v_radius;
  double __pyx_v_strain;
  __pyx_t_double_complex __pyx_v_tmp;
  __pyx_t_double_complex __pyx_v_two_j_pi;
  int __pyx_v_i_h;
  int __pyx_v_i_k;
  int __pyx_v_i_m;
  int __pyx_v_i_n;
  int __pyx_v_size_h;
  int __pyx_v_size_k;
  int __pyx_v_N;
  PyObject *__pyx_r = NULL;
  __Pyx_RefNannyDeclarations
  __Pyx_RefNannySetupContext("Circ_cython_parallel", 0);
/* … */
  /* function exit code */
  __pyx_L1_error:;
  __Pyx_XDECREF(__pyx_t_1);
  __Pyx_XDECREF(__pyx_t_2);
  __Pyx_XDECREF(__pyx_t_3);
  __Pyx_XDECREF(__pyx_t_6);
  __Pyx_XDECREF(__pyx_t_7);
  __PYX_XDEC_MEMVIEW(&__pyx_t_8, 1);
  __Pyx_AddTraceback("_cython_magic_db50bd1c6df80b1361475bdd7f5daad4.Circ_cython_parallel", __pyx_clineno, __pyx_lineno, __pyx_filename);
  __pyx_r = NULL;
  __pyx_L0:;
  __PYX_XDEC_MEMVIEW(&__pyx_v_result, 1);
  __PYX_XDEC_MEMVIEW(&__pyx_v_n, 1);
  __PYX_XDEC_MEMVIEW(&__pyx_v_m, 1);
  __PYX_XDEC_MEMVIEW(&__pyx_v_h, 1);
  __PYX_XDEC_MEMVIEW(&__pyx_v_k, 1);
  __Pyx_XGIVEREF(__pyx_r);
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}
/* … */
  __pyx_tuple__20 = PyTuple_Pack(22, __pyx_n_s_n, __pyx_n_s_m, __pyx_n_s_h, __pyx_n_s_k, __pyx_n_s_e0, __pyx_n_s_w, __pyx_n_s_result, __pyx_n_s_r_part, __pyx_n_s_i_part, __pyx_n_s_v_h, __pyx_n_s_v_k, __pyx_n_s_radius, __pyx_n_s_strain, __pyx_n_s_tmp, __pyx_n_s_two_j_pi, __pyx_n_s_i_h, __pyx_n_s_i_k, __pyx_n_s_i_m, __pyx_n_s_i_n, __pyx_n_s_size_h, __pyx_n_s_size_k, __pyx_n_s_N); if (unlikely(!__pyx_tuple__20)) __PYX_ERR(0, 14, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_tuple__20);
  __Pyx_GIVEREF(__pyx_tuple__20);
/* … */
  __pyx_t_1 = PyCFunction_NewEx(&__pyx_mdef_46_cython_magic_db50bd1c6df80b1361475bdd7f5daad4_1Circ_cython_parallel, NULL, __pyx_n_s_cython_magic_db50bd1c6df80b1361); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 14, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  if (PyDict_SetItem(__pyx_d, __pyx_n_s_Circ_cython_parallel, __pyx_t_1) < 0) __PYX_ERR(0, 14, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __pyx_codeobj__21 = (PyObject*)__Pyx_PyCode_New(6, 0, 22, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__20, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_home_aboulle_cache_ipython_cyth, __pyx_n_s_Circ_cython_parallel, 14, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__21)) __PYX_ERR(0, 14, __pyx_L1_error)
 15:                 long[::1] m,
 16:                 double[::1] h,
 17:                 double[::1] k,
 18:                 double e0,
 19:                 int w):
 20:     cdef:
 21:         double[:, ::1] result
 22:         double r_part, i_part, v_h, v_k, radius, strain
 23:         double complex tmp, two_j_pi
 24:         int i_h, i_k, i_m, i_n, size_h, size_k, N
 25: 
+26:     two_j_pi = np.pi*2j
  __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 26, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_pi); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 26, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_2);
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __pyx_t_1 = PyComplex_FromDoubles(0.0, 2.0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 26, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __pyx_t_3 = PyNumber_Multiply(__pyx_t_2, __pyx_t_1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 26, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_3);
  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __pyx_t_4 = __Pyx_PyComplex_As___pyx_t_double_complex(__pyx_t_3); if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 26, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
  __pyx_v_two_j_pi = __pyx_t_4;
+27:     size_h = h.size
  __pyx_t_3 = __pyx_memoryview_fromslice(__pyx_v_h, 1, (PyObject *(*)(char *)) __pyx_memview_get_double, (int (*)(char *, PyObject *)) __pyx_memview_set_double, 0);; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 27, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_3);
  __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_size); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 27, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
  __pyx_t_5 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_5 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 27, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __pyx_v_size_h = __pyx_t_5;
+28:     size_k = k.size
  __pyx_t_1 = __pyx_memoryview_fromslice(__pyx_v_k, 1, (PyObject *(*)(char *)) __pyx_memview_get_double, (int (*)(char *, PyObject *)) __pyx_memview_set_double, 0);; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 28, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_size); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 28, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_3);
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __pyx_t_5 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_5 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 28, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
  __pyx_v_size_k = __pyx_t_5;
+29:     N = n.size
  __pyx_t_3 = __pyx_memoryview_fromslice(__pyx_v_n, 1, (PyObject *(*)(char *)) __pyx_memview_get_long, (int (*)(char *, PyObject *)) __pyx_memview_set_long, 0);; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 29, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_3);
  __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_size); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 29, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
  __pyx_t_5 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_5 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 29, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __pyx_v_N = __pyx_t_5;
+30:     result = np.zeros((size_k, size_h))
  __pyx_t_3 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 30, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_3);
  __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_zeros); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 30, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_2);
  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
  __pyx_t_3 = __Pyx_PyInt_From_int(__pyx_v_size_k); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 30, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_3);
  __pyx_t_6 = __Pyx_PyInt_From_int(__pyx_v_size_h); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 30, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_6);
  __pyx_t_7 = PyTuple_New(2); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 30, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_7);
  __Pyx_GIVEREF(__pyx_t_3);
  PyTuple_SET_ITEM(__pyx_t_7, 0, __pyx_t_3);
  __Pyx_GIVEREF(__pyx_t_6);
  PyTuple_SET_ITEM(__pyx_t_7, 1, __pyx_t_6);
  __pyx_t_3 = 0;
  __pyx_t_6 = 0;
  __pyx_t_6 = NULL;
  if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_2))) {
    __pyx_t_6 = PyMethod_GET_SELF(__pyx_t_2);
    if (likely(__pyx_t_6)) {
      PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_2);
      __Pyx_INCREF(__pyx_t_6);
      __Pyx_INCREF(function);
      __Pyx_DECREF_SET(__pyx_t_2, function);
    }
  }
  if (!__pyx_t_6) {
    __pyx_t_1 = __Pyx_PyObject_CallOneArg(__pyx_t_2, __pyx_t_7); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 30, __pyx_L1_error)
    __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
    __Pyx_GOTREF(__pyx_t_1);
  } else {
    #if CYTHON_FAST_PYCALL
    if (PyFunction_Check(__pyx_t_2)) {
      PyObject *__pyx_temp[2] = {__pyx_t_6, __pyx_t_7};
      __pyx_t_1 = __Pyx_PyFunction_FastCall(__pyx_t_2, __pyx_temp+1-1, 1+1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 30, __pyx_L1_error)
      __Pyx_XDECREF(__pyx_t_6); __pyx_t_6 = 0;
      __Pyx_GOTREF(__pyx_t_1);
      __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
    } else
    #endif
    #if CYTHON_FAST_PYCCALL
    if (__Pyx_PyFastCFunction_Check(__pyx_t_2)) {
      PyObject *__pyx_temp[2] = {__pyx_t_6, __pyx_t_7};
      __pyx_t_1 = __Pyx_PyCFunction_FastCall(__pyx_t_2, __pyx_temp+1-1, 1+1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 30, __pyx_L1_error)
      __Pyx_XDECREF(__pyx_t_6); __pyx_t_6 = 0;
      __Pyx_GOTREF(__pyx_t_1);
      __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
    } else
    #endif
    {
      __pyx_t_3 = PyTuple_New(1+1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 30, __pyx_L1_error)
      __Pyx_GOTREF(__pyx_t_3);
      __Pyx_GIVEREF(__pyx_t_6); PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_t_6); __pyx_t_6 = NULL;
      __Pyx_GIVEREF(__pyx_t_7);
      PyTuple_SET_ITEM(__pyx_t_3, 0+1, __pyx_t_7);
      __pyx_t_7 = 0;
      __pyx_t_1 = __Pyx_PyObject_Call(__pyx_t_2, __pyx_t_3, NULL); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 30, __pyx_L1_error)
      __Pyx_GOTREF(__pyx_t_1);
      __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
    }
  }
  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
  __pyx_t_8 = __Pyx_PyObject_to_MemoryviewSlice_d_dc_double(__pyx_t_1);
  if (unlikely(!__pyx_t_8.memview)) __PYX_ERR(0, 30, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __pyx_v_result = __pyx_t_8;
  __pyx_t_8.memview = NULL;
  __pyx_t_8.data = NULL;
 31:     #loop over the reciprocal space coordinates
+32:     for i_k in prange(size_k, nogil=True):
  {
      #ifdef WITH_THREAD
      PyThreadState *_save;
      Py_UNBLOCK_THREADS
      __Pyx_FastGIL_Remember();
      #endif
      /*try:*/ {
        __pyx_t_5 = __pyx_v_size_k;
        if (1 == 0) abort();
        {
            #if ((defined(__APPLE__) || defined(__OSX__)) && (defined(__GNUC__) && (__GNUC__ > 2 || (__GNUC__ == 2 && (__GNUC_MINOR__ > 95)))))
                #undef likely
                #undef unlikely
                #define likely(x)   (x)
                #define unlikely(x) (x)
            #endif
            __pyx_t_10 = (__pyx_t_5 - 0 + 1 - 1/abs(1)) / 1;
            if (__pyx_t_10 > 0)
            {
                #ifdef _OPENMP
                #pragma omp parallel
                #endif /* _OPENMP */
                {
                    #ifdef _OPENMP
                    #pragma omp for lastprivate(__pyx_v_i_h) firstprivate(__pyx_v_i_k) lastprivate(__pyx_v_i_k) lastprivate(__pyx_v_i_m) lastprivate(__pyx_v_i_n) lastprivate(__pyx_v_radius) lastprivate(__pyx_v_strain) lastprivate(__pyx_v_tmp) lastprivate(__pyx_v_v_h) lastprivate(__pyx_v_v_k)
                    #endif /* _OPENMP */
                    for (__pyx_t_9 = 0; __pyx_t_9 < __pyx_t_10; __pyx_t_9++){
                        {
                            __pyx_v_i_k = (int)(0 + 1 * __pyx_t_9);
                            /* Initialize private variables to invalid values */
                            __pyx_v_i_h = ((int)0xbad0bad0);
                            __pyx_v_i_m = ((int)0xbad0bad0);
                            __pyx_v_i_n = ((int)0xbad0bad0);
                            __pyx_v_radius = ((double)__PYX_NAN());
                            __pyx_v_strain = ((double)__PYX_NAN());
                            __pyx_v_v_h = ((double)__PYX_NAN());
                            __pyx_v_v_k = ((double)__PYX_NAN());
/* … */
      /*finally:*/ {
        /*normal exit:*/{
          #ifdef WITH_THREAD
          __Pyx_FastGIL_Forget();
          Py_BLOCK_THREADS
          #endif
          goto __pyx_L5;
        }
        __pyx_L5:;
      }
  }
+33:         v_k = k[i_k]
                            __pyx_t_11 = __pyx_v_i_k;
                            __pyx_v_v_k = (*((double *) ( /* dim=0 */ ((char *) (((double *) __pyx_v_k.data) + __pyx_t_11)) )));
+34:         for i_h in range(size_h):
                            __pyx_t_12 = __pyx_v_size_h;
                            for (__pyx_t_13 = 0; __pyx_t_13 < __pyx_t_12; __pyx_t_13+=1) {
                              __pyx_v_i_h = __pyx_t_13;
+35:             v_h = h[i_h]
                              __pyx_t_14 = __pyx_v_i_h;
                              __pyx_v_v_h = (*((double *) ( /* dim=0 */ ((char *) (((double *) __pyx_v_h.data) + __pyx_t_14)) )));
+36:             tmp = 0
                              __pyx_v_tmp = __pyx_t_double_complex_from_parts(0, 0);
 37:             #loop and sum over unit-cells
+38:             for i_n in range(N):
                              __pyx_t_15 = __pyx_v_N;
                              for (__pyx_t_16 = 0; __pyx_t_16 < __pyx_t_15; __pyx_t_16+=1) {
                                __pyx_v_i_n = __pyx_t_16;
+39:                 for i_m in range(N):
                                __pyx_t_17 = __pyx_v_N;
                                for (__pyx_t_18 = 0; __pyx_t_18 < __pyx_t_17; __pyx_t_18+=1) {
                                  __pyx_v_i_m = __pyx_t_18;
+40:                     radius = sqrt((i_n - N/2.)** 2 + (i_m - N/2.)** 2)
                                  __pyx_v_radius = sqrt((pow((__pyx_v_i_n - (((double)__pyx_v_N) / 2.)), 2.0) + pow((__pyx_v_i_m - (((double)__pyx_v_N) / 2.)), 2.0)));
+41:                     if (radius > (N/2.)):
                                  __pyx_t_19 = ((__pyx_v_radius > (((double)__pyx_v_N) / 2.)) != 0);
                                  if (__pyx_t_19) {
/* … */
                                  }
+42:                         continue
                                    goto __pyx_L14_continue;
+43:                     strain = e0 * (1 + tanh((radius-N/2.)/w))
                                  __pyx_v_strain = (__pyx_v_e0 * (1.0 + tanh(((__pyx_v_radius - (((double)__pyx_v_N) / 2.)) / ((double)__pyx_v_w)))));
+44:                     tmp = tmp + cexp(two_j_pi*(v_h*(i_n+strain*(i_n-N/2.)) + v_k*(i_m+strain*(i_m-N/2.))))
                                  __pyx_v_tmp = __Pyx_c_sum_double(__pyx_v_tmp, cexp(__Pyx_c_prod_double(__pyx_v_two_j_pi, __pyx_t_double_complex_from_parts(((__pyx_v_v_h * (__pyx_v_i_n + (__pyx_v_strain * (__pyx_v_i_n - (((double)__pyx_v_N) / 2.))))) + (__pyx_v_v_k * (__pyx_v_i_m + (__pyx_v_strain * (__pyx_v_i_m - (((double)__pyx_v_N) / 2.)))))), 0))));
                                  __pyx_L14_continue:;
                                }
                              }
+45:             result[i_h, i_k] += cabs(tmp)**2
                              __pyx_t_20 = __pyx_v_i_h;
                              __pyx_t_21 = __pyx_v_i_k;
                              *((double *) ( /* dim=1 */ ((char *) (((double *) ( /* dim=0 */ (__pyx_v_result.data + __pyx_t_20 * __pyx_v_result.strides[0]) )) + __pyx_t_21)) )) += pow(cabs(__pyx_v_tmp), 2.0);
                            }
                        }
                    }
                }
            }
        }
        #if ((defined(__APPLE__) || defined(__OSX__)) && (defined(__GNUC__) && (__GNUC__ > 2 || (__GNUC__ == 2 && (__GNUC_MINOR__ > 95)))))
            #undef likely
            #undef unlikely
            #define likely(x)   __builtin_expect(!!(x), 1)
            #define unlikely(x) __builtin_expect(!!(x), 0)
        #endif
      }
 46: 
+47:     return result
  __Pyx_XDECREF(__pyx_r);
  __pyx_t_1 = __pyx_memoryview_fromslice(__pyx_v_result, 2, (PyObject *(*)(char *)) __pyx_memview_get_double, (int (*)(char *, PyObject *)) __pyx_memview_set_double, 0);; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 47, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __pyx_r = __pyx_t_1;
  __pyx_t_1 = 0;
  goto __pyx_L0;
In [21]:
%time intensity = Circ_cython_parallel(n, m, h, k, e0, w)
%timeit Circ_cython_parallel(n, m, h, k, e0, w)
print("Error:", abs(intensity-ref_intensity).max())
CPU times: user 8min 4s, sys: 32.5 ms, total: 8min 4s
Wall time: 6.69 s
1 loop, best of 3: 6.57 s per loop
Error: 0.0

Fast Fourier Transform

In [22]:
def Circ_fft(N, e0, w):
    n = np.arange(N*oversampling)[:,np.newaxis]
    m = np.arange(N*oversampling)[np.newaxis,:]
    radius = np.sqrt((n - N/2)** 2 + (m - N/2)** 2) # Compute the distance from center of crystal
    support = np.where(radius>N/2, 0, 1) # Set distances > threshold to 0
    strain = e0 * (1 + np.tanh((radius-N/2)/w))
    return np.fft.fftshift(abs(np.fft.fft2(support*np.exp(-2j*np.pi*(H*strain*(n-N/2) + K*strain*(m-N/2)))))**2)

%time intensity = Circ_fft(N, e0, w)
%timeit Circ_fft(N, e0, w)
print("Error:", abs(intensity-ref_intensity).max())
CPU times: user 47.3 ms, sys: 11.8 ms, total: 59.2 ms
Wall time: 58 ms
10 loops, best of 3: 44.8 ms per loop
Error: 161538.114096