diff --git a/palace/fem/integ/curlcurl.cpp b/palace/fem/integ/curlcurl.cpp index eb9065040..6a2f71c68 100644 --- a/palace/fem/integ/curlcurl.cpp +++ b/palace/fem/integ/curlcurl.cpp @@ -10,9 +10,7 @@ PalacePragmaDiagnosticPush PalacePragmaDiagnosticDisableUnused -#include "fem/qfunctions/hdiv_build_qf.h" #include "fem/qfunctions/hdiv_qf.h" -#include "fem/qfunctions/l2_build_qf.h" #include "fem/qfunctions/l2_qf.h" PalacePragmaDiagnosticPop diff --git a/palace/fem/integ/curlcurlmass.cpp b/palace/fem/integ/curlcurlmass.cpp index 7f2a1d26c..14388c4ef 100644 --- a/palace/fem/integ/curlcurlmass.cpp +++ b/palace/fem/integ/curlcurlmass.cpp @@ -6,7 +6,6 @@ #include "fem/libceed/coefficient.hpp" #include "fem/libceed/integrator.hpp" -#include "fem/qfunctions/hdivmass_build_qf.h" #include "fem/qfunctions/hdivmass_qf.h" namespace palace diff --git a/palace/fem/integ/diffusion.cpp b/palace/fem/integ/diffusion.cpp index eaad929a0..47d864510 100644 --- a/palace/fem/integ/diffusion.cpp +++ b/palace/fem/integ/diffusion.cpp @@ -6,7 +6,6 @@ #include "fem/libceed/coefficient.hpp" #include "fem/libceed/integrator.hpp" -#include "fem/qfunctions/hcurl_build_qf.h" #include "fem/qfunctions/hcurl_qf.h" namespace palace diff --git a/palace/fem/integ/diffusionmass.cpp b/palace/fem/integ/diffusionmass.cpp index 21e8897e7..e44426e15 100644 --- a/palace/fem/integ/diffusionmass.cpp +++ b/palace/fem/integ/diffusionmass.cpp @@ -6,7 +6,6 @@ #include "fem/libceed/coefficient.hpp" #include "fem/libceed/integrator.hpp" -#include "fem/qfunctions/hcurlmass_build_qf.h" #include "fem/qfunctions/hcurlmass_qf.h" namespace palace diff --git a/palace/fem/integ/divdiv.cpp b/palace/fem/integ/divdiv.cpp index c724ce827..7a1d1bdc7 100644 --- a/palace/fem/integ/divdiv.cpp +++ b/palace/fem/integ/divdiv.cpp @@ -6,7 +6,6 @@ #include "fem/libceed/coefficient.hpp" #include "fem/libceed/integrator.hpp" -#include "fem/qfunctions/l2_build_qf.h" #include "fem/qfunctions/l2_qf.h" namespace palace diff --git 
a/palace/fem/integ/divdivmass.cpp b/palace/fem/integ/divdivmass.cpp index 6e44970ec..aee34f8be 100644 --- a/palace/fem/integ/divdivmass.cpp +++ b/palace/fem/integ/divdivmass.cpp @@ -6,7 +6,6 @@ #include "fem/libceed/coefficient.hpp" #include "fem/libceed/integrator.hpp" -#include "fem/qfunctions/l2mass_build_qf.h" #include "fem/qfunctions/l2mass_qf.h" namespace palace diff --git a/palace/fem/integ/grad.cpp b/palace/fem/integ/grad.cpp index e1a677337..dabc6511a 100644 --- a/palace/fem/integ/grad.cpp +++ b/palace/fem/integ/grad.cpp @@ -6,7 +6,6 @@ #include "fem/libceed/coefficient.hpp" #include "fem/libceed/integrator.hpp" -#include "fem/qfunctions/hcurlh1d_build_qf.h" #include "fem/qfunctions/hcurlh1d_qf.h" namespace palace diff --git a/palace/fem/integ/mass.cpp b/palace/fem/integ/mass.cpp index fa3fa1df5..feb5bc142 100644 --- a/palace/fem/integ/mass.cpp +++ b/palace/fem/integ/mass.cpp @@ -6,7 +6,6 @@ #include "fem/libceed/coefficient.hpp" #include "fem/libceed/integrator.hpp" -#include "fem/qfunctions/h1_build_qf.h" #include "fem/qfunctions/h1_qf.h" namespace palace diff --git a/palace/fem/integ/mixedveccurl.cpp b/palace/fem/integ/mixedveccurl.cpp index 07621b873..81c321aad 100644 --- a/palace/fem/integ/mixedveccurl.cpp +++ b/palace/fem/integ/mixedveccurl.cpp @@ -10,9 +10,7 @@ PalacePragmaDiagnosticPush PalacePragmaDiagnosticDisableUnused -#include "fem/qfunctions/hcurlhdiv_build_qf.h" #include "fem/qfunctions/hcurlhdiv_qf.h" -#include "fem/qfunctions/hdiv_build_qf.h" #include "fem/qfunctions/hdiv_qf.h" PalacePragmaDiagnosticPop diff --git a/palace/fem/integ/mixedvecgrad.cpp b/palace/fem/integ/mixedvecgrad.cpp index 566c04e4d..6d49db937 100644 --- a/palace/fem/integ/mixedvecgrad.cpp +++ b/palace/fem/integ/mixedvecgrad.cpp @@ -6,7 +6,6 @@ #include "fem/libceed/coefficient.hpp" #include "fem/libceed/integrator.hpp" -#include "fem/qfunctions/hcurl_build_qf.h" #include "fem/qfunctions/hcurl_qf.h" namespace palace diff --git a/palace/fem/integ/vecfemass.cpp 
b/palace/fem/integ/vecfemass.cpp index 6cc819fac..d0c31948b 100644 --- a/palace/fem/integ/vecfemass.cpp +++ b/palace/fem/integ/vecfemass.cpp @@ -6,11 +6,8 @@ #include "fem/libceed/coefficient.hpp" #include "fem/libceed/integrator.hpp" -#include "fem/qfunctions/hcurl_build_qf.h" #include "fem/qfunctions/hcurl_qf.h" -#include "fem/qfunctions/hcurlhdiv_build_qf.h" #include "fem/qfunctions/hcurlhdiv_qf.h" -#include "fem/qfunctions/hdiv_build_qf.h" #include "fem/qfunctions/hdiv_qf.h" namespace palace diff --git a/palace/fem/libceed/coefficient.cpp b/palace/fem/libceed/coefficient.cpp index 9f42499f3..67c5409fb 100644 --- a/palace/fem/libceed/coefficient.cpp +++ b/palace/fem/libceed/coefficient.cpp @@ -7,7 +7,7 @@ #include "fem/libceed/ceed.hpp" #include "models/materialoperator.hpp" -#include "fem/qfunctions/coeff_qf.h" +#include "fem/qfunctions/coeff/coeff_qf.h" namespace palace::ceed { @@ -36,26 +36,15 @@ auto InitDefaultCoefficient(int dim) void MakeDiagonalCoefficient(int dim, CeedIntScalar *mat_coeff, CeedScalar a, CeedInt k) { - switch (dim) + const int coeff_dim = CoeffDim(dim); + for (int i = 0; i < coeff_dim; i++) { - case 1: - mat_coeff[k].second = a; - break; - case 2: - mat_coeff[3 * k + 0].second = a; - mat_coeff[3 * k + 1].second = 0.0; - mat_coeff[3 * k + 2].second = a; - break; - case 3: - mat_coeff[6 * k + 0].second = a; - mat_coeff[6 * k + 1].second = 0.0; - mat_coeff[6 * k + 2].second = 0.0; - mat_coeff[6 * k + 3].second = a; - mat_coeff[6 * k + 4].second = 0.0; - mat_coeff[6 * k + 5].second = a; - break; - default: - MFEM_ABORT("Unsupported dimension for diagonal coefficient!"); + mat_coeff[coeff_dim * k + i].second = 0.0; + } + for (int di = 0; di < dim; ++di) + { + const int idx = (di * dim) - (((di - 1) * di) / 2); + mat_coeff[coeff_dim * k + idx].second = a; } } @@ -97,8 +86,8 @@ PopulateCoefficientContext(int dim, const MaterialPropertyCoefficient *Q, double // Map unassigned attributes to zero material property coefficient (the last material // 
property is reserved for zero). - std::vector ctx(2 + attr_mat.Size() + - CoeffDim(dim) * (mat_coeff.SizeK() + 1)); + const int coeff_dim = CoeffDim(dim); + std::vector ctx(2 + attr_mat.Size() + coeff_dim * (mat_coeff.SizeK() + 1)); ctx[0].first = attr_mat.Size(); const int zero_mat = mat_coeff.SizeK(); for (int i = 0; i < attr_mat.Size(); i++) @@ -123,16 +112,16 @@ PopulateCoefficientContext(int dim, const MaterialPropertyCoefficient *Q, double { for (int di = dj; di < dim; ++di) { + // Column-major ordering. const int idx = (dj * dim) - (((dj - 1) * dj) / 2) + di - dj; - MatCoeff(ctx.data())[CoeffDim(dim) * k + idx].second = - a * mat_coeff(di, dj, k); // Column-major + MatCoeff(ctx.data())[coeff_dim * k + idx].second = a * mat_coeff(di, dj, k); } } } } - for (int d = 0; d < CoeffDim(dim); d++) + for (int d = 0; d < coeff_dim; d++) { - MatCoeff(ctx.data())[CoeffDim(dim) * zero_mat + d].second = 0.0; + MatCoeff(ctx.data())[coeff_dim * zero_mat + d].second = 0.0; } return ctx; diff --git a/palace/fem/mesh.cpp b/palace/fem/mesh.cpp index 049a29463..aaac29d86 100644 --- a/palace/fem/mesh.cpp +++ b/palace/fem/mesh.cpp @@ -201,8 +201,8 @@ auto AssembleGeometryData(const mfem::GridFunction &mesh_nodes, Ceed ceed, // is the first component of the quadrature data. { CeedScalar *geom_data_array; - PalaceCeedCall( - ceed, CeedVectorGetArrayWrite(data.geom_data, CEED_MEM_HOST, &geom_data_array)); + PalaceCeedCall(ceed, + CeedVectorGetArray(data.geom_data, CEED_MEM_HOST, &geom_data_array)); for (std::size_t k = 0; k < num_elem; k++) { const auto attr = GetCeedAttribute(data.indices[k]); diff --git a/palace/fem/qfunctions/1/h1_1_qf.h b/palace/fem/qfunctions/1/h1_1_qf.h new file mode 100644 index 000000000..48fbaf7fd --- /dev/null +++ b/palace/fem/qfunctions/1/h1_1_qf.h @@ -0,0 +1,24 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef PALACE_LIBCEED_H1_1_QF_H
+#define PALACE_LIBCEED_H1_1_QF_H
+
+#include "../coeff/coeff_1_qf.h"
+// Pointwise apply: v = coeff * wdetJ * u, with coeff selected by the point's attribute.
+CEED_QFUNCTION(f_apply_h1_1)(void *__restrict__ ctx, CeedInt Q, const CeedScalar *const *in,
+                             CeedScalar *const *out)
+{
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *u = in[1];
+  CeedScalar *v = out[0];
+  // in[0] packs Q attribute values followed by Q wdetJ values.
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+  {
+    const CeedScalar coeff = CoeffUnpack1((const CeedIntScalar *)ctx, (CeedInt)attr[i]);
+
+    v[i] = coeff * wdetJ[i] * u[i];
+  }
+  return 0;
+}
+
+#endif  // PALACE_LIBCEED_H1_1_QF_H
diff --git a/palace/fem/qfunctions/1/h1_build_1_qf.h b/palace/fem/qfunctions/1/h1_build_1_qf.h
new file mode 100644
index 000000000..acd0f0ed6
--- /dev/null
+++ b/palace/fem/qfunctions/1/h1_build_1_qf.h
@@ -0,0 +1,24 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef PALACE_LIBCEED_H1_BUILD_1_QF_H
+#define PALACE_LIBCEED_H1_BUILD_1_QF_H
+
+#include "../coeff/coeff_1_qf.h"
+// Pointwise assembly: qd = coeff * wdetJ, with coeff selected by the point's attribute.
+CEED_QFUNCTION(f_build_h1_1)(void *__restrict__ ctx, CeedInt Q, const CeedScalar *const *in,
+                             CeedScalar *const *out)
+{
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q;
+  CeedScalar *qd = out[0];
+  // in[0] packs Q attribute values followed by Q wdetJ values.
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+  {
+    const CeedScalar coeff = CoeffUnpack1((const CeedIntScalar *)ctx, (CeedInt)attr[i]);
+
+    qd[i] = coeff * wdetJ[i];
+  }
+  return 0;
+}
+
+#endif  // PALACE_LIBCEED_H1_BUILD_1_QF_H
diff --git a/palace/fem/qfunctions/1/l2_1_qf.h b/palace/fem/qfunctions/1/l2_1_qf.h
new file mode 100644
index 000000000..f98e0d427
--- /dev/null
+++ b/palace/fem/qfunctions/1/l2_1_qf.h
@@ -0,0 +1,24 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef PALACE_LIBCEED_L2_1_QF_H
+#define PALACE_LIBCEED_L2_1_QF_H
+
+#include "../coeff/coeff_1_qf.h"
+// Pointwise apply: v = (coeff * qw^2 / wdetJ) * u, with coeff selected by attribute.
+CEED_QFUNCTION(f_apply_l2_1)(void *__restrict__ ctx, CeedInt Q, const CeedScalar *const *in,
+                             CeedScalar *const *out)
+{
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *qw = in[1], *u = in[2];
+  CeedScalar *v = out[0];
+  // in[0] packs Q attribute values followed by Q wdetJ values.
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+  {
+    const CeedScalar coeff = CoeffUnpack1((const CeedIntScalar *)ctx, (CeedInt)attr[i]);
+
+    v[i] = (coeff * qw[i] * qw[i] / wdetJ[i]) * u[i];
+  }
+  return 0;
+}
+
+#endif  // PALACE_LIBCEED_L2_1_QF_H
diff --git a/palace/fem/qfunctions/1/l2_build_1_qf.h b/palace/fem/qfunctions/1/l2_build_1_qf.h
new file mode 100644
index 000000000..2b26ca517
--- /dev/null
+++ b/palace/fem/qfunctions/1/l2_build_1_qf.h
@@ -0,0 +1,24 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef PALACE_LIBCEED_L2_BUILD_1_QF_H
+#define PALACE_LIBCEED_L2_BUILD_1_QF_H
+
+#include "../coeff/coeff_1_qf.h"
+// Pointwise assembly: qd = coeff * qw^2 / wdetJ, with coeff selected by attribute.
+CEED_QFUNCTION(f_build_l2_1)(void *__restrict__ ctx, CeedInt Q, const CeedScalar *const *in,
+                             CeedScalar *const *out)
+{
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *qw = in[1];
+  CeedScalar *qd = out[0];
+  // in[0] packs Q attribute values followed by Q wdetJ values.
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+  {
+    const CeedScalar coeff = CoeffUnpack1((const CeedIntScalar *)ctx, (CeedInt)attr[i]);
+
+    qd[i] = coeff * qw[i] * qw[i] / wdetJ[i];
+  }
+  return 0;
+}
+
+#endif  // PALACE_LIBCEED_L2_BUILD_1_QF_H
diff --git a/palace/fem/qfunctions/2/h1_2_qf.h b/palace/fem/qfunctions/2/h1_2_qf.h
new file mode 100644
index 000000000..3e884b379
--- /dev/null
+++ b/palace/fem/qfunctions/2/h1_2_qf.h
@@ -0,0 +1,28 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef PALACE_LIBCEED_H1_2_QF_H
+#define PALACE_LIBCEED_H1_2_QF_H
+
+#include "../coeff/coeff_2_qf.h"
+// Pointwise apply: v = wdetJ * C u for a symmetric 2x2 C packed as {C00, C01, C11}.
+CEED_QFUNCTION(f_apply_h1_2)(void *__restrict__ ctx, CeedInt Q, const CeedScalar *const *in,
+                             CeedScalar *const *out)
+{
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *u = in[1];
+  CeedScalar *v = out[0];
+  // in[0] packs Q attribute values followed by Q wdetJ values; u and v use stride Q.
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+  {
+    CeedScalar coeff[3];
+    CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
+
+    const CeedScalar u0 = u[i + Q * 0];
+    const CeedScalar u1 = u[i + Q * 1];
+    v[i + Q * 0] = wdetJ[i] * (coeff[0] * u0 + coeff[1] * u1);
+    v[i + Q * 1] = wdetJ[i] * (coeff[1] * u0 + coeff[2] * u1);
+  }
+  return 0;
+}
+
+#endif  // PALACE_LIBCEED_H1_2_QF_H
diff --git a/palace/fem/qfunctions/2/h1_build_2_qf.h b/palace/fem/qfunctions/2/h1_build_2_qf.h
new file mode 100644
index 000000000..a4bb96c28
--- /dev/null
+++ b/palace/fem/qfunctions/2/h1_build_2_qf.h
@@ -0,0 +1,27 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef PALACE_LIBCEED_H1_BUILD_2_QF_H
+#define PALACE_LIBCEED_H1_BUILD_2_QF_H
+
+#include "../coeff/coeff_2_qf.h"
+// Pointwise assembly: qd = wdetJ * C, storing the 3 packed entries of symmetric 2x2 C.
+CEED_QFUNCTION(f_build_h1_2)(void *__restrict__ ctx, CeedInt Q, const CeedScalar *const *in,
+                             CeedScalar *const *out)
+{
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q;
+  CeedScalar *qd = out[0];
+  // in[0] packs Q attribute values followed by Q wdetJ values; qd uses stride Q.
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+  {
+    CeedScalar coeff[3];
+    CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
+
+    qd[i + Q * 0] = wdetJ[i] * coeff[0];
+    qd[i + Q * 1] = wdetJ[i] * coeff[1];
+    qd[i + Q * 2] = wdetJ[i] * coeff[2];
+  }
+  return 0;
+}
+
+#endif  // PALACE_LIBCEED_H1_BUILD_2_QF_H
diff --git a/palace/fem/qfunctions/2/l2_2_qf.h b/palace/fem/qfunctions/2/l2_2_qf.h
new file mode 100644
index 000000000..5057d47ba
--- /dev/null
+++ b/palace/fem/qfunctions/2/l2_2_qf.h
@@ -0,0 +1,29 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef PALACE_LIBCEED_L2_2_QF_H
+#define PALACE_LIBCEED_L2_2_QF_H
+
+#include "../coeff/coeff_2_qf.h"
+// Pointwise apply: v = (qw^2 / wdetJ) * C u for a symmetric 2x2 C packed in 3 entries.
+CEED_QFUNCTION(f_apply_l2_2)(void *__restrict__ ctx, CeedInt Q, const CeedScalar *const *in,
+                             CeedScalar *const *out)
+{
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *qw = in[1], *u = in[2];
+  CeedScalar *v = out[0];
+  // in[0] packs Q attribute values followed by Q wdetJ values; u and v use stride Q.
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+  {
+    CeedScalar coeff[3];
+    CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
+    const CeedScalar w = qw[i] * qw[i] / wdetJ[i];
+
+    const CeedScalar u0 = u[i + Q * 0];
+    const CeedScalar u1 = u[i + Q * 1];
+    v[i + Q * 0] = w * (coeff[0] * u0 + coeff[1] * u1);
+    v[i + Q * 1] = w * (coeff[1] * u0 + coeff[2] * u1);
+  }
+  return 0;
+}
+
+#endif  // PALACE_LIBCEED_L2_2_QF_H
diff --git a/palace/fem/qfunctions/2/l2_build_2_qf.h b/palace/fem/qfunctions/2/l2_build_2_qf.h
new file mode 100644
index 000000000..f8b7d6411
--- /dev/null
+++ b/palace/fem/qfunctions/2/l2_build_2_qf.h
@@ -0,0 +1,28 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef PALACE_LIBCEED_L2_BUILD_2_QF_H
+#define PALACE_LIBCEED_L2_BUILD_2_QF_H
+
+#include "../coeff/coeff_2_qf.h"
+// Pointwise assembly: qd = (qw^2 / wdetJ) * C for the 3 packed entries of symmetric C.
+CEED_QFUNCTION(f_build_l2_2)(void *__restrict__ ctx, CeedInt Q, const CeedScalar *const *in,
+                             CeedScalar *const *out)
+{
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *qw = in[1];
+  CeedScalar *qd = out[0];
+  // in[0] packs Q attribute values followed by Q wdetJ values; qd uses stride Q.
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+  {
+    CeedScalar coeff[3];
+    CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
+    const CeedScalar w = qw[i] * qw[i] / wdetJ[i];
+
+    qd[i + Q * 0] = w * coeff[0];
+    qd[i + Q * 1] = w * coeff[1];
+    qd[i + Q * 2] = w * coeff[2];
+  }
+  return 0;
+}
+
+#endif  // PALACE_LIBCEED_L2_BUILD_2_QF_H
diff --git a/palace/fem/qfunctions/21/geom_21_qf.h b/palace/fem/qfunctions/21/geom_21_qf.h
new file mode 100644
index 000000000..a6e9c1f1e
--- /dev/null
+++ b/palace/fem/qfunctions/21/geom_21_qf.h
@@ -0,0 +1,29 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_LIBCEED_GEOM_21_QF_H +#define PALACE_LIBCEED_GEOM_21_QF_H + +#include "utils_21_qf.h" + +CEED_QFUNCTION(f_build_geom_factor_21)(void *, CeedInt Q, const CeedScalar *const *in, + CeedScalar *const *out) +{ + const CeedScalar *qw = in[0], *J = in[1]; + CeedScalar *attr = out[0], *wdetJ = out[0] + Q, *adjJt = out[0] + 2 * Q; + + CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) + { + CeedScalar J_loc[2], adjJt_loc[2]; + MatUnpack21(J + i, Q, J_loc); + const CeedScalar detJ = AdjJt21(J_loc, adjJt_loc); + + attr[i] = 0; + wdetJ[i] = qw[i] * detJ; + adjJt[i + Q * 0] = adjJt_loc[0] / detJ; + adjJt[i + Q * 1] = adjJt_loc[1] / detJ; + } + return 0; +} + +#endif // PALACE_LIBCEED_GEOM_21_QF_H diff --git a/palace/fem/qfunctions/21/hcurl_21_qf.h b/palace/fem/qfunctions/21/hcurl_21_qf.h new file mode 100644 index 000000000..06add1a24 --- /dev/null +++ b/palace/fem/qfunctions/21/hcurl_21_qf.h @@ -0,0 +1,29 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef PALACE_LIBCEED_HCURL_21_QF_H
+#define PALACE_LIBCEED_HCURL_21_QF_H
+
+#include "../coeff/coeff_2_qf.h"
+#include "utils_21_qf.h"
+// Pointwise apply: v = wdetJ * (adjJt^T C adjJt) u for 2x1 adjJt and symmetric 2x2 C.
+CEED_QFUNCTION(f_apply_hcurl_21)(void *__restrict__ ctx, CeedInt Q,
+                                 const CeedScalar *const *in, CeedScalar *const *out)
+{
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1];
+  CeedScalar *v = out[0];
+  // in[0] packs attr, wdetJ, and the two adjJt components, each as Q consecutive values.
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+  {
+    const CeedScalar u_loc[1] = {u[i + Q * 0]};
+    CeedScalar coeff[3], adjJt_loc[2], v_loc[2];
+    CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
+    MatUnpack21(adjJt + i, Q, adjJt_loc);
+    MultAtBCx21(adjJt_loc, coeff, adjJt_loc, u_loc, v_loc);
+
+    v[i + Q * 0] = wdetJ[i] * v_loc[0];
+  }
+  return 0;
+}
+
+#endif  // PALACE_LIBCEED_HCURL_21_QF_H
diff --git a/palace/fem/qfunctions/21/hcurl_build_21_qf.h b/palace/fem/qfunctions/21/hcurl_build_21_qf.h
new file mode 100644
index 000000000..53c8130b5
--- /dev/null
+++ b/palace/fem/qfunctions/21/hcurl_build_21_qf.h
@@ -0,0 +1,28 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_LIBCEED_HCURL_BUILD_21_QF_H +#define PALACE_LIBCEED_HCURL_BUILD_21_QF_H + +#include "../coeff/coeff_2_qf.h" +#include "utils_21_qf.h" + +CEED_QFUNCTION(f_build_hcurl_21)(void *__restrict__ ctx, CeedInt Q, + const CeedScalar *const *in, CeedScalar *const *out) +{ + const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q; + CeedScalar *qd = out[0]; + + CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) + { + CeedScalar coeff[3], adjJt_loc[2], qd_loc[1]; + CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); + MatUnpack21(adjJt + i, Q, adjJt_loc); + MultAtBA21(adjJt_loc, coeff, qd_loc); + + qd[i + Q * 0] = wdetJ[i] * qd_loc[0]; + } + return 0; +} + +#endif // PALACE_LIBCEED_HCURL_BUILD_21_QF_H diff --git a/palace/fem/qfunctions/21/hcurlh1d_21_qf.h b/palace/fem/qfunctions/21/hcurlh1d_21_qf.h new file mode 100644 index 000000000..d64d7d3b5 --- /dev/null +++ b/palace/fem/qfunctions/21/hcurlh1d_21_qf.h @@ -0,0 +1,29 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_LIBCEED_HCURL_H1D_21_QF_H +#define PALACE_LIBCEED_HCURL_H1D_21_QF_H + +#include "../coeff/coeff_2_qf.h" +#include "utils_21_qf.h" + +CEED_QFUNCTION(f_apply_hcurlh1d_21)(void *__restrict__ ctx, CeedInt Q, + const CeedScalar *const *in, CeedScalar *const *out) +{ + const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1]; + CeedScalar *v = out[0]; + + CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) + { + const CeedScalar u_loc[1] = {u[i + Q * 0]}; + CeedScalar coeff[3], adjJt_loc[2], v_loc[1]; + CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); + MatUnpack21(adjJt + i, Q, adjJt_loc); + MultBAx21(adjJt_loc, coeff, u_loc, v_loc); + + v[i + Q * 0] = wdetJ[i] * v_loc[0]; + } + return 0; +} + +#endif // PALACE_LIBCEED_HCURL_H1D_21_QF_H diff --git a/palace/fem/qfunctions/21/hcurlh1d_build_21_qf.h b/palace/fem/qfunctions/21/hcurlh1d_build_21_qf.h new file mode 100644 index 000000000..5a3270f75 --- /dev/null +++ b/palace/fem/qfunctions/21/hcurlh1d_build_21_qf.h @@ -0,0 +1,28 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_LIBCEED_HCURL_H1D_BUILD_21_QF_H +#define PALACE_LIBCEED_HCURL_H1D_BUILD_21_QF_H + +#include "../coeff/coeff_2_qf.h" +#include "utils_21_qf.h" + +CEED_QFUNCTION(f_build_hcurlh1d_21)(void *__restrict__ ctx, CeedInt Q, + const CeedScalar *const *in, CeedScalar *const *out) +{ + const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q; + CeedScalar *qd = out[0]; + + CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) + { + CeedScalar coeff[3], adjJt_loc[2], qd_loc[1]; + CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); + MatUnpack21(adjJt + i, Q, adjJt_loc); + MultBA21(adjJt_loc, coeff, qd_loc); + + qd[i + Q * 0] = wdetJ[i] * qd_loc[0]; + } + return 0; +} + +#endif // PALACE_LIBCEED_HCURL_H1D_BUILD_21_QF_H diff --git a/palace/fem/qfunctions/21/hcurlhdiv_21_qf.h b/palace/fem/qfunctions/21/hcurlhdiv_21_qf.h new file mode 100644 index 000000000..81cb25e7c --- /dev/null +++ b/palace/fem/qfunctions/21/hcurlhdiv_21_qf.h @@ -0,0 +1,50 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef PALACE_LIBCEED_HCURL_HDIV_21_QF_H
+#define PALACE_LIBCEED_HCURL_HDIV_21_QF_H
+
+#include "../coeff/coeff_2_qf.h"
+#include "utils_21_qf.h"
+// Pointwise apply: v = wdetJ * (J_loc^T C adjJt) u, with J_loc obtained from AdjJt21(adjJt).
+CEED_QFUNCTION(f_apply_hcurlhdiv_21)(void *__restrict__ ctx, CeedInt Q,
+                                     const CeedScalar *const *in, CeedScalar *const *out)
+{
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1];
+  CeedScalar *v = out[0];
+  // in[0] packs attr, wdetJ, and the two adjJt components, each as Q consecutive values.
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+  {
+    const CeedScalar u_loc[1] = {u[i + Q * 0]};
+    CeedScalar coeff[3], adjJt_loc[2], J_loc[2], v_loc[2];
+    CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
+    MatUnpack21(adjJt + i, Q, adjJt_loc);
+    AdjJt21(adjJt_loc, J_loc);
+    MultAtBCx21(J_loc, coeff, adjJt_loc, u_loc, v_loc);
+
+    v[i + Q * 0] = wdetJ[i] * v_loc[0];
+  }
+  return 0;
+}
+
+CEED_QFUNCTION(f_apply_hdivhcurl_21)(void *__restrict__ ctx, CeedInt Q,
+                                     const CeedScalar *const *in, CeedScalar *const *out)
+{
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1];
+  CeedScalar *v = out[0];
+  // Same as f_apply_hcurlhdiv_21 with the roles swapped: v = wdetJ * (adjJt^T C J_loc) u.
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+  {
+    const CeedScalar u_loc[1] = {u[i + Q * 0]};
+    CeedScalar coeff[3], adjJt_loc[2], J_loc[2], v_loc[2];
+    CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
+    MatUnpack21(adjJt + i, Q, adjJt_loc);
+    AdjJt21(adjJt_loc, J_loc);
+    MultAtBCx21(adjJt_loc, coeff, J_loc, u_loc, v_loc);
+
+    v[i + Q * 0] = wdetJ[i] * v_loc[0];
+  }
+  return 0;
+}
+
+#endif  // PALACE_LIBCEED_HCURL_HDIV_21_QF_H
diff --git a/palace/fem/qfunctions/21/hcurlhdiv_build_21_qf.h b/palace/fem/qfunctions/21/hcurlhdiv_build_21_qf.h
new file mode 100644
index 000000000..1a6cef03b
--- /dev/null
+++ b/palace/fem/qfunctions/21/hcurlhdiv_build_21_qf.h
@@ -0,0 +1,48 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef PALACE_LIBCEED_HCURL_HDIV_BUILD_21_QF_H
+#define PALACE_LIBCEED_HCURL_HDIV_BUILD_21_QF_H
+
+#include "../coeff/coeff_2_qf.h"
+#include "utils_21_qf.h"
+// Pointwise assembly: qd = wdetJ * J_loc^T C adjJt, with J_loc obtained from AdjJt21(adjJt).
+CEED_QFUNCTION(f_build_hcurlhdiv_21)(void *__restrict__ ctx, CeedInt Q,
+                                     const CeedScalar *const *in, CeedScalar *const *out)
+{
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q;
+  CeedScalar *qd = out[0];
+  // in[0] packs attr, wdetJ, and the two adjJt components, each as Q consecutive values.
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+  {
+    CeedScalar coeff[3], adjJt_loc[2], J_loc[2], qd_loc[1];
+    CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
+    MatUnpack21(adjJt + i, Q, adjJt_loc);
+    AdjJt21(adjJt_loc, J_loc);
+    MultAtBC21(J_loc, coeff, adjJt_loc, qd_loc);
+
+    qd[i + Q * 0] = wdetJ[i] * qd_loc[0];
+  }
+  return 0;
+}
+
+CEED_QFUNCTION(f_build_hdivhcurl_21)(void *__restrict__ ctx, CeedInt Q,
+                                     const CeedScalar *const *in, CeedScalar *const *out)
+{
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q;
+  CeedScalar *qd = out[0];
+  // Same as f_build_hcurlhdiv_21 with the roles swapped: qd = wdetJ * adjJt^T C J_loc.
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+  {
+    CeedScalar coeff[3], adjJt_loc[2], J_loc[2], qd_loc[1];
+    CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
+    MatUnpack21(adjJt + i, Q, adjJt_loc);
+    AdjJt21(adjJt_loc, J_loc);
+    MultAtBC21(adjJt_loc, coeff, J_loc, qd_loc);
+
+    qd[i + Q * 0] = wdetJ[i] * qd_loc[0];
+  }
+  return 0;
+}
+
+#endif  // PALACE_LIBCEED_HCURL_HDIV_BUILD_21_QF_H
diff --git a/palace/fem/qfunctions/21/hcurlmass_21_qf.h b/palace/fem/qfunctions/21/hcurlmass_21_qf.h
new file mode 100644
index 000000000..c730c78a0
--- /dev/null
+++ b/palace/fem/qfunctions/21/hcurlmass_21_qf.h
@@ -0,0 +1,38 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef PALACE_LIBCEED_HCURL_MASS_21_QF_H
+#define PALACE_LIBCEED_HCURL_MASS_21_QF_H
+
+#include "../coeff/coeff_1_qf.h"
+#include "../coeff/coeff_2_qf.h"
+#include "utils_21_qf.h"
+// Applies two terms: v = c * wdetJ * u and gradv = wdetJ * (adjJt^T C adjJt) gradu.
+CEED_QFUNCTION(f_apply_hcurlmass_21)(void *__restrict__ ctx, CeedInt Q,
+                                     const CeedScalar *const *in, CeedScalar *const *out)
+{
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1],
+                   *gradu = in[2];
+  CeedScalar *__restrict__ v = out[0], *__restrict__ gradv = out[1];
+  // ctx carries both coefficients; the second is reached via CoeffPairSecond<1>(ctx).
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+  {
+    {
+      const CeedScalar coeff = CoeffUnpack1((const CeedIntScalar *)ctx, (CeedInt)attr[i]);
+
+      v[i] = coeff * wdetJ[i] * u[i];
+    }
+    {
+      const CeedScalar u_loc[1] = {gradu[i + Q * 0]};
+      CeedScalar coeff[3], adjJt_loc[2], v_loc[2];
+      CoeffUnpack2(CoeffPairSecond<1>((const CeedIntScalar *)ctx), (CeedInt)attr[i], coeff);
+      MatUnpack21(adjJt + i, Q, adjJt_loc);
+      MultAtBCx21(adjJt_loc, coeff, adjJt_loc, u_loc, v_loc);
+
+      gradv[i + Q * 0] = wdetJ[i] * v_loc[0];
+    }
+  }
+  return 0;
+}
+
+#endif  // PALACE_LIBCEED_HCURL_MASS_21_QF_H
diff --git a/palace/fem/qfunctions/21/hcurlmass_build_21_qf.h b/palace/fem/qfunctions/21/hcurlmass_build_21_qf.h
new file mode 100644
index 000000000..8d3eeac4a
--- /dev/null
+++ b/palace/fem/qfunctions/21/hcurlmass_build_21_qf.h
@@ -0,0 +1,36 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef PALACE_LIBCEED_HCURL_MASS_BUILD_21_QF_H
+#define PALACE_LIBCEED_HCURL_MASS_BUILD_21_QF_H
+
+#include "../coeff/coeff_1_qf.h"
+#include "../coeff/coeff_2_qf.h"
+#include "utils_21_qf.h"
+// Assembles two terms: qd1 = c * wdetJ and qd2 = wdetJ * adjJt^T C adjJt.
+CEED_QFUNCTION(f_build_hcurlmass_21)(void *__restrict__ ctx, CeedInt Q,
+                                     const CeedScalar *const *in, CeedScalar *const *out)
+{
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q;
+  CeedScalar *__restrict__ qd1 = out[0], *__restrict__ qd2 = out[0] + Q;
+  // ctx carries both coefficients; the second is reached via CoeffPairSecond<1>(ctx).
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+  {
+    {
+      const CeedScalar coeff = CoeffUnpack1((const CeedIntScalar *)ctx, (CeedInt)attr[i]);
+
+      qd1[i + Q * 0] = coeff * wdetJ[i];
+    }
+    {
+      CeedScalar coeff[3], adjJt_loc[2], qd_loc[1];
+      CoeffUnpack2(CoeffPairSecond<1>((const CeedIntScalar *)ctx), (CeedInt)attr[i], coeff);
+      MatUnpack21(adjJt + i, Q, adjJt_loc);
+      MultAtBA21(adjJt_loc, coeff, qd_loc);
+
+      qd2[i + Q * 0] = wdetJ[i] * qd_loc[0];
+    }
+  }
+  return 0;
+}
+
+#endif  // PALACE_LIBCEED_HCURL_MASS_BUILD_21_QF_H
diff --git a/palace/fem/qfunctions/21/hdiv_21_qf.h b/palace/fem/qfunctions/21/hdiv_21_qf.h
new file mode 100644
index 000000000..ed7e0d28e
--- /dev/null
+++ b/palace/fem/qfunctions/21/hdiv_21_qf.h
@@ -0,0 +1,30 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef PALACE_LIBCEED_HDIV_21_QF_H
+#define PALACE_LIBCEED_HDIV_21_QF_H
+
+#include "../coeff/coeff_2_qf.h"
+#include "utils_21_qf.h"
+// Pointwise apply: v = wdetJ * (J_loc^T C J_loc) u, with J_loc obtained from AdjJt21(adjJt).
+CEED_QFUNCTION(f_apply_hdiv_21)(void *__restrict__ ctx, CeedInt Q,
+                                const CeedScalar *const *in, CeedScalar *const *out)
+{
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1];
+  CeedScalar *v = out[0];
+  // in[0] packs attr, wdetJ, and the two adjJt components, each as Q consecutive values.
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+  {
+    const CeedScalar u_loc[1] = {u[i + Q * 0]};
+    CeedScalar coeff[3], adjJt_loc[2], J_loc[2], v_loc[2];
+    CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
+    MatUnpack21(adjJt + i, Q, adjJt_loc);
+    AdjJt21(adjJt_loc, J_loc);
+    MultAtBCx21(J_loc, coeff, J_loc, u_loc, v_loc);
+
+    v[i + Q * 0] = wdetJ[i] * v_loc[0];
+  }
+  return 0;
+}
+
+#endif  // PALACE_LIBCEED_HDIV_21_QF_H
diff --git a/palace/fem/qfunctions/21/hdiv_build_21_qf.h b/palace/fem/qfunctions/21/hdiv_build_21_qf.h
new file mode 100644
index 000000000..a09f9e513
--- /dev/null
+++ b/palace/fem/qfunctions/21/hdiv_build_21_qf.h
@@ -0,0 +1,29 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef PALACE_LIBCEED_HDIV_BUILD_21_QF_H
+#define PALACE_LIBCEED_HDIV_BUILD_21_QF_H
+
+#include "../coeff/coeff_2_qf.h"
+#include "utils_21_qf.h"
+// Pointwise assembly: qd = wdetJ * J_loc^T C J_loc, with J_loc obtained from AdjJt21(adjJt).
+CEED_QFUNCTION(f_build_hdiv_21)(void *__restrict__ ctx, CeedInt Q,
+                                const CeedScalar *const *in, CeedScalar *const *out)
+{
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q;
+  CeedScalar *qd = out[0];
+  // in[0] packs attr, wdetJ, and the two adjJt components, each as Q consecutive values.
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+  {
+    CeedScalar coeff[3], adjJt_loc[2], J_loc[2], qd_loc[1];
+    CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
+    MatUnpack21(adjJt + i, Q, adjJt_loc);
+    AdjJt21(adjJt_loc, J_loc);
+    MultAtBA21(J_loc, coeff, qd_loc);
+
+    qd[i + Q * 0] = wdetJ[i] * qd_loc[0];
+  }
+  return 0;
+}
+
+#endif  // PALACE_LIBCEED_HDIV_BUILD_21_QF_H
diff --git a/palace/fem/qfunctions/21/l2mass_21_qf.h b/palace/fem/qfunctions/21/l2mass_21_qf.h
new file mode 100644
index 000000000..0760f15c0
--- /dev/null
+++ b/palace/fem/qfunctions/21/l2mass_21_qf.h
@@ -0,0 +1,40 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef PALACE_LIBCEED_L2_MASS_21_QF_H
+#define PALACE_LIBCEED_L2_MASS_21_QF_H
+
+#include "../coeff/coeff_1_qf.h"
+#include "../coeff/coeff_2_qf.h"
+#include "utils_21_qf.h"
+// Applies two terms: v = wdetJ * (J_loc^T C J_loc) u, divv = (c * qw^2 / wdetJ) * divu.
+CEED_QFUNCTION(f_apply_l2mass_21)(void *__restrict__ ctx, CeedInt Q,
+                                  const CeedScalar *const *in, CeedScalar *const *out)
+{
+  const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *qw = in[1],
+                   *u = in[2], *divu = in[3];
+  CeedScalar *__restrict__ v = out[0], *__restrict__ divv = out[1];
+  // ctx carries both coefficients; the second is reached via CoeffPairSecond<2>(ctx).
+  CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++)
+  {
+    {
+      const CeedScalar u_loc[1] = {u[i + Q * 0]};
+      CeedScalar coeff[3], adjJt_loc[2], J_loc[2], v_loc[2];
+      CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff);
+      MatUnpack21(adjJt + i, Q, adjJt_loc);
+      AdjJt21(adjJt_loc, J_loc);
+      MultAtBCx21(J_loc, coeff, J_loc, u_loc, v_loc);
+
+      v[i + Q * 0] = wdetJ[i] * v_loc[0];
+    }
+    {
+      const CeedScalar coeff =
+          CoeffUnpack1(CoeffPairSecond<2>((const CeedIntScalar *)ctx), (CeedInt)attr[i]);
+
+      divv[i] = (coeff * qw[i] * qw[i] / wdetJ[i]) * divu[i];
+    }
+  }
+  return 0;
+}
+
+#endif  // PALACE_LIBCEED_L2_MASS_21_QF_H
diff --git a/palace/fem/qfunctions/21/l2mass_build_21_qf.h b/palace/fem/qfunctions/21/l2mass_build_21_qf.h
new file mode 100644
index 000000000..6a8bd6320
--- /dev/null
+++ b/palace/fem/qfunctions/21/l2mass_build_21_qf.h
@@ -0,0 +1,38 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_LIBCEED_L2_MASS_BUILD_21_QF_H +#define PALACE_LIBCEED_L2_MASS_BUILD_21_QF_H + +#include "../coeff/coeff_1_qf.h" +#include "../coeff/coeff_2_qf.h" +#include "utils_21_qf.h" + +CEED_QFUNCTION(f_build_l2mass_21)(void *__restrict__ ctx, CeedInt Q, + const CeedScalar *const *in, CeedScalar *const *out) +{ + const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *qw = in[1]; + CeedScalar *qd1 = out[0], *qd2 = out[0] + Q; + + CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) + { + { + CeedScalar coeff[3], adjJt_loc[2], J_loc[2], qd_loc[1]; + CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); + MatUnpack21(adjJt + i, Q, adjJt_loc); + AdjJt21(adjJt_loc, J_loc); + MultAtBA21(J_loc, coeff, qd_loc); + + qd1[i + Q * 0] = wdetJ[i] * qd_loc[0]; + } + { + const CeedScalar coeff = + CoeffUnpack1(CoeffPairSecond<2>((const CeedIntScalar *)ctx), (CeedInt)attr[i]); + + qd2[i] = coeff * qw[i] * qw[i] / wdetJ[i]; + } + } + return 0; +} + +#endif // PALACE_LIBCEED_L2_MASS_BUILD_21_QF_H diff --git a/palace/fem/qfunctions/21/utils_21_qf.h b/palace/fem/qfunctions/21/utils_21_qf.h new file mode 100644 index 000000000..cbdde5c17 --- /dev/null +++ b/palace/fem/qfunctions/21/utils_21_qf.h @@ -0,0 +1,102 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_LIBCEED_UTILS_21_QF_H +#define PALACE_LIBCEED_UTILS_21_QF_H + +#include + +CEED_QFUNCTION_HELPER CeedScalar DetJ21(const CeedScalar J[2]) +{ + // J: 0 + // 1 + return sqrt(J[0] * J[0] + J[1] * J[1]); +} + +template +CEED_QFUNCTION_HELPER CeedScalar AdjJt21(const CeedScalar J[2], CeedScalar adjJt[2]) +{ + // Compute adj(J)^T / det(J) and store the result. + // J: 0 adj(J): 1/sqrt(J^T J) J^T + // 1 + const CeedScalar d = sqrt(J[0] * J[0] + J[1] * J[1]); + adjJt[0] = J[0] / d; + adjJt[1] = J[1] / d; + return ComputeDet ? 
d : 0.0; +} + +CEED_QFUNCTION_HELPER void MatUnpack21(const CeedScalar *A, const CeedInt A_stride, + CeedScalar A_loc[2]) +{ + A_loc[0] = A[A_stride * 0]; + A_loc[1] = A[A_stride * 1]; +} + +CEED_QFUNCTION_HELPER void MultAtBCx21(const CeedScalar A[2], const CeedScalar B[3], + const CeedScalar C[2], const CeedScalar x[1], + CeedScalar y[2]) +{ + // A: 0 B: 0 1 C: 0 + // 1 1 2 1 + CeedScalar z[2]; + + y[0] = C[0] * x[0]; + y[1] = C[1] * x[0]; + + z[0] = B[0] * y[0] + B[1] * y[1]; + z[1] = B[1] * y[0] + B[2] * y[1]; + + y[0] = A[0] * z[0] + A[1] * z[1]; + y[1] = 0.0; +} + +CEED_QFUNCTION_HELPER void MultBAx21(const CeedScalar A[2], const CeedScalar B[3], + const CeedScalar x[1], CeedScalar y[1]) +{ + // A: 0 B: 0 1 + // 1 1 2 + CeedScalar z[2]; + + z[0] = A[0] * x[0]; + z[1] = A[1] * x[0]; + + y[0] = B[0] * z[0] + B[1] * z[1]; + y[1] = B[1] * z[0] + B[2] * z[1]; +} + +CEED_QFUNCTION_HELPER void MultAtBA21(const CeedScalar A[2], const CeedScalar B[3], + CeedScalar C[1]) +{ + // A: 0 B: 0 1 C: 0 + // 1 1 2 + + // First compute entries of R = B A. + const CeedScalar R11 = B[0] * A[0] + B[1] * A[1]; + const CeedScalar R21 = B[1] * A[0] + B[2] * A[1]; + + C[0] = A[0] * R11 + A[1] * R21; +} + +CEED_QFUNCTION_HELPER void MultAtBC21(const CeedScalar A[2], const CeedScalar B[3], + const CeedScalar C[2], CeedScalar D[1]) +{ + // A, C: 0 B: 0 1 D: 0 + // 1 1 2 + + // First compute entries of R = B C. 
+ const CeedScalar R11 = B[0] * C[0] + B[1] * C[1]; + const CeedScalar R21 = B[1] * C[0] + B[2] * C[1]; + + D[0] = A[0] * R11 + A[1] * R21; +} + +CEED_QFUNCTION_HELPER void MultBA21(const CeedScalar A[2], const CeedScalar B[3], + CeedScalar C[2]) +{ + // A: 0 B: 0 1 C: 0 + // 1 1 2 1 + C[0] = B[0] * A[0] + B[1] * A[1]; + C[1] = B[1] * A[0] + B[2] * A[1]; +} + +#endif // PALACE_LIBCEED_UTILS_21_QF_H diff --git a/palace/fem/qfunctions/22/geom_22_qf.h b/palace/fem/qfunctions/22/geom_22_qf.h new file mode 100644 index 000000000..55cb36901 --- /dev/null +++ b/palace/fem/qfunctions/22/geom_22_qf.h @@ -0,0 +1,31 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_LIBCEED_GEOM_22_QF_H +#define PALACE_LIBCEED_GEOM_22_QF_H + +#include "utils_22_qf.h" + +CEED_QFUNCTION(f_build_geom_factor_22)(void *, CeedInt Q, const CeedScalar *const *in, + CeedScalar *const *out) +{ + const CeedScalar *qw = in[0], *J = in[1]; + CeedScalar *attr = out[0], *wdetJ = out[0] + Q, *adjJt = out[0] + 2 * Q; + + CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) + { + CeedScalar J_loc[4], adjJt_loc[4]; + MatUnpack22(J + i, Q, J_loc); + const CeedScalar detJ = AdjJt22(J_loc, adjJt_loc); + + attr[i] = 0; + wdetJ[i] = qw[i] * detJ; + adjJt[i + Q * 0] = adjJt_loc[0] / detJ; + adjJt[i + Q * 1] = adjJt_loc[1] / detJ; + adjJt[i + Q * 2] = adjJt_loc[2] / detJ; + adjJt[i + Q * 3] = adjJt_loc[3] / detJ; + } + return 0; +} + +#endif // PALACE_LIBCEED_GEOM_22_QF_H diff --git a/palace/fem/qfunctions/22/hcurl_22_qf.h b/palace/fem/qfunctions/22/hcurl_22_qf.h new file mode 100644 index 000000000..50e35e801 --- /dev/null +++ b/palace/fem/qfunctions/22/hcurl_22_qf.h @@ -0,0 +1,30 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_LIBCEED_HCURL_22_QF_H +#define PALACE_LIBCEED_HCURL_22_QF_H + +#include "../coeff/coeff_2_qf.h" +#include "utils_22_qf.h" + +CEED_QFUNCTION(f_apply_hcurl_22)(void *__restrict__ ctx, CeedInt Q, + const CeedScalar *const *in, CeedScalar *const *out) +{ + const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1]; + CeedScalar *v = out[0]; + + CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) + { + const CeedScalar u_loc[2] = {u[i + Q * 0], u[i + Q * 1]}; + CeedScalar coeff[3], adjJt_loc[4], v_loc[2]; + CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); + MatUnpack22(adjJt + i, Q, adjJt_loc); + MultAtBCx22(adjJt_loc, coeff, adjJt_loc, u_loc, v_loc); + + v[i + Q * 0] = wdetJ[i] * v_loc[0]; + v[i + Q * 1] = wdetJ[i] * v_loc[1]; + } + return 0; +} + +#endif // PALACE_LIBCEED_HCURL_22_QF_H diff --git a/palace/fem/qfunctions/22/hcurl_build_22_qf.h b/palace/fem/qfunctions/22/hcurl_build_22_qf.h new file mode 100644 index 000000000..8fdd180ea --- /dev/null +++ b/palace/fem/qfunctions/22/hcurl_build_22_qf.h @@ -0,0 +1,30 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_LIBCEED_HCURL_BUILD_22_QF_H +#define PALACE_LIBCEED_HCURL_BUILD_22_QF_H + +#include "../coeff/coeff_2_qf.h" +#include "utils_22_qf.h" + +CEED_QFUNCTION(f_build_hcurl_22)(void *__restrict__ ctx, CeedInt Q, + const CeedScalar *const *in, CeedScalar *const *out) +{ + const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q; + CeedScalar *qd = out[0]; + + CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) + { + CeedScalar coeff[3], adjJt_loc[4], qd_loc[3]; + CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); + MatUnpack22(adjJt + i, Q, adjJt_loc); + MultAtBA22(adjJt_loc, coeff, qd_loc); + + qd[i + Q * 0] = wdetJ[i] * qd_loc[0]; + qd[i + Q * 1] = wdetJ[i] * qd_loc[1]; + qd[i + Q * 2] = wdetJ[i] * qd_loc[2]; + } + return 0; +} + +#endif // PALACE_LIBCEED_HCURL_BUILD_22_QF_H diff --git a/palace/fem/qfunctions/22/hcurlh1d_22_qf.h b/palace/fem/qfunctions/22/hcurlh1d_22_qf.h new file mode 100644 index 000000000..a0c506cf0 --- /dev/null +++ b/palace/fem/qfunctions/22/hcurlh1d_22_qf.h @@ -0,0 +1,30 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_LIBCEED_HCURL_H1D_22_QF_H +#define PALACE_LIBCEED_HCURL_H1D_22_QF_H + +#include "../coeff/coeff_2_qf.h" +#include "utils_22_qf.h" + +CEED_QFUNCTION(f_apply_hcurlh1d_22)(void *__restrict__ ctx, CeedInt Q, + const CeedScalar *const *in, CeedScalar *const *out) +{ + const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1]; + CeedScalar *v = out[0]; + + CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) + { + const CeedScalar u_loc[2] = {u[i + Q * 0], u[i + Q * 1]}; + CeedScalar coeff[3], adjJt_loc[4], v_loc[2]; + CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); + MatUnpack22(adjJt + i, Q, adjJt_loc); + MultBAx22(adjJt_loc, coeff, u_loc, v_loc); + + v[i + Q * 0] = wdetJ[i] * v_loc[0]; + v[i + Q * 1] = wdetJ[i] * v_loc[1]; + } + return 0; +} + +#endif // PALACE_LIBCEED_HCURL_H1D_22_QF_H diff --git a/palace/fem/qfunctions/22/hcurlh1d_build_22_qf.h b/palace/fem/qfunctions/22/hcurlh1d_build_22_qf.h new file mode 100644 index 000000000..5652b05a2 --- /dev/null +++ b/palace/fem/qfunctions/22/hcurlh1d_build_22_qf.h @@ -0,0 +1,31 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_LIBCEED_HCURL_H1D_BUILD_22_QF_H +#define PALACE_LIBCEED_HCURL_H1D_BUILD_22_QF_H + +#include "../coeff/coeff_2_qf.h" +#include "utils_22_qf.h" + +CEED_QFUNCTION(f_build_hcurlh1d_22)(void *__restrict__ ctx, CeedInt Q, + const CeedScalar *const *in, CeedScalar *const *out) +{ + const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q; + CeedScalar *qd = out[0]; + + CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) + { + CeedScalar coeff[3], adjJt_loc[4], qd_loc[4]; + CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); + MatUnpack22(adjJt + i, Q, adjJt_loc); + MultBA22(adjJt_loc, coeff, qd_loc); + + qd[i + Q * 0] = wdetJ[i] * qd_loc[0]; + qd[i + Q * 1] = wdetJ[i] * qd_loc[1]; + qd[i + Q * 2] = wdetJ[i] * qd_loc[2]; + qd[i + Q * 3] = wdetJ[i] * qd_loc[3]; + } + return 0; +} + +#endif // PALACE_LIBCEED_HCURL_H1D_BUILD_22_QF_H diff --git a/palace/fem/qfunctions/22/hcurlhdiv_22_qf.h b/palace/fem/qfunctions/22/hcurlhdiv_22_qf.h new file mode 100644 index 000000000..265711b95 --- /dev/null +++ b/palace/fem/qfunctions/22/hcurlhdiv_22_qf.h @@ -0,0 +1,52 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_LIBCEED_HCURL_HDIV_22_QF_H +#define PALACE_LIBCEED_HCURL_HDIV_22_QF_H + +#include "../coeff/coeff_2_qf.h" +#include "utils_22_qf.h" + +CEED_QFUNCTION(f_apply_hcurlhdiv_22)(void *__restrict__ ctx, CeedInt Q, + const CeedScalar *const *in, CeedScalar *const *out) +{ + const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1]; + CeedScalar *v = out[0]; + + CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) + { + const CeedScalar u_loc[2] = {u[i + Q * 0], u[i + Q * 1]}; + CeedScalar coeff[3], adjJt_loc[4], J_loc[4], v_loc[2]; + CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); + MatUnpack22(adjJt + i, Q, adjJt_loc); + AdjJt22(adjJt_loc, J_loc); + MultAtBCx22(J_loc, coeff, adjJt_loc, u_loc, v_loc); + + v[i + Q * 0] = wdetJ[i] * v_loc[0]; + v[i + Q * 1] = wdetJ[i] * v_loc[1]; + } + return 0; +} + +CEED_QFUNCTION(f_apply_hdivhcurl_22)(void *__restrict__ ctx, CeedInt Q, + const CeedScalar *const *in, CeedScalar *const *out) +{ + const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1]; + CeedScalar *v = out[0]; + + CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) + { + const CeedScalar u_loc[2] = {u[i + Q * 0], u[i + Q * 1]}; + CeedScalar coeff[3], adjJt_loc[4], J_loc[4], v_loc[2]; + CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); + MatUnpack22(adjJt + i, Q, adjJt_loc); + AdjJt22(adjJt_loc, J_loc); + MultAtBCx22(adjJt_loc, coeff, J_loc, u_loc, v_loc); + + v[i + Q * 0] = wdetJ[i] * v_loc[0]; + v[i + Q * 1] = wdetJ[i] * v_loc[1]; + } + return 0; +} + +#endif // PALACE_LIBCEED_HCURL_HDIV_22_QF_H diff --git a/palace/fem/qfunctions/22/hcurlhdiv_build_22_qf.h b/palace/fem/qfunctions/22/hcurlhdiv_build_22_qf.h new file mode 100644 index 000000000..8ceec8cc1 --- /dev/null +++ b/palace/fem/qfunctions/22/hcurlhdiv_build_22_qf.h @@ -0,0 +1,54 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_LIBCEED_HCURL_HDIV_BUILD_22_QF_H +#define PALACE_LIBCEED_HCURL_HDIV_BUILD_22_QF_H + +#include "../coeff/coeff_2_qf.h" +#include "utils_22_qf.h" + +CEED_QFUNCTION(f_build_hcurlhdiv_22)(void *__restrict__ ctx, CeedInt Q, + const CeedScalar *const *in, CeedScalar *const *out) +{ + const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q; + CeedScalar *qd = out[0]; + + CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) + { + CeedScalar coeff[3], adjJt_loc[4], J_loc[4], qd_loc[4]; + CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); + MatUnpack22(adjJt + i, Q, adjJt_loc); + AdjJt22(adjJt_loc, J_loc); + MultAtBC22(J_loc, coeff, adjJt_loc, qd_loc); + + qd[i + Q * 0] = wdetJ[i] * qd_loc[0]; + qd[i + Q * 1] = wdetJ[i] * qd_loc[1]; + qd[i + Q * 2] = wdetJ[i] * qd_loc[2]; + qd[i + Q * 3] = wdetJ[i] * qd_loc[3]; + } + return 0; +} + +CEED_QFUNCTION(f_build_hdivhcurl_22)(void *__restrict__ ctx, CeedInt Q, + const CeedScalar *const *in, CeedScalar *const *out) +{ + const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q; + CeedScalar *qd = out[0]; + + CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) + { + CeedScalar coeff[3], adjJt_loc[4], J_loc[4], qd_loc[4]; + CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); + MatUnpack22(adjJt + i, Q, adjJt_loc); + AdjJt22(adjJt_loc, J_loc); + MultAtBC22(adjJt_loc, coeff, J_loc, qd_loc); + + qd[i + Q * 0] = wdetJ[i] * qd_loc[0]; + qd[i + Q * 1] = wdetJ[i] * qd_loc[1]; + qd[i + Q * 2] = wdetJ[i] * qd_loc[2]; + qd[i + Q * 3] = wdetJ[i] * qd_loc[3]; + } + return 0; +} + +#endif // PALACE_LIBCEED_HCURL_HDIV_BUILD_22_QF_H diff --git a/palace/fem/qfunctions/22/hcurlmass_22_qf.h b/palace/fem/qfunctions/22/hcurlmass_22_qf.h new file mode 100644 index 000000000..20994d744 --- /dev/null +++ b/palace/fem/qfunctions/22/hcurlmass_22_qf.h @@ -0,0 +1,39 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_LIBCEED_HCURL_MASS_22_QF_H +#define PALACE_LIBCEED_HCURL_MASS_22_QF_H + +#include "../coeff/coeff_1_qf.h" +#include "../coeff/coeff_2_qf.h" +#include "utils_22_qf.h" + +CEED_QFUNCTION(f_apply_hcurlmass_22)(void *__restrict__ ctx, CeedInt Q, + const CeedScalar *const *in, CeedScalar *const *out) +{ + const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1], + *gradu = in[2]; + CeedScalar *__restrict__ v = out[0], *__restrict__ gradv = out[1]; + + CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) + { + { + const CeedScalar coeff = CoeffUnpack1((const CeedIntScalar *)ctx, (CeedInt)attr[i]); + + v[i] = coeff * wdetJ[i] * u[i]; + } + { + const CeedScalar u_loc[2] = {gradu[i + Q * 0], gradu[i + Q * 1]}; + CeedScalar coeff[3], adjJt_loc[4], v_loc[2]; + CoeffUnpack2(CoeffPairSecond<1>((const CeedIntScalar *)ctx), (CeedInt)attr[i], coeff); + MatUnpack22(adjJt + i, Q, adjJt_loc); + MultAtBCx22(adjJt_loc, coeff, adjJt_loc, u_loc, v_loc); + + gradv[i + Q * 0] = wdetJ[i] * v_loc[0]; + gradv[i + Q * 1] = wdetJ[i] * v_loc[1]; + } + } + return 0; +} + +#endif // PALACE_LIBCEED_HCURL_MASS_22_QF_H diff --git a/palace/fem/qfunctions/22/hcurlmass_build_22_qf.h b/palace/fem/qfunctions/22/hcurlmass_build_22_qf.h new file mode 100644 index 000000000..0a4023abb --- /dev/null +++ b/palace/fem/qfunctions/22/hcurlmass_build_22_qf.h @@ -0,0 +1,38 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_LIBCEED_HCURL_MASS_BUILD_22_QF_H +#define PALACE_LIBCEED_HCURL_MASS_BUILD_22_QF_H + +#include "../coeff/coeff_1_qf.h" +#include "../coeff/coeff_2_qf.h" +#include "utils_22_qf.h" + +CEED_QFUNCTION(f_build_hcurlmass_22)(void *__restrict__ ctx, CeedInt Q, + const CeedScalar *const *in, CeedScalar *const *out) +{ + const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q; + CeedScalar *__restrict__ qd1 = out[0], *__restrict__ qd2 = out[0] + Q; + + CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) + { + { + const CeedScalar coeff = CoeffUnpack1((const CeedIntScalar *)ctx, (CeedInt)attr[i]); + + qd1[i + Q * 0] = coeff * wdetJ[i]; + } + { + CeedScalar coeff[3], adjJt_loc[4], qd_loc[3]; + CoeffUnpack2(CoeffPairSecond<1>((const CeedIntScalar *)ctx), (CeedInt)attr[i], coeff); + MatUnpack22(adjJt + i, Q, adjJt_loc); + MultAtBA22(adjJt_loc, coeff, qd_loc); + + qd2[i + Q * 0] = wdetJ[i] * qd_loc[0]; + qd2[i + Q * 1] = wdetJ[i] * qd_loc[1]; + qd2[i + Q * 2] = wdetJ[i] * qd_loc[2]; + } + } + return 0; +} + +#endif // PALACE_LIBCEED_HCURL_MASS_BUILD_22_QF_H diff --git a/palace/fem/qfunctions/22/hdiv_22_qf.h b/palace/fem/qfunctions/22/hdiv_22_qf.h new file mode 100644 index 000000000..52f557605 --- /dev/null +++ b/palace/fem/qfunctions/22/hdiv_22_qf.h @@ -0,0 +1,31 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_LIBCEED_HDIV_22_QF_H +#define PALACE_LIBCEED_HDIV_22_QF_H + +#include "../coeff/coeff_2_qf.h" +#include "utils_22_qf.h" + +CEED_QFUNCTION(f_apply_hdiv_22)(void *__restrict__ ctx, CeedInt Q, + const CeedScalar *const *in, CeedScalar *const *out) +{ + const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1]; + CeedScalar *v = out[0]; + + CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) + { + const CeedScalar u_loc[2] = {u[i + Q * 0], u[i + Q * 1]}; + CeedScalar coeff[3], adjJt_loc[4], J_loc[4], v_loc[2]; + CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); + MatUnpack22(adjJt + i, Q, adjJt_loc); + AdjJt22(adjJt_loc, J_loc); + MultAtBCx22(J_loc, coeff, J_loc, u_loc, v_loc); + + v[i + Q * 0] = wdetJ[i] * v_loc[0]; + v[i + Q * 1] = wdetJ[i] * v_loc[1]; + } + return 0; +} + +#endif // PALACE_LIBCEED_HDIV_22_QF_H diff --git a/palace/fem/qfunctions/22/hdiv_build_22_qf.h b/palace/fem/qfunctions/22/hdiv_build_22_qf.h new file mode 100644 index 000000000..6f63cf64a --- /dev/null +++ b/palace/fem/qfunctions/22/hdiv_build_22_qf.h @@ -0,0 +1,31 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_LIBCEED_HDIV_BUILD_22_QF_H +#define PALACE_LIBCEED_HDIV_BUILD_22_QF_H + +#include "../coeff/coeff_2_qf.h" +#include "utils_22_qf.h" + +CEED_QFUNCTION(f_build_hdiv_22)(void *__restrict__ ctx, CeedInt Q, + const CeedScalar *const *in, CeedScalar *const *out) +{ + const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q; + CeedScalar *qd = out[0]; + + CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) + { + CeedScalar coeff[3], adjJt_loc[4], J_loc[4], qd_loc[3]; + CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); + MatUnpack22(adjJt + i, Q, adjJt_loc); + AdjJt22(adjJt_loc, J_loc); + MultAtBA22(J_loc, coeff, qd_loc); + + qd[i + Q * 0] = wdetJ[i] * qd_loc[0]; + qd[i + Q * 1] = wdetJ[i] * qd_loc[1]; + qd[i + Q * 2] = wdetJ[i] * qd_loc[2]; + } + return 0; +} + +#endif // PALACE_LIBCEED_HDIV_BUILD_22_QF_H diff --git a/palace/fem/qfunctions/22/hdivmass_22_qf.h b/palace/fem/qfunctions/22/hdivmass_22_qf.h new file mode 100644 index 000000000..78163cff3 --- /dev/null +++ b/palace/fem/qfunctions/22/hdivmass_22_qf.h @@ -0,0 +1,40 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_LIBCEED_HDIV_MASS_22_QF_H +#define PALACE_LIBCEED_HDIV_MASS_22_QF_H + +#include "../coeff/coeff_1_qf.h" +#include "../coeff/coeff_2_qf.h" +#include "utils_22_qf.h" + +CEED_QFUNCTION(f_apply_hdivmass_22)(void *__restrict__ ctx, CeedInt Q, + const CeedScalar *const *in, CeedScalar *const *out) +{ + const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *qw = in[1], + *u = in[2], *curlu = in[3]; + CeedScalar *__restrict__ v = out[0], *__restrict__ curlv = out[1]; + + CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) + { + { + const CeedScalar u_loc[2] = {u[i + Q * 0], u[i + Q * 1]}; + CeedScalar coeff[3], adjJt_loc[4], v_loc[2]; + CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); + MatUnpack22(adjJt + i, Q, adjJt_loc); + MultAtBCx22(adjJt_loc, coeff, adjJt_loc, u_loc, v_loc); + + v[i + Q * 0] = wdetJ[i] * v_loc[0]; + v[i + Q * 1] = wdetJ[i] * v_loc[1]; + } + { + const CeedScalar coeff = + CoeffUnpack1(CoeffPairSecond<2>((const CeedIntScalar *)ctx), (CeedInt)attr[i]); + + curlv[i] = (coeff * qw[i] * qw[i] / wdetJ[i]) * curlu[i]; + } + } + return 0; +} + +#endif // PALACE_LIBCEED_HDIV_MASS_22_QF_H diff --git a/palace/fem/qfunctions/22/hdivmass_build_22_qf.h b/palace/fem/qfunctions/22/hdivmass_build_22_qf.h new file mode 100644 index 000000000..da3e06935 --- /dev/null +++ b/palace/fem/qfunctions/22/hdivmass_build_22_qf.h @@ -0,0 +1,39 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_LIBCEED_HDIV_MASS_BUILD_22_QF_H +#define PALACE_LIBCEED_HDIV_MASS_BUILD_22_QF_H + +#include "../coeff/coeff_1_qf.h" +#include "../coeff/coeff_2_qf.h" +#include "utils_22_qf.h" + +CEED_QFUNCTION(f_build_hdivmass_22)(void *__restrict__ ctx, CeedInt Q, + const CeedScalar *const *in, CeedScalar *const *out) +{ + const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *qw = in[1]; + CeedScalar *__restrict__ qd1 = out[0], *__restrict__ qd2 = out[0] + 3 * Q; + + CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) + { + { + CeedScalar coeff[3], adjJt_loc[4], qd_loc[3]; + CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); + MatUnpack22(adjJt + i, Q, adjJt_loc); + MultAtBA22(adjJt_loc, coeff, qd_loc); + + qd1[i + Q * 0] = wdetJ[i] * qd_loc[0]; + qd1[i + Q * 1] = wdetJ[i] * qd_loc[1]; + qd1[i + Q * 2] = wdetJ[i] * qd_loc[2]; + } + { + const CeedScalar coeff = + CoeffUnpack1(CoeffPairSecond<2>((const CeedIntScalar *)ctx), (CeedInt)attr[i]); + + qd2[i] = coeff * qw[i] * qw[i] / wdetJ[i]; + } + } + return 0; +} + +#endif // PALACE_LIBCEED_HDIV_MASS_BUILD_22_QF_H diff --git a/palace/fem/qfunctions/22/l2mass_22_qf.h b/palace/fem/qfunctions/22/l2mass_22_qf.h new file mode 100644 index 000000000..0933a29f8 --- /dev/null +++ b/palace/fem/qfunctions/22/l2mass_22_qf.h @@ -0,0 +1,41 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_LIBCEED_L2_MASS_22_QF_H +#define PALACE_LIBCEED_L2_MASS_22_QF_H + +#include "../coeff/coeff_1_qf.h" +#include "../coeff/coeff_2_qf.h" +#include "utils_22_qf.h" + +CEED_QFUNCTION(f_apply_l2mass_22)(void *__restrict__ ctx, CeedInt Q, + const CeedScalar *const *in, CeedScalar *const *out) +{ + const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *qw = in[1], + *u = in[2], *divu = in[3]; + CeedScalar *__restrict__ v = out[0], *__restrict__ divv = out[1]; + + CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) + { + { + const CeedScalar u_loc[2] = {u[i + Q * 0], u[i + Q * 1]}; + CeedScalar coeff[3], adjJt_loc[4], J_loc[4], v_loc[2]; + CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); + MatUnpack22(adjJt + i, Q, adjJt_loc); + AdjJt22(adjJt_loc, J_loc); + MultAtBCx22(J_loc, coeff, J_loc, u_loc, v_loc); + + v[i + Q * 0] = wdetJ[i] * v_loc[0]; + v[i + Q * 1] = wdetJ[i] * v_loc[1]; + } + { + const CeedScalar coeff = + CoeffUnpack1(CoeffPairSecond<2>((const CeedIntScalar *)ctx), (CeedInt)attr[i]); + + divv[i] = (coeff * qw[i] * qw[i] / wdetJ[i]) * divu[i]; + } + } + return 0; +} + +#endif // PALACE_LIBCEED_L2_MASS_22_QF_H diff --git a/palace/fem/qfunctions/22/l2mass_build_22_qf.h b/palace/fem/qfunctions/22/l2mass_build_22_qf.h new file mode 100644 index 000000000..648a21082 --- /dev/null +++ b/palace/fem/qfunctions/22/l2mass_build_22_qf.h @@ -0,0 +1,40 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_LIBCEED_L2_MASS_BUILD_22_QF_H +#define PALACE_LIBCEED_L2_MASS_BUILD_22_QF_H + +#include "../coeff/coeff_1_qf.h" +#include "../coeff/coeff_2_qf.h" +#include "utils_22_qf.h" + +CEED_QFUNCTION(f_build_l2mass_22)(void *__restrict__ ctx, CeedInt Q, + const CeedScalar *const *in, CeedScalar *const *out) +{ + const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *qw = in[1]; + CeedScalar *qd1 = out[0], *qd2 = out[0] + 3 * Q; + + CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) + { + { + CeedScalar coeff[3], adjJt_loc[4], J_loc[4], qd_loc[3]; + CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); + MatUnpack22(adjJt + i, Q, adjJt_loc); + AdjJt22(adjJt_loc, J_loc); + MultAtBA22(J_loc, coeff, qd_loc); + + qd1[i + Q * 0] = wdetJ[i] * qd_loc[0]; + qd1[i + Q * 1] = wdetJ[i] * qd_loc[1]; + qd1[i + Q * 2] = wdetJ[i] * qd_loc[2]; + } + { + const CeedScalar coeff = + CoeffUnpack1(CoeffPairSecond<2>((const CeedIntScalar *)ctx), (CeedInt)attr[i]); + + qd2[i] = coeff * qw[i] * qw[i] / wdetJ[i]; + } + } + return 0; +} + +#endif // PALACE_LIBCEED_L2_MASS_BUILD_22_QF_H diff --git a/palace/fem/qfunctions/22/utils_22_qf.h b/palace/fem/qfunctions/22/utils_22_qf.h new file mode 100644 index 000000000..79dc1f1b6 --- /dev/null +++ b/palace/fem/qfunctions/22/utils_22_qf.h @@ -0,0 +1,116 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_LIBCEED_UTILS_22_QF_H +#define PALACE_LIBCEED_UTILS_22_QF_H + +#include + +CEED_QFUNCTION_HELPER CeedScalar DetJ22(const CeedScalar J[4]) +{ + // J: 0 2 + // 1 3 + return J[0] * J[3] - J[1] * J[2]; +} + +template +CEED_QFUNCTION_HELPER CeedScalar AdjJt22(const CeedScalar J[4], CeedScalar adjJt[4]) +{ + // Compute adj(J)^T / det(J) and store the result. 
+ // J: 0 2 adj(J): J22 -J12 + // 1 3 -J21 J11 + adjJt[0] = J[3]; + adjJt[1] = -J[2]; + adjJt[2] = -J[1]; + adjJt[3] = J[0]; + return ComputeDet ? (J[0] * J[3] - J[1] * J[2]) : 0.0; +} + +CEED_QFUNCTION_HELPER void MatUnpack22(const CeedScalar *A, const CeedInt A_stride, + CeedScalar A_loc[4]) +{ + A_loc[0] = A[A_stride * 0]; + A_loc[1] = A[A_stride * 1]; + A_loc[2] = A[A_stride * 2]; + A_loc[3] = A[A_stride * 3]; +} + +CEED_QFUNCTION_HELPER void MultAtBCx22(const CeedScalar A[4], const CeedScalar B[3], + const CeedScalar C[4], const CeedScalar x[2], + CeedScalar y[2]) +{ + // A: 0 2 B: 0 1 C: 0 2 + // 1 3 1 2 1 3 + CeedScalar z[2]; + + y[0] = C[0] * x[0] + C[2] * x[1]; + y[1] = C[1] * x[0] + C[3] * x[1]; + + z[0] = B[0] * y[0] + B[1] * y[1]; + z[1] = B[1] * y[0] + B[2] * y[1]; + + y[0] = A[0] * z[0] + A[1] * z[1]; + y[1] = A[2] * z[0] + A[3] * z[1]; +} + +CEED_QFUNCTION_HELPER void MultBAx22(const CeedScalar A[4], const CeedScalar B[3], + const CeedScalar x[2], CeedScalar y[2]) +{ + // A: 0 2 B: 0 1 + // 1 3 1 2 + CeedScalar z[2]; + + z[0] = A[0] * x[0] + A[2] * x[1]; + z[1] = A[1] * x[0] + A[3] * x[1]; + + y[0] = B[0] * z[0] + B[1] * z[1]; + y[1] = B[1] * z[0] + B[2] * z[1]; +} + +CEED_QFUNCTION_HELPER void MultAtBA22(const CeedScalar A[4], const CeedScalar B[3], + CeedScalar C[3]) +{ + // A: 0 2 B: 0 1 C: 0 1 + // 1 3 1 2 1 2 + + // First compute entries of R = B A. + const CeedScalar R11 = B[0] * A[0] + B[1] * A[1]; + const CeedScalar R21 = B[1] * A[0] + B[2] * A[1]; + const CeedScalar R12 = B[0] * A[2] + B[1] * A[3]; + const CeedScalar R22 = B[1] * A[2] + B[2] * A[3]; + + C[0] = A[0] * R11 + A[1] * R21; + C[1] = A[0] * R12 + A[1] * R22; + C[2] = A[2] * R12 + A[3] * R22; +} + +CEED_QFUNCTION_HELPER void MultAtBC22(const CeedScalar A[4], const CeedScalar B[3], + const CeedScalar C[4], CeedScalar D[4]) +{ + // A, C: 0 2 B: 0 1 D: 0 2 + // 1 3 1 2 1 3 + + // First compute entries of R = B C. 
+ const CeedScalar R11 = B[0] * C[0] + B[1] * C[1]; + const CeedScalar R21 = B[1] * C[0] + B[2] * C[1]; + const CeedScalar R12 = B[0] * C[2] + B[1] * C[3]; + const CeedScalar R22 = B[1] * C[2] + B[2] * C[3]; + + D[0] = A[0] * R11 + A[1] * R21; + D[1] = A[2] * R11 + A[3] * R21; + D[2] = A[0] * R12 + A[1] * R22; + D[3] = A[2] * R12 + A[3] * R22; +} + +CEED_QFUNCTION_HELPER void MultBA22(const CeedScalar A[4], const CeedScalar B[3], + CeedScalar C[4]) +{ + // A: 0 2 B: 0 1 C: 0 2 + // 1 3 1 2 1 3 + C[0] = B[0] * A[0] + B[1] * A[1]; + C[1] = B[1] * A[0] + B[2] * A[1]; + C[2] = B[0] * A[2] + B[1] * A[3]; + C[3] = B[1] * A[2] + B[2] * A[3]; +} + +#endif // PALACE_LIBCEED_UTILS_22_QF_H diff --git a/palace/fem/qfunctions/3/h1_3_qf.h b/palace/fem/qfunctions/3/h1_3_qf.h new file mode 100644 index 000000000..2f5cf32f8 --- /dev/null +++ b/palace/fem/qfunctions/3/h1_3_qf.h @@ -0,0 +1,30 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_LIBCEED_H1_3_QF_H +#define PALACE_LIBCEED_H1_3_QF_H + +#include "../coeff/coeff_3_qf.h" + +CEED_QFUNCTION(f_apply_h1_3)(void *__restrict__ ctx, CeedInt Q, const CeedScalar *const *in, + CeedScalar *const *out) +{ + const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *u = in[1]; + CeedScalar *v = out[0]; + + CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) + { + CeedScalar coeff[6]; + CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); + + const CeedScalar u0 = u[i + Q * 0]; + const CeedScalar u1 = u[i + Q * 1]; + const CeedScalar u2 = u[i + Q * 2]; + v[i + Q * 0] = wdetJ[i] * (coeff[0] * u0 + coeff[1] * u1 + coeff[2] * u2); + v[i + Q * 1] = wdetJ[i] * (coeff[1] * u0 + coeff[3] * u1 + coeff[4] * u2); + v[i + Q * 2] = wdetJ[i] * (coeff[2] * u0 + coeff[4] * u1 + coeff[5] * u2); + } + return 0; +} + +#endif // PALACE_LIBCEED_H1_3_QF_H diff --git a/palace/fem/qfunctions/3/h1_build_3_qf.h b/palace/fem/qfunctions/3/h1_build_3_qf.h new file mode 100644 index 
000000000..534fae03a --- /dev/null +++ b/palace/fem/qfunctions/3/h1_build_3_qf.h @@ -0,0 +1,30 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_LIBCEED_H1_BUILD_3_QF_H +#define PALACE_LIBCEED_H1_BUILD_3_QF_H + +#include "../coeff/coeff_3_qf.h" + +CEED_QFUNCTION(f_build_h1_3)(void *__restrict__ ctx, CeedInt Q, const CeedScalar *const *in, + CeedScalar *const *out) +{ + const CeedScalar *attr = in[0], *wdetJ = in[0] + Q; + CeedScalar *qd = out[0]; + + CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) + { + CeedScalar coeff[6]; + CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); + + qd[i + Q * 0] = wdetJ[i] * coeff[0]; + qd[i + Q * 1] = wdetJ[i] * coeff[1]; + qd[i + Q * 2] = wdetJ[i] * coeff[2]; + qd[i + Q * 3] = wdetJ[i] * coeff[3]; + qd[i + Q * 4] = wdetJ[i] * coeff[4]; + qd[i + Q * 5] = wdetJ[i] * coeff[5]; + } + return 0; +} + +#endif // PALACE_LIBCEED_H1_BUILD_3_QF_H diff --git a/palace/fem/qfunctions/3/l2_3_qf.h b/palace/fem/qfunctions/3/l2_3_qf.h new file mode 100644 index 000000000..51c2bb5b4 --- /dev/null +++ b/palace/fem/qfunctions/3/l2_3_qf.h @@ -0,0 +1,31 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_LIBCEED_L2_3_QF_H +#define PALACE_LIBCEED_L2_3_QF_H + +#include "../coeff/coeff_3_qf.h" + +CEED_QFUNCTION(f_apply_l2_3)(void *__restrict__ ctx, CeedInt Q, const CeedScalar *const *in, + CeedScalar *const *out) +{ + const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *qw = in[1], *u = in[2]; + CeedScalar *v = out[0]; + + CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) + { + CeedScalar coeff[6]; + CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); + const CeedScalar w = qw[i] * qw[i] / wdetJ[i]; + + const CeedScalar u0 = u[i + Q * 0]; + const CeedScalar u1 = u[i + Q * 1]; + const CeedScalar u2 = u[i + Q * 2]; + v[i + Q * 0] = w * (coeff[0] * u0 + coeff[1] * u1 + coeff[2] * u2); + v[i + Q * 1] = w * (coeff[1] * u0 + coeff[3] * u1 + coeff[4] * u2); + v[i + Q * 2] = w * (coeff[2] * u0 + coeff[4] * u1 + coeff[5] * u2); + } + return 0; +} + +#endif // PALACE_LIBCEED_L2_3_QF_H diff --git a/palace/fem/qfunctions/3/l2_build_3_qf.h b/palace/fem/qfunctions/3/l2_build_3_qf.h new file mode 100644 index 000000000..78e6c71e0 --- /dev/null +++ b/palace/fem/qfunctions/3/l2_build_3_qf.h @@ -0,0 +1,31 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_LIBCEED_L2_BUILD_3_QF_H +#define PALACE_LIBCEED_L2_BUILD_3_QF_H + +#include "../coeff/coeff_3_qf.h" + +CEED_QFUNCTION(f_build_l2_3)(void *__restrict__ ctx, CeedInt Q, const CeedScalar *const *in, + CeedScalar *const *out) +{ + const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *qw = in[1]; + CeedScalar *qd = out[0]; + + CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) + { + CeedScalar coeff[6]; + CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); + const CeedScalar w = qw[i] * qw[i] / wdetJ[i]; + + qd[i + Q * 0] = w * coeff[0]; + qd[i + Q * 1] = w * coeff[1]; + qd[i + Q * 2] = w * coeff[2]; + qd[i + Q * 3] = w * coeff[3]; + qd[i + Q * 4] = w * coeff[4]; + qd[i + Q * 5] = w * coeff[5]; + } + return 0; +} + +#endif // PALACE_LIBCEED_L2_BUILD_3_QF_H diff --git a/palace/fem/qfunctions/32/geom_32_qf.h b/palace/fem/qfunctions/32/geom_32_qf.h new file mode 100644 index 000000000..cf7db5ee0 --- /dev/null +++ b/palace/fem/qfunctions/32/geom_32_qf.h @@ -0,0 +1,33 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_LIBCEED_GEOM_32_QF_H +#define PALACE_LIBCEED_GEOM_32_QF_H + +#include "utils_32_qf.h" + +CEED_QFUNCTION(f_build_geom_factor_32)(void *, CeedInt Q, const CeedScalar *const *in, + CeedScalar *const *out) +{ + const CeedScalar *qw = in[0], *J = in[1]; + CeedScalar *attr = out[0], *wdetJ = out[0] + Q, *adjJt = out[0] + 2 * Q; + + CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) + { + CeedScalar J_loc[6], adjJt_loc[6]; + MatUnpack32(J + i, Q, J_loc); + const CeedScalar detJ = AdjJt32(J_loc, adjJt_loc); + + attr[i] = 0; + wdetJ[i] = qw[i] * detJ; + adjJt[i + Q * 0] = adjJt_loc[0] / detJ; + adjJt[i + Q * 1] = adjJt_loc[1] / detJ; + adjJt[i + Q * 2] = adjJt_loc[2] / detJ; + adjJt[i + Q * 3] = adjJt_loc[3] / detJ; + adjJt[i + Q * 4] = adjJt_loc[4] / detJ; + adjJt[i + Q * 5] = adjJt_loc[5] / detJ; + } + return 0; +} + +#endif // PALACE_LIBCEED_GEOM_32_QF_H diff --git a/palace/fem/qfunctions/32/hcurl_32_qf.h b/palace/fem/qfunctions/32/hcurl_32_qf.h new file mode 100644 index 000000000..bc098c7c5 --- /dev/null +++ b/palace/fem/qfunctions/32/hcurl_32_qf.h @@ -0,0 +1,30 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_LIBCEED_HCURL_32_QF_H +#define PALACE_LIBCEED_HCURL_32_QF_H + +#include "../coeff/coeff_3_qf.h" +#include "utils_32_qf.h" + +CEED_QFUNCTION(f_apply_hcurl_32)(void *__restrict__ ctx, CeedInt Q, + const CeedScalar *const *in, CeedScalar *const *out) +{ + const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1]; + CeedScalar *v = out[0]; + + CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) + { + const CeedScalar u_loc[2] = {u[i + Q * 0], u[i + Q * 1]}; + CeedScalar coeff[6], adjJt_loc[6], v_loc[3]; + CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); + MatUnpack32(adjJt + i, Q, adjJt_loc); + MultAtBCx32(adjJt_loc, coeff, adjJt_loc, u_loc, v_loc); + + v[i + Q * 0] = wdetJ[i] * v_loc[0]; + v[i + Q * 1] = wdetJ[i] * v_loc[1]; + } + return 0; +} + +#endif // PALACE_LIBCEED_HCURL_32_QF_H diff --git a/palace/fem/qfunctions/32/hcurl_build_32_qf.h b/palace/fem/qfunctions/32/hcurl_build_32_qf.h new file mode 100644 index 000000000..eadd4bb4c --- /dev/null +++ b/palace/fem/qfunctions/32/hcurl_build_32_qf.h @@ -0,0 +1,30 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_LIBCEED_HCURL_BUILD_32_QF_H +#define PALACE_LIBCEED_HCURL_BUILD_32_QF_H + +#include "../coeff/coeff_3_qf.h" +#include "utils_32_qf.h" + +CEED_QFUNCTION(f_build_hcurl_32)(void *__restrict__ ctx, CeedInt Q, + const CeedScalar *const *in, CeedScalar *const *out) +{ + const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q; + CeedScalar *qd = out[0]; + + CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) + { + CeedScalar coeff[6], adjJt_loc[6], qd_loc[3]; + CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); + MatUnpack32(adjJt + i, Q, adjJt_loc); + MultAtBA32(adjJt_loc, coeff, qd_loc); + + qd[i + Q * 0] = wdetJ[i] * qd_loc[0]; + qd[i + Q * 1] = wdetJ[i] * qd_loc[1]; + qd[i + Q * 2] = wdetJ[i] * qd_loc[2]; + } + return 0; +} + +#endif // PALACE_LIBCEED_HCURL_BUILD_32_QF_H diff --git a/palace/fem/qfunctions/32/hcurlh1d_32_qf.h b/palace/fem/qfunctions/32/hcurlh1d_32_qf.h new file mode 100644 index 000000000..0aaeea4cb --- /dev/null +++ b/palace/fem/qfunctions/32/hcurlh1d_32_qf.h @@ -0,0 +1,30 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_LIBCEED_HCURL_H1D_32_QF_H +#define PALACE_LIBCEED_HCURL_H1D_32_QF_H + +#include "../coeff/coeff_3_qf.h" +#include "utils_32_qf.h" + +CEED_QFUNCTION(f_apply_hcurlh1d_32)(void *__restrict__ ctx, CeedInt Q, + const CeedScalar *const *in, CeedScalar *const *out) +{ + const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1]; + CeedScalar *v = out[0]; + + CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) + { + const CeedScalar u_loc[2] = {u[i + Q * 0], u[i + Q * 1]}; + CeedScalar coeff[6], adjJt_loc[6], v_loc[3]; // MultBAx32 writes y[0..2] + CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); + MatUnpack32(adjJt + i, Q, adjJt_loc); + MultBAx32(adjJt_loc, coeff, u_loc, v_loc); + + v[i + Q * 0] = wdetJ[i] * v_loc[0]; + v[i + Q * 1] = wdetJ[i] * v_loc[1]; + } + return 0; +} + +#endif // PALACE_LIBCEED_HCURL_H1D_32_QF_H diff --git a/palace/fem/qfunctions/32/hcurlh1d_build_32_qf.h b/palace/fem/qfunctions/32/hcurlh1d_build_32_qf.h new file mode 100644 index 000000000..2f8feecd3 --- /dev/null +++ b/palace/fem/qfunctions/32/hcurlh1d_build_32_qf.h @@ -0,0 +1,31 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_LIBCEED_HCURL_H1D_BUILD_32_QF_H +#define PALACE_LIBCEED_HCURL_H1D_BUILD_32_QF_H + +#include "../coeff/coeff_3_qf.h" +#include "utils_32_qf.h" + +CEED_QFUNCTION(f_build_hcurlh1d_32)(void *__restrict__ ctx, CeedInt Q, + const CeedScalar *const *in, CeedScalar *const *out) +{ + const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q; + CeedScalar *qd = out[0]; + + CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) + { + CeedScalar coeff[6], adjJt_loc[6], qd_loc[6]; // MultBA32 writes C[0..5] + CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); + MatUnpack32(adjJt + i, Q, adjJt_loc); + MultBA32(adjJt_loc, coeff, qd_loc); + + qd[i + Q * 0] = wdetJ[i] * qd_loc[0]; + qd[i + Q * 1] = wdetJ[i] * qd_loc[1]; + qd[i + Q * 2] = wdetJ[i] * qd_loc[2]; + qd[i + Q * 3] = wdetJ[i] * qd_loc[3]; + } + return 0; +} + +#endif // PALACE_LIBCEED_HCURL_H1D_BUILD_32_QF_H diff --git a/palace/fem/qfunctions/32/hcurlhdiv_32_qf.h b/palace/fem/qfunctions/32/hcurlhdiv_32_qf.h new file mode 100644 index 000000000..3492454fe --- /dev/null +++ b/palace/fem/qfunctions/32/hcurlhdiv_32_qf.h @@ -0,0 +1,52 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_LIBCEED_HCURL_HDIV_32_QF_H +#define PALACE_LIBCEED_HCURL_HDIV_32_QF_H + +#include "../coeff/coeff_3_qf.h" +#include "utils_32_qf.h" + +CEED_QFUNCTION(f_apply_hcurlhdiv_32)(void *__restrict__ ctx, CeedInt Q, + const CeedScalar *const *in, CeedScalar *const *out) +{ + const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1]; + CeedScalar *v = out[0]; + + CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) + { + const CeedScalar u_loc[2] = {u[i + Q * 0], u[i + Q * 1]}; + CeedScalar coeff[6], adjJt_loc[6], J_loc[6], v_loc[3]; + CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); + MatUnpack32(adjJt + i, Q, adjJt_loc); + AdjJt32(adjJt_loc, J_loc); + MultAtBCx32(J_loc, coeff, adjJt_loc, u_loc, v_loc); + + v[i + Q * 0] = wdetJ[i] * v_loc[0]; + v[i + Q * 1] = wdetJ[i] * v_loc[1]; + } + return 0; +} + +CEED_QFUNCTION(f_apply_hdivhcurl_32)(void *__restrict__ ctx, CeedInt Q, + const CeedScalar *const *in, CeedScalar *const *out) +{ + const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1]; + CeedScalar *v = out[0]; + + CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) + { + const CeedScalar u_loc[2] = {u[i + Q * 0], u[i + Q * 1]}; + CeedScalar coeff[6], adjJt_loc[6], J_loc[6], v_loc[3]; + CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); + MatUnpack32(adjJt + i, Q, adjJt_loc); + AdjJt32(adjJt_loc, J_loc); + MultAtBCx32(adjJt_loc, coeff, J_loc, u_loc, v_loc); + + v[i + Q * 0] = wdetJ[i] * v_loc[0]; + v[i + Q * 1] = wdetJ[i] * v_loc[1]; + } + return 0; +} + +#endif // PALACE_LIBCEED_HCURL_HDIV_32_QF_H diff --git a/palace/fem/qfunctions/32/hcurlhdiv_build_32_qf.h b/palace/fem/qfunctions/32/hcurlhdiv_build_32_qf.h new file mode 100644 index 000000000..4b4074cb9 --- /dev/null +++ b/palace/fem/qfunctions/32/hcurlhdiv_build_32_qf.h @@ -0,0 +1,54 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_LIBCEED_HCURL_HDIV_BUILD_32_QF_H +#define PALACE_LIBCEED_HCURL_HDIV_BUILD_32_QF_H + +#include "../coeff/coeff_3_qf.h" +#include "utils_32_qf.h" + +CEED_QFUNCTION(f_build_hcurlhdiv_32)(void *__restrict__ ctx, CeedInt Q, + const CeedScalar *const *in, CeedScalar *const *out) +{ + const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q; + CeedScalar *qd = out[0]; + + CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) + { + CeedScalar coeff[6], adjJt_loc[6], J_loc[6], qd_loc[4]; + CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); + MatUnpack32(adjJt + i, Q, adjJt_loc); + AdjJt32(adjJt_loc, J_loc); + MultAtBC32(J_loc, coeff, adjJt_loc, qd_loc); + + qd[i + Q * 0] = wdetJ[i] * qd_loc[0]; + qd[i + Q * 1] = wdetJ[i] * qd_loc[1]; + qd[i + Q * 2] = wdetJ[i] * qd_loc[2]; + qd[i + Q * 3] = wdetJ[i] * qd_loc[3]; + } + return 0; +} + +CEED_QFUNCTION(f_build_hdivhcurl_32)(void *__restrict__ ctx, CeedInt Q, + const CeedScalar *const *in, CeedScalar *const *out) +{ + const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q; + CeedScalar *qd = out[0]; + + CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) + { + CeedScalar coeff[6], adjJt_loc[6], J_loc[6], qd_loc[4]; + CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); + MatUnpack32(adjJt + i, Q, adjJt_loc); + AdjJt32(adjJt_loc, J_loc); + MultAtBC32(adjJt_loc, coeff, J_loc, qd_loc); + + qd[i + Q * 0] = wdetJ[i] * qd_loc[0]; + qd[i + Q * 1] = wdetJ[i] * qd_loc[1]; + qd[i + Q * 2] = wdetJ[i] * qd_loc[2]; + qd[i + Q * 3] = wdetJ[i] * qd_loc[3]; + } + return 0; +} + +#endif // PALACE_LIBCEED_HCURL_HDIV_BUILD_32_QF_H diff --git a/palace/fem/qfunctions/32/hcurlmass_32_qf.h b/palace/fem/qfunctions/32/hcurlmass_32_qf.h new file mode 100644 index 000000000..b7bc1524b --- /dev/null +++ b/palace/fem/qfunctions/32/hcurlmass_32_qf.h @@ -0,0 +1,39 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_LIBCEED_HCURL_MASS_32_QF_H +#define PALACE_LIBCEED_HCURL_MASS_32_QF_H + +#include "../coeff/coeff_1_qf.h" +#include "../coeff/coeff_3_qf.h" +#include "utils_32_qf.h" + +CEED_QFUNCTION(f_apply_hcurlmass_32)(void *__restrict__ ctx, CeedInt Q, + const CeedScalar *const *in, CeedScalar *const *out) +{ + const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1], + *gradu = in[2]; + CeedScalar *__restrict__ v = out[0], *__restrict__ gradv = out[1]; + + CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) + { + { + const CeedScalar coeff = CoeffUnpack1((const CeedIntScalar *)ctx, (CeedInt)attr[i]); + + v[i] = coeff * wdetJ[i] * u[i]; + } + { + const CeedScalar u_loc[2] = {gradu[i + Q * 0], gradu[i + Q * 1]}; + CeedScalar coeff[6], adjJt_loc[6], v_loc[3]; + CoeffUnpack3(CoeffPairSecond<1>((const CeedIntScalar *)ctx), (CeedInt)attr[i], coeff); + MatUnpack32(adjJt + i, Q, adjJt_loc); + MultAtBCx32(adjJt_loc, coeff, adjJt_loc, u_loc, v_loc); + + gradv[i + Q * 0] = wdetJ[i] * v_loc[0]; + gradv[i + Q * 1] = wdetJ[i] * v_loc[1]; + } + } + return 0; +} + +#endif // PALACE_LIBCEED_HCURL_MASS_32_QF_H diff --git a/palace/fem/qfunctions/32/hcurlmass_build_32_qf.h b/palace/fem/qfunctions/32/hcurlmass_build_32_qf.h new file mode 100644 index 000000000..cff513903 --- /dev/null +++ b/palace/fem/qfunctions/32/hcurlmass_build_32_qf.h @@ -0,0 +1,38 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_LIBCEED_HCURL_MASS_BUILD_32_QF_H +#define PALACE_LIBCEED_HCURL_MASS_BUILD_32_QF_H + +#include "../coeff/coeff_1_qf.h" +#include "../coeff/coeff_3_qf.h" +#include "utils_32_qf.h" + +CEED_QFUNCTION(f_build_hcurlmass_32)(void *__restrict__ ctx, CeedInt Q, + const CeedScalar *const *in, CeedScalar *const *out) +{ + const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q; + CeedScalar *__restrict__ qd1 = out[0], *__restrict__ qd2 = out[0] + Q; + + CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) + { + { + const CeedScalar coeff = CoeffUnpack1((const CeedIntScalar *)ctx, (CeedInt)attr[i]); + + qd1[i + Q * 0] = coeff * wdetJ[i]; + } + { + CeedScalar coeff[6], adjJt_loc[6], qd_loc[3]; + CoeffUnpack3(CoeffPairSecond<1>((const CeedIntScalar *)ctx), (CeedInt)attr[i], coeff); + MatUnpack32(adjJt + i, Q, adjJt_loc); + MultAtBA32(adjJt_loc, coeff, qd_loc); + + qd2[i + Q * 0] = wdetJ[i] * qd_loc[0]; + qd2[i + Q * 1] = wdetJ[i] * qd_loc[1]; + qd2[i + Q * 2] = wdetJ[i] * qd_loc[2]; + } + } + return 0; +} + +#endif // PALACE_LIBCEED_HCURL_MASS_BUILD_32_QF_H diff --git a/palace/fem/qfunctions/32/hdiv_32_qf.h b/palace/fem/qfunctions/32/hdiv_32_qf.h new file mode 100644 index 000000000..2c8d19ad7 --- /dev/null +++ b/palace/fem/qfunctions/32/hdiv_32_qf.h @@ -0,0 +1,31 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_LIBCEED_HDIV_32_QF_H +#define PALACE_LIBCEED_HDIV_32_QF_H + +#include "../coeff/coeff_3_qf.h" +#include "utils_32_qf.h" + +CEED_QFUNCTION(f_apply_hdiv_32)(void *__restrict__ ctx, CeedInt Q, + const CeedScalar *const *in, CeedScalar *const *out) +{ + const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1]; + CeedScalar *v = out[0]; + + CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) + { + const CeedScalar u_loc[2] = {u[i + Q * 0], u[i + Q * 1]}; + CeedScalar coeff[6], adjJt_loc[6], J_loc[6], v_loc[3]; + CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); + MatUnpack32(adjJt + i, Q, adjJt_loc); + AdjJt32(adjJt_loc, J_loc); + MultAtBCx32(J_loc, coeff, J_loc, u_loc, v_loc); + + v[i + Q * 0] = wdetJ[i] * v_loc[0]; + v[i + Q * 1] = wdetJ[i] * v_loc[1]; + } + return 0; +} + +#endif // PALACE_LIBCEED_HDIV_32_QF_H diff --git a/palace/fem/qfunctions/32/hdiv_build_32_qf.h b/palace/fem/qfunctions/32/hdiv_build_32_qf.h new file mode 100644 index 000000000..ae54577a9 --- /dev/null +++ b/palace/fem/qfunctions/32/hdiv_build_32_qf.h @@ -0,0 +1,31 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_LIBCEED_HDIV_BUILD_32_QF_H +#define PALACE_LIBCEED_HDIV_BUILD_32_QF_H + +#include "../coeff/coeff_3_qf.h" +#include "utils_32_qf.h" + +CEED_QFUNCTION(f_build_hdiv_32)(void *__restrict__ ctx, CeedInt Q, + const CeedScalar *const *in, CeedScalar *const *out) +{ + const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q; + CeedScalar *qd = out[0]; + + CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) + { + CeedScalar coeff[6], adjJt_loc[6], J_loc[6], qd_loc[3]; + CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); + MatUnpack32(adjJt + i, Q, adjJt_loc); + AdjJt32(adjJt_loc, J_loc); + MultAtBA32(J_loc, coeff, qd_loc); + + qd[i + Q * 0] = wdetJ[i] * qd_loc[0]; + qd[i + Q * 1] = wdetJ[i] * qd_loc[1]; + qd[i + Q * 2] = wdetJ[i] * qd_loc[2]; + } + return 0; +} + +#endif // PALACE_LIBCEED_HDIV_BUILD_32_QF_H diff --git a/palace/fem/qfunctions/32/hdivmass_32_qf.h b/palace/fem/qfunctions/32/hdivmass_32_qf.h new file mode 100644 index 000000000..8dae7ed14 --- /dev/null +++ b/palace/fem/qfunctions/32/hdivmass_32_qf.h @@ -0,0 +1,40 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_LIBCEED_HDIV_MASS_32_QF_H +#define PALACE_LIBCEED_HDIV_MASS_32_QF_H + +#include "../coeff/coeff_1_qf.h" +#include "../coeff/coeff_3_qf.h" +#include "utils_32_qf.h" + +CEED_QFUNCTION(f_apply_hdivmass_32)(void *__restrict__ ctx, CeedInt Q, + const CeedScalar *const *in, CeedScalar *const *out) +{ + const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *qw = in[1], + *u = in[2], *curlu = in[3]; + CeedScalar *__restrict__ v = out[0], *__restrict__ curlv = out[1]; + + CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) + { + { + const CeedScalar u_loc[2] = {u[i + Q * 0], u[i + Q * 1]}; + CeedScalar coeff[6], adjJt_loc[6], v_loc[3]; + CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); + MatUnpack32(adjJt + i, Q, adjJt_loc); + MultAtBCx32(adjJt_loc, coeff, adjJt_loc, u_loc, v_loc); + + v[i + Q * 0] = wdetJ[i] * v_loc[0]; + v[i + Q * 1] = wdetJ[i] * v_loc[1]; + } + { + const CeedScalar coeff = + CoeffUnpack1(CoeffPairSecond<3>((const CeedIntScalar *)ctx), (CeedInt)attr[i]); + + curlv[i] = (coeff * qw[i] * qw[i] / wdetJ[i]) * curlu[i]; + } + } + return 0; +} + +#endif // PALACE_LIBCEED_HDIV_MASS_32_QF_H diff --git a/palace/fem/qfunctions/32/hdivmass_build_32_qf.h b/palace/fem/qfunctions/32/hdivmass_build_32_qf.h new file mode 100644 index 000000000..2cad3a878 --- /dev/null +++ b/palace/fem/qfunctions/32/hdivmass_build_32_qf.h @@ -0,0 +1,39 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_LIBCEED_HDIV_MASS_BUILD_32_QF_H +#define PALACE_LIBCEED_HDIV_MASS_BUILD_32_QF_H + +#include "../coeff/coeff_1_qf.h" +#include "../coeff/coeff_3_qf.h" +#include "utils_32_qf.h" + +CEED_QFUNCTION(f_build_hdivmass_32)(void *__restrict__ ctx, CeedInt Q, + const CeedScalar *const *in, CeedScalar *const *out) +{ + const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *qw = in[1]; + CeedScalar *__restrict__ qd1 = out[0], *__restrict__ qd2 = out[0] + 3 * Q; + + CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) + { + { + CeedScalar coeff[6], adjJt_loc[6], qd_loc[3]; + CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); + MatUnpack32(adjJt + i, Q, adjJt_loc); + MultAtBA32(adjJt_loc, coeff, qd_loc); + + qd1[i + Q * 0] = wdetJ[i] * qd_loc[0]; + qd1[i + Q * 1] = wdetJ[i] * qd_loc[1]; + qd1[i + Q * 2] = wdetJ[i] * qd_loc[2]; + } + { + const CeedScalar coeff = + CoeffUnpack1(CoeffPairSecond<3>((const CeedIntScalar *)ctx), (CeedInt)attr[i]); + + qd2[i] = coeff * qw[i] * qw[i] / wdetJ[i]; + } + } + return 0; +} + +#endif // PALACE_LIBCEED_HDIV_MASS_BUILD_32_QF_H diff --git a/palace/fem/qfunctions/32/l2mass_32_qf.h b/palace/fem/qfunctions/32/l2mass_32_qf.h new file mode 100644 index 000000000..3fcc382be --- /dev/null +++ b/palace/fem/qfunctions/32/l2mass_32_qf.h @@ -0,0 +1,41 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_LIBCEED_L2_MASS_32_QF_H +#define PALACE_LIBCEED_L2_MASS_32_QF_H + +#include "../coeff/coeff_1_qf.h" +#include "../coeff/coeff_3_qf.h" +#include "utils_32_qf.h" + +CEED_QFUNCTION(f_apply_l2mass_32)(void *__restrict__ ctx, CeedInt Q, + const CeedScalar *const *in, CeedScalar *const *out) +{ + const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *qw = in[1], + *u = in[2], *divu = in[3]; + CeedScalar *__restrict__ v = out[0], *__restrict__ divv = out[1]; + + CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) + { + { + const CeedScalar u_loc[2] = {u[i + Q * 0], u[i + Q * 1]}; + CeedScalar coeff[6], adjJt_loc[6], J_loc[6], v_loc[3]; + CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); + MatUnpack32(adjJt + i, Q, adjJt_loc); + AdjJt32(adjJt_loc, J_loc); + MultAtBCx32(J_loc, coeff, J_loc, u_loc, v_loc); + + v[i + Q * 0] = wdetJ[i] * v_loc[0]; + v[i + Q * 1] = wdetJ[i] * v_loc[1]; + } + { + const CeedScalar coeff = + CoeffUnpack1(CoeffPairSecond<3>((const CeedIntScalar *)ctx), (CeedInt)attr[i]); + + divv[i] = (coeff * qw[i] * qw[i] / wdetJ[i]) * divu[i]; + } + } + return 0; +} + +#endif // PALACE_LIBCEED_L2_MASS_32_QF_H diff --git a/palace/fem/qfunctions/32/l2mass_build_32_qf.h b/palace/fem/qfunctions/32/l2mass_build_32_qf.h new file mode 100644 index 000000000..4debee61a --- /dev/null +++ b/palace/fem/qfunctions/32/l2mass_build_32_qf.h @@ -0,0 +1,40 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_LIBCEED_L2_MASS_BUILD_32_QF_H +#define PALACE_LIBCEED_L2_MASS_BUILD_32_QF_H + +#include "../coeff/coeff_1_qf.h" +#include "../coeff/coeff_3_qf.h" +#include "utils_32_qf.h" + +CEED_QFUNCTION(f_build_l2mass_32)(void *__restrict__ ctx, CeedInt Q, + const CeedScalar *const *in, CeedScalar *const *out) +{ + const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *qw = in[1]; + CeedScalar *qd1 = out[0], *qd2 = out[0] + 3 * Q; + + CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) + { + { + CeedScalar coeff[6], adjJt_loc[6], J_loc[6], qd_loc[3]; + CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); + MatUnpack32(adjJt + i, Q, adjJt_loc); + AdjJt32(adjJt_loc, J_loc); + MultAtBA32(J_loc, coeff, qd_loc); + + qd1[i + Q * 0] = wdetJ[i] * qd_loc[0]; + qd1[i + Q * 1] = wdetJ[i] * qd_loc[1]; + qd1[i + Q * 2] = wdetJ[i] * qd_loc[2]; + } + { + const CeedScalar coeff = + CoeffUnpack1(CoeffPairSecond<3>((const CeedIntScalar *)ctx), (CeedInt)attr[i]); + + qd2[i] = coeff * qw[i] * qw[i] / wdetJ[i]; + } + } + return 0; +} + +#endif // PALACE_LIBCEED_L2_MASS_BUILD_32_QF_H diff --git a/palace/fem/qfunctions/32/utils_32_qf.h b/palace/fem/qfunctions/32/utils_32_qf.h new file mode 100644 index 000000000..3a0e70737 --- /dev/null +++ b/palace/fem/qfunctions/32/utils_32_qf.h @@ -0,0 +1,145 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_LIBCEED_UTILS_32_QF_H +#define PALACE_LIBCEED_UTILS_32_QF_H + +#include + +CEED_QFUNCTION_HELPER CeedScalar DetJ32(const CeedScalar J[6]) +{ + // J: 0 3 + // 1 4 + // 2 5 + const CeedScalar E = J[0] * J[0] + J[1] * J[1] + J[2] * J[2]; + const CeedScalar G = J[3] * J[3] + J[4] * J[4] + J[5] * J[5]; + const CeedScalar F = J[0] * J[3] + J[1] * J[4] + J[2] * J[5]; + return sqrt(E * G - F * F); +} + +template +CEED_QFUNCTION_HELPER CeedScalar AdjJt32(const CeedScalar J[6], CeedScalar adjJt[6]) +{ + // Compute adj(J)^T / det(J) and store the result. + // J: 0 3 + // 1 4 + // 2 5 + const CeedScalar E = J[0] * J[0] + J[1] * J[1] + J[2] * J[2]; + const CeedScalar G = J[3] * J[3] + J[4] * J[4] + J[5] * J[5]; + const CeedScalar F = J[0] * J[3] + J[1] * J[4] + J[2] * J[5]; + const CeedScalar d = sqrt(E * G - F * F); + adjJt[0] = (G * J[0] - F * J[3]) / d; + adjJt[1] = (G * J[1] - F * J[4]) / d; + adjJt[2] = (G * J[2] - F * J[5]) / d; + adjJt[3] = (E * J[3] - F * J[0]) / d; + adjJt[4] = (E * J[4] - F * J[1]) / d; + adjJt[5] = (E * J[5] - F * J[2]) / d; + return ComputeDet ? 
d : 0.0; +} + +CEED_QFUNCTION_HELPER void MatUnpack32(const CeedScalar *A, const CeedInt A_stride, + CeedScalar A_loc[6]) +{ + A_loc[0] = A[A_stride * 0]; + A_loc[1] = A[A_stride * 1]; + A_loc[2] = A[A_stride * 2]; + A_loc[3] = A[A_stride * 3]; + A_loc[4] = A[A_stride * 4]; + A_loc[5] = A[A_stride * 5]; +} + +CEED_QFUNCTION_HELPER void MultAtBCx32(const CeedScalar A[6], const CeedScalar B[6], + const CeedScalar C[6], const CeedScalar x[2], + CeedScalar y[3]) +{ + // A: 0 3 B: 0 1 2 C: 0 3 + // 1 4 1 3 4 1 4 + // 2 5 2 4 5 2 5 + CeedScalar z[3]; + + y[0] = C[0] * x[0] + C[3] * x[1]; + y[1] = C[1] * x[0] + C[4] * x[1]; + y[2] = C[2] * x[0] + C[5] * x[1]; + + z[0] = B[0] * y[0] + B[1] * y[1] + B[2] * y[2]; + z[1] = B[1] * y[0] + B[3] * y[1] + B[4] * y[2]; + z[2] = B[2] * y[0] + B[4] * y[1] + B[5] * y[2]; + + y[0] = A[0] * z[0] + A[1] * z[1] + A[2] * z[2]; + y[1] = A[3] * z[0] + A[4] * z[1] + A[5] * z[2]; + y[2] = 0.0; +} + +CEED_QFUNCTION_HELPER void MultBAx32(const CeedScalar A[6], const CeedScalar B[6], + const CeedScalar x[2], CeedScalar y[3]) +{ + // A: 0 3 B: 0 1 2 + // 1 4 1 3 4 + // 2 5 2 4 5 + CeedScalar z[3]; + + z[0] = A[0] * x[0] + A[3] * x[1]; + z[1] = A[1] * x[0] + A[4] * x[1]; + z[2] = A[2] * x[0] + A[5] * x[1]; + + y[0] = B[0] * z[0] + B[1] * z[1] + B[2] * z[2]; + y[1] = B[1] * z[0] + B[3] * z[1] + B[4] * z[2]; + y[2] = B[2] * z[0] + B[4] * z[1] + B[5] * z[2]; +} + +CEED_QFUNCTION_HELPER void MultAtBA32(const CeedScalar A[6], const CeedScalar B[6], + CeedScalar C[3]) +{ + // A: 0 3 B: 0 1 2 C: 0 1 + // 1 4 1 3 4 1 2 + // 2 5 2 4 5 + + // First compute entries of R = B A. 
+ const CeedScalar R11 = B[0] * A[0] + B[1] * A[1] + B[2] * A[2]; + const CeedScalar R21 = B[1] * A[0] + B[3] * A[1] + B[4] * A[2]; + const CeedScalar R31 = B[2] * A[0] + B[4] * A[1] + B[5] * A[2]; + const CeedScalar R12 = B[0] * A[3] + B[1] * A[4] + B[2] * A[5]; + const CeedScalar R22 = B[1] * A[3] + B[3] * A[4] + B[4] * A[5]; + const CeedScalar R32 = B[2] * A[3] + B[4] * A[4] + B[5] * A[5]; + + C[0] = A[0] * R11 + A[1] * R21 + A[2] * R31; + C[1] = A[0] * R12 + A[1] * R22 + A[2] * R32; + C[2] = A[3] * R12 + A[4] * R22 + A[5] * R32; +} + +CEED_QFUNCTION_HELPER void MultAtBC32(const CeedScalar A[6], const CeedScalar B[6], + const CeedScalar C[6], CeedScalar D[4]) +{ + // A, C: 0 3 B: 0 1 2 D: 0 2 + // 1 4 1 3 4 1 3 + // 2 5 2 4 5 + + // First compute entries of R = B C. + const CeedScalar R11 = B[0] * C[0] + B[1] * C[1] + B[2] * C[2]; + const CeedScalar R21 = B[1] * C[0] + B[3] * C[1] + B[4] * C[2]; + const CeedScalar R31 = B[2] * C[0] + B[4] * C[1] + B[5] * C[2]; + const CeedScalar R12 = B[0] * C[3] + B[1] * C[4] + B[2] * C[5]; + const CeedScalar R22 = B[1] * C[3] + B[3] * C[4] + B[4] * C[5]; + const CeedScalar R32 = B[2] * C[3] + B[4] * C[4] + B[5] * C[5]; + + D[0] = A[0] * R11 + A[1] * R21 + A[2] * R31; + D[1] = A[3] * R11 + A[4] * R21 + A[5] * R31; + D[2] = A[0] * R12 + A[1] * R22 + A[2] * R32; + D[3] = A[3] * R12 + A[4] * R22 + A[5] * R32; +} + +CEED_QFUNCTION_HELPER void MultBA32(const CeedScalar A[6], const CeedScalar B[6], + CeedScalar C[6]) +{ + // A: 0 3 B: 0 1 2 C: 0 3 + // 1 4 1 3 4 1 4 + // 2 5 2 4 5 2 5 + C[0] = B[0] * A[0] + B[1] * A[1] + B[2] * A[2]; + C[1] = B[1] * A[0] + B[3] * A[1] + B[4] * A[2]; + C[2] = B[2] * A[0] + B[4] * A[1] + B[5] * A[2]; + C[3] = B[0] * A[3] + B[1] * A[4] + B[2] * A[5]; + C[4] = B[1] * A[3] + B[3] * A[4] + B[4] * A[5]; + C[5] = B[2] * A[3] + B[4] * A[4] + B[5] * A[5]; +} + +#endif // PALACE_LIBCEED_UTILS_32_QF_H diff --git a/palace/fem/qfunctions/33/geom_33_qf.h b/palace/fem/qfunctions/33/geom_33_qf.h new file mode 100644 
index 000000000..213a4d1b3 --- /dev/null +++ b/palace/fem/qfunctions/33/geom_33_qf.h @@ -0,0 +1,36 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_LIBCEED_GEOM_33_QF_H +#define PALACE_LIBCEED_GEOM_33_QF_H + +#include "utils_33_qf.h" + +CEED_QFUNCTION(f_build_geom_factor_33)(void *, CeedInt Q, const CeedScalar *const *in, + CeedScalar *const *out) +{ + const CeedScalar *qw = in[0], *J = in[1]; + CeedScalar *attr = out[0], *wdetJ = out[0] + Q, *adjJt = out[0] + 2 * Q; + + CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) + { + CeedScalar J_loc[9], adjJt_loc[9]; + MatUnpack33(J + i, Q, J_loc); + const CeedScalar detJ = AdjJt33(J_loc, adjJt_loc); + + attr[i] = 0; + wdetJ[i] = qw[i] * detJ; + adjJt[i + Q * 0] = adjJt_loc[0] / detJ; + adjJt[i + Q * 1] = adjJt_loc[1] / detJ; + adjJt[i + Q * 2] = adjJt_loc[2] / detJ; + adjJt[i + Q * 3] = adjJt_loc[3] / detJ; + adjJt[i + Q * 4] = adjJt_loc[4] / detJ; + adjJt[i + Q * 5] = adjJt_loc[5] / detJ; + adjJt[i + Q * 6] = adjJt_loc[6] / detJ; + adjJt[i + Q * 7] = adjJt_loc[7] / detJ; + adjJt[i + Q * 8] = adjJt_loc[8] / detJ; + } + return 0; +} + +#endif // PALACE_LIBCEED_GEOM_33_QF_H diff --git a/palace/fem/qfunctions/33/hcurl_33_qf.h b/palace/fem/qfunctions/33/hcurl_33_qf.h new file mode 100644 index 000000000..98803c9b4 --- /dev/null +++ b/palace/fem/qfunctions/33/hcurl_33_qf.h @@ -0,0 +1,31 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_LIBCEED_HCURL_33_QF_H +#define PALACE_LIBCEED_HCURL_33_QF_H + +#include "../coeff/coeff_3_qf.h" +#include "utils_33_qf.h" + +CEED_QFUNCTION(f_apply_hcurl_33)(void *__restrict__ ctx, CeedInt Q, + const CeedScalar *const *in, CeedScalar *const *out) +{ + const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1]; + CeedScalar *v = out[0]; + + CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) + { + const CeedScalar u_loc[3] = {u[i + Q * 0], u[i + Q * 1], u[i + Q * 2]}; + CeedScalar coeff[6], adjJt_loc[9], v_loc[3]; + CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); + MatUnpack33(adjJt + i, Q, adjJt_loc); + MultAtBCx33(adjJt_loc, coeff, adjJt_loc, u_loc, v_loc); + + v[i + Q * 0] = wdetJ[i] * v_loc[0]; + v[i + Q * 1] = wdetJ[i] * v_loc[1]; + v[i + Q * 2] = wdetJ[i] * v_loc[2]; + } + return 0; +} + +#endif // PALACE_LIBCEED_HCURL_33_QF_H diff --git a/palace/fem/qfunctions/33/hcurl_build_33_qf.h b/palace/fem/qfunctions/33/hcurl_build_33_qf.h new file mode 100644 index 000000000..f1f34be12 --- /dev/null +++ b/palace/fem/qfunctions/33/hcurl_build_33_qf.h @@ -0,0 +1,33 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_LIBCEED_HCURL_BUILD_33_QF_H +#define PALACE_LIBCEED_HCURL_BUILD_33_QF_H + +#include "../coeff/coeff_3_qf.h" +#include "utils_33_qf.h" + +CEED_QFUNCTION(f_build_hcurl_33)(void *__restrict__ ctx, CeedInt Q, + const CeedScalar *const *in, CeedScalar *const *out) +{ + const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q; + CeedScalar *qd = out[0]; + + CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) + { + CeedScalar coeff[6], adjJt_loc[9], qd_loc[6]; + CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); + MatUnpack33(adjJt + i, Q, adjJt_loc); + MultAtBA33(adjJt_loc, coeff, qd_loc); + + qd[i + Q * 0] = wdetJ[i] * qd_loc[0]; + qd[i + Q * 1] = wdetJ[i] * qd_loc[1]; + qd[i + Q * 2] = wdetJ[i] * qd_loc[2]; + qd[i + Q * 3] = wdetJ[i] * qd_loc[3]; + qd[i + Q * 4] = wdetJ[i] * qd_loc[4]; + qd[i + Q * 5] = wdetJ[i] * qd_loc[5]; + } + return 0; +} + +#endif // PALACE_LIBCEED_HCURL_BUILD_33_QF_H diff --git a/palace/fem/qfunctions/33/hcurlh1d_33_qf.h b/palace/fem/qfunctions/33/hcurlh1d_33_qf.h new file mode 100644 index 000000000..fde5ffc6c --- /dev/null +++ b/palace/fem/qfunctions/33/hcurlh1d_33_qf.h @@ -0,0 +1,31 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_LIBCEED_HCURL_H1D_33_QF_H +#define PALACE_LIBCEED_HCURL_H1D_33_QF_H + +#include "../coeff/coeff_3_qf.h" +#include "utils_33_qf.h" + +CEED_QFUNCTION(f_apply_hcurlh1d_33)(void *__restrict__ ctx, CeedInt Q, + const CeedScalar *const *in, CeedScalar *const *out) +{ + const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1]; + CeedScalar *v = out[0]; + + CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) + { + const CeedScalar u_loc[3] = {u[i + Q * 0], u[i + Q * 1], u[i + Q * 2]}; + CeedScalar coeff[6], adjJt_loc[9], v_loc[3]; + CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); + MatUnpack33(adjJt + i, Q, adjJt_loc); + MultBAx33(adjJt_loc, coeff, u_loc, v_loc); + + v[i + Q * 0] = wdetJ[i] * v_loc[0]; + v[i + Q * 1] = wdetJ[i] * v_loc[1]; + v[i + Q * 2] = wdetJ[i] * v_loc[2]; + } + return 0; +} + +#endif // PALACE_LIBCEED_HCURL_H1D_33_QF_H diff --git a/palace/fem/qfunctions/33/hcurlh1d_build_33_qf.h b/palace/fem/qfunctions/33/hcurlh1d_build_33_qf.h new file mode 100644 index 000000000..fb1a09587 --- /dev/null +++ b/palace/fem/qfunctions/33/hcurlh1d_build_33_qf.h @@ -0,0 +1,36 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_LIBCEED_HCURL_H1D_BUILD_33_QF_H +#define PALACE_LIBCEED_HCURL_H1D_BUILD_33_QF_H + +#include "../coeff/coeff_3_qf.h" +#include "utils_33_qf.h" + +CEED_QFUNCTION(f_build_hcurlh1d_33)(void *__restrict__ ctx, CeedInt Q, + const CeedScalar *const *in, CeedScalar *const *out) +{ + const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q; + CeedScalar *qd = out[0]; + + CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) + { + CeedScalar coeff[6], adjJt_loc[9], qd_loc[9]; + CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); + MatUnpack33(adjJt + i, Q, adjJt_loc); + MultBA33(adjJt_loc, coeff, qd_loc); + + qd[i + Q * 0] = wdetJ[i] * qd_loc[0]; + qd[i + Q * 1] = wdetJ[i] * qd_loc[1]; + qd[i + Q * 2] = wdetJ[i] * qd_loc[2]; + qd[i + Q * 3] = wdetJ[i] * qd_loc[3]; + qd[i + Q * 4] = wdetJ[i] * qd_loc[4]; + qd[i + Q * 5] = wdetJ[i] * qd_loc[5]; + qd[i + Q * 6] = wdetJ[i] * qd_loc[6]; + qd[i + Q * 7] = wdetJ[i] * qd_loc[7]; + qd[i + Q * 8] = wdetJ[i] * qd_loc[8]; + } + return 0; +} + +#endif // PALACE_LIBCEED_HCURL_H1D_BUILD_33_QF_H diff --git a/palace/fem/qfunctions/33/hcurlhdiv_33_qf.h b/palace/fem/qfunctions/33/hcurlhdiv_33_qf.h new file mode 100644 index 000000000..a9789de23 --- /dev/null +++ b/palace/fem/qfunctions/33/hcurlhdiv_33_qf.h @@ -0,0 +1,54 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_LIBCEED_HCURL_HDIV_33_QF_H +#define PALACE_LIBCEED_HCURL_HDIV_33_QF_H + +#include "../coeff/coeff_3_qf.h" +#include "utils_33_qf.h" + +CEED_QFUNCTION(f_apply_hcurlhdiv_33)(void *__restrict__ ctx, CeedInt Q, + const CeedScalar *const *in, CeedScalar *const *out) +{ + const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1]; + CeedScalar *v = out[0]; + + CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) + { + const CeedScalar u_loc[3] = {u[i + Q * 0], u[i + Q * 1], u[i + Q * 2]}; + CeedScalar coeff[6], adjJt_loc[9], J_loc[9], v_loc[3]; + CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); + MatUnpack33(adjJt + i, Q, adjJt_loc); + AdjJt33(adjJt_loc, J_loc); + MultAtBCx33(J_loc, coeff, adjJt_loc, u_loc, v_loc); + + v[i + Q * 0] = wdetJ[i] * v_loc[0]; + v[i + Q * 1] = wdetJ[i] * v_loc[1]; + v[i + Q * 2] = wdetJ[i] * v_loc[2]; + } + return 0; +} + +CEED_QFUNCTION(f_apply_hdivhcurl_33)(void *__restrict__ ctx, CeedInt Q, + const CeedScalar *const *in, CeedScalar *const *out) +{ + const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1]; + CeedScalar *v = out[0]; + + CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) + { + const CeedScalar u_loc[3] = {u[i + Q * 0], u[i + Q * 1], u[i + Q * 2]}; + CeedScalar coeff[6], adjJt_loc[9], J_loc[9], v_loc[3]; + CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); + MatUnpack33(adjJt + i, Q, adjJt_loc); + AdjJt33(adjJt_loc, J_loc); + MultAtBCx33(adjJt_loc, coeff, J_loc, u_loc, v_loc); + + v[i + Q * 0] = wdetJ[i] * v_loc[0]; + v[i + Q * 1] = wdetJ[i] * v_loc[1]; + v[i + Q * 2] = wdetJ[i] * v_loc[2]; + } + return 0; +} + +#endif // PALACE_LIBCEED_HCURL_HDIV_33_QF_H diff --git a/palace/fem/qfunctions/33/hcurlhdiv_build_33_qf.h b/palace/fem/qfunctions/33/hcurlhdiv_build_33_qf.h new file mode 100644 index 000000000..deda1016b --- /dev/null +++ b/palace/fem/qfunctions/33/hcurlhdiv_build_33_qf.h @@ -0,0 
+1,64 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_LIBCEED_HCURL_HDIV_BUILD_33_QF_H +#define PALACE_LIBCEED_HCURL_HDIV_BUILD_33_QF_H + +#include "../coeff/coeff_3_qf.h" +#include "utils_33_qf.h" + +CEED_QFUNCTION(f_build_hcurlhdiv_33)(void *__restrict__ ctx, CeedInt Q, + const CeedScalar *const *in, CeedScalar *const *out) +{ + const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q; + CeedScalar *qd = out[0]; + + CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) + { + CeedScalar coeff[6], adjJt_loc[9], J_loc[9], qd_loc[9]; + CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); + MatUnpack33(adjJt + i, Q, adjJt_loc); + AdjJt33(adjJt_loc, J_loc); + MultAtBC33(J_loc, coeff, adjJt_loc, qd_loc); + + qd[i + Q * 0] = wdetJ[i] * qd_loc[0]; + qd[i + Q * 1] = wdetJ[i] * qd_loc[1]; + qd[i + Q * 2] = wdetJ[i] * qd_loc[2]; + qd[i + Q * 3] = wdetJ[i] * qd_loc[3]; + qd[i + Q * 4] = wdetJ[i] * qd_loc[4]; + qd[i + Q * 5] = wdetJ[i] * qd_loc[5]; + qd[i + Q * 6] = wdetJ[i] * qd_loc[6]; + qd[i + Q * 7] = wdetJ[i] * qd_loc[7]; + qd[i + Q * 8] = wdetJ[i] * qd_loc[8]; + } + return 0; +} + +CEED_QFUNCTION(f_build_hdivhcurl_33)(void *__restrict__ ctx, CeedInt Q, + const CeedScalar *const *in, CeedScalar *const *out) +{ + const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q; + CeedScalar *qd = out[0]; + + CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) + { + CeedScalar coeff[6], adjJt_loc[9], J_loc[9], qd_loc[9]; + CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); + MatUnpack33(adjJt + i, Q, adjJt_loc); + AdjJt33(adjJt_loc, J_loc); + MultAtBC33(adjJt_loc, coeff, J_loc, qd_loc); + + qd[i + Q * 0] = wdetJ[i] * qd_loc[0]; + qd[i + Q * 1] = wdetJ[i] * qd_loc[1]; + qd[i + Q * 2] = wdetJ[i] * qd_loc[2]; + qd[i + Q * 3] = wdetJ[i] * qd_loc[3]; + qd[i + Q * 4] = wdetJ[i] * qd_loc[4]; + qd[i + Q * 5] = wdetJ[i] * qd_loc[5]; + qd[i + Q * 6] = 
wdetJ[i] * qd_loc[6]; + qd[i + Q * 7] = wdetJ[i] * qd_loc[7]; + qd[i + Q * 8] = wdetJ[i] * qd_loc[8]; + } + return 0; +} + +#endif // PALACE_LIBCEED_HCURL_HDIV_BUILD_33_QF_H diff --git a/palace/fem/qfunctions/33/hcurlmass_33_qf.h b/palace/fem/qfunctions/33/hcurlmass_33_qf.h new file mode 100644 index 000000000..71563d953 --- /dev/null +++ b/palace/fem/qfunctions/33/hcurlmass_33_qf.h @@ -0,0 +1,40 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_LIBCEED_HCURL_MASS_33_QF_H +#define PALACE_LIBCEED_HCURL_MASS_33_QF_H + +#include "../coeff/coeff_1_qf.h" +#include "../coeff/coeff_3_qf.h" +#include "utils_33_qf.h" + +CEED_QFUNCTION(f_apply_hcurlmass_33)(void *__restrict__ ctx, CeedInt Q, + const CeedScalar *const *in, CeedScalar *const *out) +{ + const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1], + *gradu = in[2]; + CeedScalar *__restrict__ v = out[0], *__restrict__ gradv = out[1]; + + CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) + { + { + const CeedScalar coeff = CoeffUnpack1((const CeedIntScalar *)ctx, (CeedInt)attr[i]); + + v[i] = coeff * wdetJ[i] * u[i]; + } + { + const CeedScalar u_loc[3] = {gradu[i + Q * 0], gradu[i + Q * 1], gradu[i + Q * 2]}; + CeedScalar coeff[6], adjJt_loc[9], v_loc[3]; + CoeffUnpack3(CoeffPairSecond<1>((const CeedIntScalar *)ctx), (CeedInt)attr[i], coeff); + MatUnpack33(adjJt + i, Q, adjJt_loc); + MultAtBCx33(adjJt_loc, coeff, adjJt_loc, u_loc, v_loc); + + gradv[i + Q * 0] = wdetJ[i] * v_loc[0]; + gradv[i + Q * 1] = wdetJ[i] * v_loc[1]; + gradv[i + Q * 2] = wdetJ[i] * v_loc[2]; + } + } + return 0; +} + +#endif // PALACE_LIBCEED_HCURL_MASS_33_QF_H diff --git a/palace/fem/qfunctions/33/hcurlmass_build_33_qf.h b/palace/fem/qfunctions/33/hcurlmass_build_33_qf.h new file mode 100644 index 000000000..ea43256ef --- /dev/null +++ b/palace/fem/qfunctions/33/hcurlmass_build_33_qf.h @@ -0,0 +1,41 @@ +// Copyright Amazon.com, Inc. 
or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_LIBCEED_HCURL_MASS_BUILD_33_QF_H +#define PALACE_LIBCEED_HCURL_MASS_BUILD_33_QF_H + +#include "../coeff/coeff_1_qf.h" +#include "../coeff/coeff_3_qf.h" +#include "utils_33_qf.h" + +CEED_QFUNCTION(f_build_hcurlmass_33)(void *__restrict__ ctx, CeedInt Q, + const CeedScalar *const *in, CeedScalar *const *out) +{ + const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q; + CeedScalar *__restrict__ qd1 = out[0], *__restrict__ qd2 = out[0] + Q; + + CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) + { + { + const CeedScalar coeff = CoeffUnpack1((const CeedIntScalar *)ctx, (CeedInt)attr[i]); + + qd1[i + Q * 0] = coeff * wdetJ[i]; + } + { + CeedScalar coeff[6], adjJt_loc[9], qd_loc[6]; + CoeffUnpack3(CoeffPairSecond<1>((const CeedIntScalar *)ctx), (CeedInt)attr[i], coeff); + MatUnpack33(adjJt + i, Q, adjJt_loc); + MultAtBA33(adjJt_loc, coeff, qd_loc); + + qd2[i + Q * 0] = wdetJ[i] * qd_loc[0]; + qd2[i + Q * 1] = wdetJ[i] * qd_loc[1]; + qd2[i + Q * 2] = wdetJ[i] * qd_loc[2]; + qd2[i + Q * 3] = wdetJ[i] * qd_loc[3]; + qd2[i + Q * 4] = wdetJ[i] * qd_loc[4]; + qd2[i + Q * 5] = wdetJ[i] * qd_loc[5]; + } + } + return 0; +} + +#endif // PALACE_LIBCEED_HCURL_MASS_BUILD_33_QF_H diff --git a/palace/fem/qfunctions/33/hdiv_33_qf.h b/palace/fem/qfunctions/33/hdiv_33_qf.h new file mode 100644 index 000000000..ded132586 --- /dev/null +++ b/palace/fem/qfunctions/33/hdiv_33_qf.h @@ -0,0 +1,32 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_LIBCEED_HDIV_33_QF_H +#define PALACE_LIBCEED_HDIV_33_QF_H + +#include "../coeff/coeff_3_qf.h" +#include "utils_33_qf.h" + +CEED_QFUNCTION(f_apply_hdiv_33)(void *__restrict__ ctx, CeedInt Q, + const CeedScalar *const *in, CeedScalar *const *out) +{ + const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1]; + CeedScalar *v = out[0]; + + CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) + { + const CeedScalar u_loc[3] = {u[i + Q * 0], u[i + Q * 1], u[i + Q * 2]}; + CeedScalar coeff[6], adjJt_loc[9], J_loc[9], v_loc[3]; + CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); + MatUnpack33(adjJt + i, Q, adjJt_loc); + AdjJt33(adjJt_loc, J_loc); + MultAtBCx33(J_loc, coeff, J_loc, u_loc, v_loc); + + v[i + Q * 0] = wdetJ[i] * v_loc[0]; + v[i + Q * 1] = wdetJ[i] * v_loc[1]; + v[i + Q * 2] = wdetJ[i] * v_loc[2]; + } + return 0; +} + +#endif // PALACE_LIBCEED_HDIV_33_QF_H diff --git a/palace/fem/qfunctions/33/hdiv_build_33_qf.h b/palace/fem/qfunctions/33/hdiv_build_33_qf.h new file mode 100644 index 000000000..3c782cce8 --- /dev/null +++ b/palace/fem/qfunctions/33/hdiv_build_33_qf.h @@ -0,0 +1,34 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_LIBCEED_HDIV_BUILD_33_QF_H +#define PALACE_LIBCEED_HDIV_BUILD_33_QF_H + +#include "../coeff/coeff_3_qf.h" +#include "utils_33_qf.h" + +CEED_QFUNCTION(f_build_hdiv_33)(void *__restrict__ ctx, CeedInt Q, + const CeedScalar *const *in, CeedScalar *const *out) +{ + const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q; + CeedScalar *qd = out[0]; + + CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) + { + CeedScalar coeff[6], adjJt_loc[9], J_loc[9], qd_loc[6]; + CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); + MatUnpack33(adjJt + i, Q, adjJt_loc); + AdjJt33(adjJt_loc, J_loc); + MultAtBA33(J_loc, coeff, qd_loc); + + qd[i + Q * 0] = wdetJ[i] * qd_loc[0]; + qd[i + Q * 1] = wdetJ[i] * qd_loc[1]; + qd[i + Q * 2] = wdetJ[i] * qd_loc[2]; + qd[i + Q * 3] = wdetJ[i] * qd_loc[3]; + qd[i + Q * 4] = wdetJ[i] * qd_loc[4]; + qd[i + Q * 5] = wdetJ[i] * qd_loc[5]; + } + return 0; +} + +#endif // PALACE_LIBCEED_HDIV_BUILD_33_QF_H diff --git a/palace/fem/qfunctions/33/hdivmass_33_qf.h b/palace/fem/qfunctions/33/hdivmass_33_qf.h new file mode 100644 index 000000000..33958ad86 --- /dev/null +++ b/palace/fem/qfunctions/33/hdivmass_33_qf.h @@ -0,0 +1,46 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_LIBCEED_HDIV_MASS_33_QF_H +#define PALACE_LIBCEED_HDIV_MASS_33_QF_H + +#include "../coeff/coeff_3_qf.h" +#include "utils_33_qf.h" + +CEED_QFUNCTION(f_apply_hdivmass_33)(void *__restrict__ ctx, CeedInt Q, + const CeedScalar *const *in, CeedScalar *const *out) +{ + const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1], + *curlu = in[2]; + CeedScalar *__restrict__ v = out[0], *__restrict__ curlv = out[1]; + + CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) + { + { + const CeedScalar u_loc[3] = {u[i + Q * 0], u[i + Q * 1], u[i + Q * 2]}; + CeedScalar coeff[6], adjJt_loc[9], v_loc[3]; + CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); + MatUnpack33(adjJt + i, Q, adjJt_loc); + MultAtBCx33(adjJt_loc, coeff, adjJt_loc, u_loc, v_loc); + + v[i + Q * 0] = wdetJ[i] * v_loc[0]; + v[i + Q * 1] = wdetJ[i] * v_loc[1]; + v[i + Q * 2] = wdetJ[i] * v_loc[2]; + } + { + const CeedScalar u_loc[3] = {curlu[i + Q * 0], curlu[i + Q * 1], curlu[i + Q * 2]}; + CeedScalar coeff[6], adjJt_loc[9], J_loc[9], v_loc[3]; + CoeffUnpack3(CoeffPairSecond<3>((const CeedIntScalar *)ctx), (CeedInt)attr[i], coeff); + MatUnpack33(adjJt + i, Q, adjJt_loc); + AdjJt33(adjJt_loc, J_loc); + MultAtBCx33(J_loc, coeff, J_loc, u_loc, v_loc); + + curlv[i + Q * 0] = wdetJ[i] * v_loc[0]; + curlv[i + Q * 1] = wdetJ[i] * v_loc[1]; + curlv[i + Q * 2] = wdetJ[i] * v_loc[2]; + } + } + return 0; +} + +#endif // PALACE_LIBCEED_HDIV_MASS_33_QF_H diff --git a/palace/fem/qfunctions/33/hdivmass_build_33_qf.h b/palace/fem/qfunctions/33/hdivmass_build_33_qf.h new file mode 100644 index 000000000..5f64dc2f9 --- /dev/null +++ b/palace/fem/qfunctions/33/hdivmass_build_33_qf.h @@ -0,0 +1,49 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_LIBCEED_HDIV_MASS_BUILD_33_QF_H +#define PALACE_LIBCEED_HDIV_MASS_BUILD_33_QF_H + +#include "../coeff/coeff_3_qf.h" +#include "utils_33_qf.h" + +CEED_QFUNCTION(f_build_hdivmass_33)(void *__restrict__ ctx, CeedInt Q, + const CeedScalar *const *in, CeedScalar *const *out) +{ + const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q; + CeedScalar *__restrict__ qd1 = out[0], *__restrict__ qd2 = out[0] + 6 * Q; + + CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) + { + { + CeedScalar coeff[6], adjJt_loc[9], qd_loc[6]; + CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); + MatUnpack33(adjJt + i, Q, adjJt_loc); + MultAtBA33(adjJt_loc, coeff, qd_loc); + + qd1[i + Q * 0] = wdetJ[i] * qd_loc[0]; + qd1[i + Q * 1] = wdetJ[i] * qd_loc[1]; + qd1[i + Q * 2] = wdetJ[i] * qd_loc[2]; + qd1[i + Q * 3] = wdetJ[i] * qd_loc[3]; + qd1[i + Q * 4] = wdetJ[i] * qd_loc[4]; + qd1[i + Q * 5] = wdetJ[i] * qd_loc[5]; + } + { + CeedScalar coeff[6], adjJt_loc[9], J_loc[9], qd_loc[6]; + CoeffUnpack3(CoeffPairSecond<3>((const CeedIntScalar *)ctx), (CeedInt)attr[i], coeff); + MatUnpack33(adjJt + i, Q, adjJt_loc); + AdjJt33(adjJt_loc, J_loc); + MultAtBA33(J_loc, coeff, qd_loc); + + qd2[i + Q * 0] = wdetJ[i] * qd_loc[0]; + qd2[i + Q * 1] = wdetJ[i] * qd_loc[1]; + qd2[i + Q * 2] = wdetJ[i] * qd_loc[2]; + qd2[i + Q * 3] = wdetJ[i] * qd_loc[3]; + qd2[i + Q * 4] = wdetJ[i] * qd_loc[4]; + qd2[i + Q * 5] = wdetJ[i] * qd_loc[5]; + } + } + return 0; +} + +#endif // PALACE_LIBCEED_HDIV_MASS_BUILD_33_QF_H diff --git a/palace/fem/qfunctions/33/l2mass_33_qf.h b/palace/fem/qfunctions/33/l2mass_33_qf.h new file mode 100644 index 000000000..d892f7170 --- /dev/null +++ b/palace/fem/qfunctions/33/l2mass_33_qf.h @@ -0,0 +1,42 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_LIBCEED_L2_MASS_33_QF_H +#define PALACE_LIBCEED_L2_MASS_33_QF_H + +#include "../coeff/coeff_1_qf.h" +#include "../coeff/coeff_3_qf.h" +#include "utils_33_qf.h" + +CEED_QFUNCTION(f_apply_l2mass_33)(void *__restrict__ ctx, CeedInt Q, + const CeedScalar *const *in, CeedScalar *const *out) +{ + const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *qw = in[1], + *u = in[2], *divu = in[3]; + CeedScalar *__restrict__ v = out[0], *__restrict__ divv = out[1]; + + CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) + { + { + const CeedScalar u_loc[3] = {u[i + Q * 0], u[i + Q * 1], u[i + Q * 2]}; + CeedScalar coeff[6], adjJt_loc[9], J_loc[9], v_loc[3]; + CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); + MatUnpack33(adjJt + i, Q, adjJt_loc); + AdjJt33(adjJt_loc, J_loc); + MultAtBCx33(J_loc, coeff, J_loc, u_loc, v_loc); + + v[i + Q * 0] = wdetJ[i] * v_loc[0]; + v[i + Q * 1] = wdetJ[i] * v_loc[1]; + v[i + Q * 2] = wdetJ[i] * v_loc[2]; + } + { + const CeedScalar coeff = + CoeffUnpack1(CoeffPairSecond<3>((const CeedIntScalar *)ctx), (CeedInt)attr[i]); + + divv[i] = (coeff * qw[i] * qw[i] / wdetJ[i]) * divu[i]; + } + } + return 0; +} + +#endif // PALACE_LIBCEED_L2_MASS_33_QF_H diff --git a/palace/fem/qfunctions/33/l2mass_build_33_qf.h b/palace/fem/qfunctions/33/l2mass_build_33_qf.h new file mode 100644 index 000000000..9d8853110 --- /dev/null +++ b/palace/fem/qfunctions/33/l2mass_build_33_qf.h @@ -0,0 +1,43 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_LIBCEED_L2_MASS_BUILD_33_QF_H +#define PALACE_LIBCEED_L2_MASS_BUILD_33_QF_H + +#include "../coeff/coeff_1_qf.h" +#include "../coeff/coeff_3_qf.h" +#include "utils_33_qf.h" + +CEED_QFUNCTION(f_build_l2mass_33)(void *__restrict__ ctx, CeedInt Q, + const CeedScalar *const *in, CeedScalar *const *out) +{ + const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *qw = in[1]; + CeedScalar *qd1 = out[0], *qd2 = out[0] + 6 * Q; + + CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) + { + { + CeedScalar coeff[6], adjJt_loc[9], J_loc[9], qd_loc[6]; + CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); + MatUnpack33(adjJt + i, Q, adjJt_loc); + AdjJt33(adjJt_loc, J_loc); + MultAtBA33(J_loc, coeff, qd_loc); + + qd1[i + Q * 0] = wdetJ[i] * qd_loc[0]; + qd1[i + Q * 1] = wdetJ[i] * qd_loc[1]; + qd1[i + Q * 2] = wdetJ[i] * qd_loc[2]; + qd1[i + Q * 3] = wdetJ[i] * qd_loc[3]; + qd1[i + Q * 4] = wdetJ[i] * qd_loc[4]; + qd1[i + Q * 5] = wdetJ[i] * qd_loc[5]; + } + { + const CeedScalar coeff = + CoeffUnpack1(CoeffPairSecond<3>((const CeedIntScalar *)ctx), (CeedInt)attr[i]); + + qd2[i] = coeff * qw[i] * qw[i] / wdetJ[i]; + } + } + return 0; +} + +#endif // PALACE_LIBCEED_L2_MASS_BUILD_33_QF_H diff --git a/palace/fem/qfunctions/33/utils_33_qf.h b/palace/fem/qfunctions/33/utils_33_qf.h new file mode 100644 index 000000000..a19fffd40 --- /dev/null +++ b/palace/fem/qfunctions/33/utils_33_qf.h @@ -0,0 +1,162 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_LIBCEED_UTILS_33_QF_H +#define PALACE_LIBCEED_UTILS_33_QF_H + +#include + +CEED_QFUNCTION_HELPER CeedScalar DetJ33(const CeedScalar J[9]) +{ + // J: 0 3 6 + // 1 4 7 + // 2 5 8 + return J[0] * (J[4] * J[8] - J[5] * J[7]) - J[1] * (J[3] * J[8] - J[5] * J[6]) + + J[2] * (J[3] * J[7] - J[4] * J[6]); +} + +template +CEED_QFUNCTION_HELPER CeedScalar AdjJt33(const CeedScalar J[9], CeedScalar adjJt[9]) +{ + // Compute adj(J)^T / det(J) and store the result. + // J: 0 3 6 + // 1 4 7 + // 2 5 8 + adjJt[0] = J[4] * J[8] - J[7] * J[5]; + adjJt[3] = J[7] * J[2] - J[1] * J[8]; + adjJt[6] = J[1] * J[5] - J[4] * J[2]; + adjJt[1] = J[6] * J[5] - J[3] * J[8]; + adjJt[4] = J[0] * J[8] - J[6] * J[2]; + adjJt[7] = J[3] * J[2] - J[0] * J[5]; + adjJt[2] = J[3] * J[7] - J[6] * J[4]; + adjJt[5] = J[6] * J[1] - J[0] * J[7]; + adjJt[8] = J[0] * J[4] - J[3] * J[1]; + return ComputeDet ? (J[0] * adjJt[0] + J[1] * adjJt[1] + J[2] * adjJt[2]) : 0.0; +} + +CEED_QFUNCTION_HELPER void MatUnpack33(const CeedScalar *A, const CeedInt A_stride, + CeedScalar A_loc[9]) +{ + A_loc[0] = A[A_stride * 0]; + A_loc[1] = A[A_stride * 1]; + A_loc[2] = A[A_stride * 2]; + A_loc[3] = A[A_stride * 3]; + A_loc[4] = A[A_stride * 4]; + A_loc[5] = A[A_stride * 5]; + A_loc[6] = A[A_stride * 6]; + A_loc[7] = A[A_stride * 7]; + A_loc[8] = A[A_stride * 8]; +} + +CEED_QFUNCTION_HELPER void MultAtBCx33(const CeedScalar A[9], const CeedScalar B[6], + const CeedScalar C[9], const CeedScalar x[3], + CeedScalar y[3]) +{ + // A: 0 3 6 B: 0 1 2 C: 0 3 6 + // 1 4 7 1 3 4 1 4 7 + // 2 5 8 2 4 5 2 5 8 + CeedScalar z[3]; + + y[0] = C[0] * x[0] + C[3] * x[1] + C[6] * x[2]; + y[1] = C[1] * x[0] + C[4] * x[1] + C[7] * x[2]; + y[2] = C[2] * x[0] + C[5] * x[1] + C[8] * x[2]; + + z[0] = B[0] * y[0] + B[1] * y[1] + B[2] * y[2]; + z[1] = B[1] * y[0] + B[3] * y[1] + B[4] * y[2]; + z[2] = B[2] * y[0] + B[4] * y[1] + B[5] * y[2]; + + y[0] = A[0] * z[0] + A[1] * z[1] + A[2] * z[2]; + y[1] 
= A[3] * z[0] + A[4] * z[1] + A[5] * z[2]; + y[2] = A[6] * z[0] + A[7] * z[1] + A[8] * z[2]; +} + +CEED_QFUNCTION_HELPER void MultBAx33(const CeedScalar A[9], const CeedScalar B[6], + const CeedScalar x[3], CeedScalar y[3]) +{ + // A: 0 3 6 B: 0 1 2 + // 1 4 7 1 3 4 + // 2 5 8 2 4 5 + CeedScalar z[3]; + + z[0] = A[0] * x[0] + A[3] * x[1] + A[6] * x[2]; + z[1] = A[1] * x[0] + A[4] * x[1] + A[7] * x[2]; + z[2] = A[2] * x[0] + A[5] * x[1] + A[8] * x[2]; + + y[0] = B[0] * z[0] + B[1] * z[1] + B[2] * z[2]; + y[1] = B[1] * z[0] + B[3] * z[1] + B[4] * z[2]; + y[2] = B[2] * z[0] + B[4] * z[1] + B[5] * z[2]; +} + +CEED_QFUNCTION_HELPER void MultAtBA33(const CeedScalar A[9], const CeedScalar B[6], + CeedScalar C[6]) +{ + // A: 0 3 6 B: 0 1 2 C: 0 1 2 + // 1 4 7 1 3 4 1 3 4 + // 2 5 8 2 4 5 2 4 5 + + // First compute entries of R = B A. + const CeedScalar R11 = B[0] * A[0] + B[1] * A[1] + B[2] * A[2]; + const CeedScalar R21 = B[1] * A[0] + B[3] * A[1] + B[4] * A[2]; + const CeedScalar R31 = B[2] * A[0] + B[4] * A[1] + B[5] * A[2]; + const CeedScalar R12 = B[0] * A[3] + B[1] * A[4] + B[2] * A[5]; + const CeedScalar R22 = B[1] * A[3] + B[3] * A[4] + B[4] * A[5]; + const CeedScalar R32 = B[2] * A[3] + B[4] * A[4] + B[5] * A[5]; + const CeedScalar R13 = B[0] * A[6] + B[1] * A[7] + B[2] * A[8]; + const CeedScalar R23 = B[1] * A[6] + B[3] * A[7] + B[4] * A[8]; + const CeedScalar R33 = B[2] * A[6] + B[4] * A[7] + B[5] * A[8]; + + C[0] = A[0] * R11 + A[1] * R21 + A[2] * R31; + C[1] = A[0] * R12 + A[1] * R22 + A[2] * R32; + C[2] = A[0] * R13 + A[1] * R23 + A[2] * R33; + C[3] = A[3] * R12 + A[4] * R22 + A[5] * R32; + C[4] = A[3] * R13 + A[4] * R23 + A[5] * R33; + C[5] = A[6] * R13 + A[7] * R23 + A[8] * R33; +} + +CEED_QFUNCTION_HELPER void MultAtBC33(const CeedScalar A[9], const CeedScalar B[6], + const CeedScalar C[9], CeedScalar D[9]) +{ + // A, C: 0 3 6 B: 0 1 2 D: 0 3 6 + // 1 4 7 1 3 4 1 4 7 + // 2 5 8 2 4 5 2 5 8 + + // First compute entries of R = B C. 
+ const CeedScalar R11 = B[0] * C[0] + B[1] * C[1] + B[2] * C[2]; + const CeedScalar R21 = B[1] * C[0] + B[3] * C[1] + B[4] * C[2]; + const CeedScalar R31 = B[2] * C[0] + B[4] * C[1] + B[5] * C[2]; + const CeedScalar R12 = B[0] * C[3] + B[1] * C[4] + B[2] * C[5]; + const CeedScalar R22 = B[1] * C[3] + B[3] * C[4] + B[4] * C[5]; + const CeedScalar R32 = B[2] * C[3] + B[4] * C[4] + B[5] * C[5]; + const CeedScalar R13 = B[0] * C[6] + B[1] * C[7] + B[2] * C[8]; + const CeedScalar R23 = B[1] * C[6] + B[3] * C[7] + B[4] * C[8]; + const CeedScalar R33 = B[2] * C[6] + B[4] * C[7] + B[5] * C[8]; + + D[0] = A[0] * R11 + A[1] * R21 + A[2] * R31; + D[1] = A[3] * R11 + A[4] * R21 + A[5] * R31; + D[2] = A[6] * R11 + A[7] * R21 + A[8] * R31; + D[3] = A[0] * R12 + A[1] * R22 + A[2] * R32; + D[4] = A[3] * R12 + A[4] * R22 + A[5] * R32; + D[5] = A[6] * R12 + A[7] * R22 + A[8] * R32; + D[6] = A[0] * R13 + A[1] * R23 + A[2] * R33; + D[7] = A[3] * R13 + A[4] * R23 + A[5] * R33; + D[8] = A[6] * R13 + A[7] * R23 + A[8] * R33; +} + +CEED_QFUNCTION_HELPER void MultBA33(const CeedScalar A[9], const CeedScalar B[6], + CeedScalar C[9]) +{ + // A: 0 3 6 B: 0 1 2 C: 0 3 6 + // 1 4 7 1 3 4 1 4 7 + // 2 5 8 2 4 5 2 5 8 + C[0] = B[0] * A[0] + B[1] * A[1] + B[2] * A[2]; + C[1] = B[1] * A[0] + B[3] * A[1] + B[4] * A[2]; + C[2] = B[2] * A[0] + B[4] * A[1] + B[5] * A[2]; + C[3] = B[0] * A[3] + B[1] * A[4] + B[2] * A[5]; + C[4] = B[1] * A[3] + B[3] * A[4] + B[4] * A[5]; + C[5] = B[2] * A[3] + B[4] * A[4] + B[5] * A[5]; + C[6] = B[0] * A[6] + B[1] * A[7] + B[2] * A[8]; + C[7] = B[1] * A[6] + B[3] * A[7] + B[4] * A[8]; + C[8] = B[2] * A[6] + B[4] * A[7] + B[5] * A[8]; +} + +#endif // PALACE_LIBCEED_UTILS_33_QF_H diff --git a/palace/fem/qfunctions/apply/apply_12_qf.h b/palace/fem/qfunctions/apply/apply_12_qf.h new file mode 100644 index 000000000..b93c4fded --- /dev/null +++ b/palace/fem/qfunctions/apply/apply_12_qf.h @@ -0,0 +1,26 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_LIBCEED_APPLY_12_QF_H +#define PALACE_LIBCEED_APPLY_12_QF_H + +CEED_QFUNCTION(f_apply_12)(void *, CeedInt Q, const CeedScalar *const *in, + CeedScalar *const *out) +{ + const CeedScalar *__restrict__ qd1 = in[0], *__restrict__ qd2 = in[0] + Q, + *__restrict__ u1 = in[1], *__restrict__ u2 = in[2]; + CeedScalar *__restrict__ v1 = out[0], *__restrict__ v2 = out[1]; + + CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) + { + v1[i] = qd1[i] * u1[i]; + + const CeedScalar u20 = u2[i + Q * 0]; + const CeedScalar u21 = u2[i + Q * 1]; + v2[i + Q * 0] = qd2[i + Q * 0] * u20 + qd2[i + Q * 1] * u21; + v2[i + Q * 1] = qd2[i + Q * 1] * u20 + qd2[i + Q * 2] * u21; + } + return 0; +} + +#endif // PALACE_LIBCEED_APPLY_12_QF_H diff --git a/palace/fem/qfunctions/apply/apply_13_qf.h b/palace/fem/qfunctions/apply/apply_13_qf.h new file mode 100644 index 000000000..543be0489 --- /dev/null +++ b/palace/fem/qfunctions/apply/apply_13_qf.h @@ -0,0 +1,28 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_LIBCEED_APPLY_13_QF_H +#define PALACE_LIBCEED_APPLY_13_QF_H + +CEED_QFUNCTION(f_apply_13)(void *, CeedInt Q, const CeedScalar *const *in, + CeedScalar *const *out) +{ + const CeedScalar *__restrict__ qd1 = in[0], *__restrict__ qd2 = in[0] + Q, + *__restrict__ u1 = in[1], *__restrict__ u2 = in[2]; + CeedScalar *__restrict__ v1 = out[0], *__restrict__ v2 = out[1]; + + CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) + { + v1[i] = qd1[i] * u1[i]; + + const CeedScalar u20 = u2[i + Q * 0]; + const CeedScalar u21 = u2[i + Q * 1]; + const CeedScalar u22 = u2[i + Q * 2]; + v2[i + Q * 0] = qd2[i + Q * 0] * u20 + qd2[i + Q * 1] * u21 + qd2[i + Q * 2] * u22; + v2[i + Q * 1] = qd2[i + Q * 1] * u20 + qd2[i + Q * 3] * u21 + qd2[i + Q * 4] * u22; + v2[i + Q * 2] = qd2[i + Q * 2] * u20 + qd2[i + Q * 4] * u21 + qd2[i + Q * 5] * u22; + } + return 0; +} + +#endif // PALACE_LIBCEED_APPLY_13_QF_H diff --git a/palace/fem/qfunctions/apply/apply_1_qf.h b/palace/fem/qfunctions/apply/apply_1_qf.h new file mode 100644 index 000000000..0caeedd99 --- /dev/null +++ b/palace/fem/qfunctions/apply/apply_1_qf.h @@ -0,0 +1,20 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_LIBCEED_APPLY_1_QF_H +#define PALACE_LIBCEED_APPLY_1_QF_H + +CEED_QFUNCTION(f_apply_1)(void *, CeedInt Q, const CeedScalar *const *in, + CeedScalar *const *out) +{ + const CeedScalar *__restrict__ qd = in[0], *__restrict__ u = in[1]; + CeedScalar *__restrict__ v = out[0]; + + CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) + { + v[i] = qd[i] * u[i]; + } + return 0; +} + +#endif // PALACE_LIBCEED_APPLY_1_QF_H diff --git a/palace/fem/qfunctions/apply/apply_21_qf.h b/palace/fem/qfunctions/apply/apply_21_qf.h new file mode 100644 index 000000000..d9b61f93f --- /dev/null +++ b/palace/fem/qfunctions/apply/apply_21_qf.h @@ -0,0 +1,26 @@ +// Copyright Amazon.com, Inc. or its affiliates. 
All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_LIBCEED_APPLY_21_QF_H +#define PALACE_LIBCEED_APPLY_21_QF_H + +CEED_QFUNCTION(f_apply_21)(void *, CeedInt Q, const CeedScalar *const *in, + CeedScalar *const *out) +{ + const CeedScalar *__restrict__ qd1 = in[0], *__restrict__ qd2 = in[0] + 3 * Q, + *__restrict__ u1 = in[1], *__restrict__ u2 = in[2]; + CeedScalar *__restrict__ v1 = out[0], *__restrict__ v2 = out[1]; + + CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) + { + const CeedScalar u10 = u1[i + Q * 0]; + const CeedScalar u11 = u1[i + Q * 1]; + v1[i + Q * 0] = qd1[i + Q * 0] * u10 + qd1[i + Q * 1] * u11; + v1[i + Q * 1] = qd1[i + Q * 1] * u10 + qd1[i + Q * 2] * u11; + + v2[i] = qd2[i] * u2[i]; + } + return 0; +} + +#endif // PALACE_LIBCEED_APPLY_21_QF_H diff --git a/palace/fem/qfunctions/apply/apply_22_qf.h b/palace/fem/qfunctions/apply/apply_22_qf.h new file mode 100644 index 000000000..bc81094b9 --- /dev/null +++ b/palace/fem/qfunctions/apply/apply_22_qf.h @@ -0,0 +1,29 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_LIBCEED_APPLY_22_QF_H +#define PALACE_LIBCEED_APPLY_22_QF_H + +CEED_QFUNCTION(f_apply_22)(void *, CeedInt Q, const CeedScalar *const *in, + CeedScalar *const *out) +{ + const CeedScalar *__restrict__ qd1 = in[0], *__restrict__ qd2 = in[0] + 3 * Q, + *__restrict__ u1 = in[1], *__restrict__ u2 = in[2]; + CeedScalar *__restrict__ v1 = out[0], *__restrict__ v2 = out[1]; + + CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) + { + const CeedScalar u10 = u1[i + Q * 0]; + const CeedScalar u11 = u1[i + Q * 1]; + v1[i + Q * 0] = qd1[i + Q * 0] * u10 + qd1[i + Q * 1] * u11; + v1[i + Q * 1] = qd1[i + Q * 1] * u10 + qd1[i + Q * 2] * u11; + + const CeedScalar u20 = u2[i + Q * 0]; + const CeedScalar u21 = u2[i + Q * 1]; + v2[i + Q * 0] = qd2[i + Q * 0] * u20 + qd2[i + Q * 1] * u21; + v2[i + Q * 1] = qd2[i + Q * 1] * u20 + qd2[i + Q * 2] * u21; + } + return 0; +} + +#endif // PALACE_LIBCEED_APPLY_22_QF_H diff --git a/palace/fem/qfunctions/apply/apply_2_qf.h b/palace/fem/qfunctions/apply/apply_2_qf.h new file mode 100644 index 000000000..8329f2732 --- /dev/null +++ b/palace/fem/qfunctions/apply/apply_2_qf.h @@ -0,0 +1,23 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_LIBCEED_APPLY_2_QF_H +#define PALACE_LIBCEED_APPLY_2_QF_H + +CEED_QFUNCTION(f_apply_2)(void *, CeedInt Q, const CeedScalar *const *in, + CeedScalar *const *out) +{ + const CeedScalar *__restrict__ qd = in[0], *__restrict__ u = in[1]; + CeedScalar *__restrict__ v = out[0]; + + CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) + { + const CeedScalar u0 = u[i + Q * 0]; + const CeedScalar u1 = u[i + Q * 1]; + v[i + Q * 0] = qd[i + Q * 0] * u0 + qd[i + Q * 1] * u1; + v[i + Q * 1] = qd[i + Q * 1] * u0 + qd[i + Q * 2] * u1; + } + return 0; +} + +#endif // PALACE_LIBCEED_APPLY_2_QF_H diff --git a/palace/fem/qfunctions/apply/apply_31_qf.h b/palace/fem/qfunctions/apply/apply_31_qf.h new file mode 100644 index 000000000..853b5cb41 --- /dev/null +++ b/palace/fem/qfunctions/apply/apply_31_qf.h @@ -0,0 +1,28 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_LIBCEED_APPLY_31_QF_H +#define PALACE_LIBCEED_APPLY_31_QF_H + +CEED_QFUNCTION(f_apply_31)(void *, CeedInt Q, const CeedScalar *const *in, + CeedScalar *const *out) +{ + const CeedScalar *__restrict__ qd1 = in[0], *__restrict__ qd2 = in[0] + 6 * Q, + *__restrict__ u1 = in[1], *__restrict__ u2 = in[2]; + CeedScalar *__restrict__ v1 = out[0], *__restrict__ v2 = out[1]; + + CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) + { + const CeedScalar u10 = u1[i + Q * 0]; + const CeedScalar u11 = u1[i + Q * 1]; + const CeedScalar u12 = u1[i + Q * 2]; + v1[i + Q * 0] = qd1[i + Q * 0] * u10 + qd1[i + Q * 1] * u11 + qd1[i + Q * 2] * u12; + v1[i + Q * 1] = qd1[i + Q * 1] * u10 + qd1[i + Q * 3] * u11 + qd1[i + Q * 4] * u12; + v1[i + Q * 2] = qd1[i + Q * 2] * u10 + qd1[i + Q * 4] * u11 + qd1[i + Q * 5] * u12; + + v2[i] = qd2[i] * u2[i]; + } + return 0; +} + +#endif // PALACE_LIBCEED_APPLY_31_QF_H diff --git a/palace/fem/qfunctions/apply/apply_33_qf.h b/palace/fem/qfunctions/apply/apply_33_qf.h new file mode 
100644 index 000000000..ea9781584 --- /dev/null +++ b/palace/fem/qfunctions/apply/apply_33_qf.h @@ -0,0 +1,33 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_LIBCEED_APPLY_33_QF_H +#define PALACE_LIBCEED_APPLY_33_QF_H + +CEED_QFUNCTION(f_apply_33)(void *, CeedInt Q, const CeedScalar *const *in, + CeedScalar *const *out) +{ + const CeedScalar *__restrict__ qd1 = in[0], *__restrict__ qd2 = in[0] + 6 * Q, + *__restrict__ u1 = in[1], *__restrict__ u2 = in[2]; + CeedScalar *__restrict__ v1 = out[0], *__restrict__ v2 = out[1]; + + CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) + { + const CeedScalar u10 = u1[i + Q * 0]; + const CeedScalar u11 = u1[i + Q * 1]; + const CeedScalar u12 = u1[i + Q * 2]; + v1[i + Q * 0] = qd1[i + Q * 0] * u10 + qd1[i + Q * 1] * u11 + qd1[i + Q * 2] * u12; + v1[i + Q * 1] = qd1[i + Q * 1] * u10 + qd1[i + Q * 3] * u11 + qd1[i + Q * 4] * u12; + v1[i + Q * 2] = qd1[i + Q * 2] * u10 + qd1[i + Q * 4] * u11 + qd1[i + Q * 5] * u12; + + const CeedScalar u20 = u2[i + Q * 0]; + const CeedScalar u21 = u2[i + Q * 1]; + const CeedScalar u22 = u2[i + Q * 2]; + v2[i + Q * 0] = qd2[i + Q * 0] * u20 + qd2[i + Q * 1] * u21 + qd2[i + Q * 2] * u22; + v2[i + Q * 1] = qd2[i + Q * 1] * u20 + qd2[i + Q * 3] * u21 + qd2[i + Q * 4] * u22; + v2[i + Q * 2] = qd2[i + Q * 2] * u20 + qd2[i + Q * 4] * u21 + qd2[i + Q * 5] * u22; + } + return 0; +} + +#endif // PALACE_LIBCEED_APPLY_33_QF_H diff --git a/palace/fem/qfunctions/apply/apply_3_qf.h b/palace/fem/qfunctions/apply/apply_3_qf.h new file mode 100644 index 000000000..a33bac466 --- /dev/null +++ b/palace/fem/qfunctions/apply/apply_3_qf.h @@ -0,0 +1,25 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_LIBCEED_APPLY_3_QF_H +#define PALACE_LIBCEED_APPLY_3_QF_H + +CEED_QFUNCTION(f_apply_3)(void *, CeedInt Q, const CeedScalar *const *in, + CeedScalar *const *out) +{ + const CeedScalar *__restrict__ qd = in[0], *__restrict__ u = in[1]; + CeedScalar *__restrict__ v = out[0]; + + CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) + { + const CeedScalar u0 = u[i + Q * 0]; + const CeedScalar u1 = u[i + Q * 1]; + const CeedScalar u2 = u[i + Q * 2]; + v[i + Q * 0] = qd[i + Q * 0] * u0 + qd[i + Q * 1] * u1 + qd[i + Q * 2] * u2; + v[i + Q * 1] = qd[i + Q * 1] * u0 + qd[i + Q * 3] * u1 + qd[i + Q * 4] * u2; + v[i + Q * 2] = qd[i + Q * 2] * u0 + qd[i + Q * 4] * u1 + qd[i + Q * 5] * u2; + } + return 0; +} + +#endif // PALACE_LIBCEED_APPLY_3_QF_H diff --git a/palace/fem/qfunctions/apply_qf.h b/palace/fem/qfunctions/apply_qf.h index 43f00e111..ccf8176c4 100644 --- a/palace/fem/qfunctions/apply_qf.h +++ b/palace/fem/qfunctions/apply_qf.h @@ -13,179 +13,14 @@ // data is arranged to be applied with the first vdim*(vdim+1)/2 components for the first // input/output and the remainder for the second. 
-CEED_QFUNCTION(f_apply_1)(void *, CeedInt Q, const CeedScalar *const *in, - CeedScalar *const *out) -{ - const CeedScalar *__restrict__ qd = in[0], *__restrict__ u = in[1]; - CeedScalar *__restrict__ v = out[0]; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - v[i] = qd[i] * u[i]; - } - return 0; -} - -CEED_QFUNCTION(f_apply_2)(void *, CeedInt Q, const CeedScalar *const *in, - CeedScalar *const *out) -{ - const CeedScalar *__restrict__ qd = in[0], *__restrict__ u = in[1]; - CeedScalar *__restrict__ v = out[0]; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - const CeedScalar u0 = u[i + Q * 0]; - const CeedScalar u1 = u[i + Q * 1]; - v[i + Q * 0] = qd[i + Q * 0] * u0 + qd[i + Q * 1] * u1; - v[i + Q * 1] = qd[i + Q * 1] * u0 + qd[i + Q * 2] * u1; - } - return 0; -} - -CEED_QFUNCTION(f_apply_3)(void *, CeedInt Q, const CeedScalar *const *in, - CeedScalar *const *out) -{ - const CeedScalar *__restrict__ qd = in[0], *__restrict__ u = in[1]; - CeedScalar *__restrict__ v = out[0]; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - const CeedScalar u0 = u[i + Q * 0]; - const CeedScalar u1 = u[i + Q * 1]; - const CeedScalar u2 = u[i + Q * 2]; - v[i + Q * 0] = qd[i + Q * 0] * u0 + qd[i + Q * 1] * u1 + qd[i + Q * 2] * u2; - v[i + Q * 1] = qd[i + Q * 1] * u0 + qd[i + Q * 3] * u1 + qd[i + Q * 4] * u2; - v[i + Q * 2] = qd[i + Q * 2] * u0 + qd[i + Q * 4] * u1 + qd[i + Q * 5] * u2; - } - return 0; -} - -CEED_QFUNCTION(f_apply_22)(void *, CeedInt Q, const CeedScalar *const *in, - CeedScalar *const *out) -{ - const CeedScalar *__restrict__ qd1 = in[0], *__restrict__ qd2 = in[0] + 3 * Q, - *__restrict__ u1 = in[1], *__restrict__ u2 = in[2]; - CeedScalar *__restrict__ v1 = out[0], *__restrict__ v2 = out[1]; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - const CeedScalar u10 = u1[i + Q * 0]; - const CeedScalar u11 = u1[i + Q * 1]; - v1[i + Q * 0] = qd1[i + Q * 0] * u10 + qd1[i + Q * 1] * u11; - v1[i + Q * 1] = qd1[i + Q * 1] * u10 + qd1[i + Q * 2] * 
u11; - - const CeedScalar u20 = u2[i + Q * 0]; - const CeedScalar u21 = u2[i + Q * 1]; - v2[i + Q * 0] = qd2[i + Q * 0] * u20 + qd2[i + Q * 1] * u21; - v2[i + Q * 1] = qd2[i + Q * 1] * u20 + qd2[i + Q * 2] * u21; - } - return 0; -} - -CEED_QFUNCTION(f_apply_33)(void *, CeedInt Q, const CeedScalar *const *in, - CeedScalar *const *out) -{ - const CeedScalar *__restrict__ qd1 = in[0], *__restrict__ qd2 = in[0] + 6 * Q, - *__restrict__ u1 = in[1], *__restrict__ u2 = in[2]; - CeedScalar *__restrict__ v1 = out[0], *__restrict__ v2 = out[1]; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - const CeedScalar u10 = u1[i + Q * 0]; - const CeedScalar u11 = u1[i + Q * 1]; - const CeedScalar u12 = u1[i + Q * 2]; - v1[i + Q * 0] = qd1[i + Q * 0] * u10 + qd1[i + Q * 1] * u11 + qd1[i + Q * 2] * u12; - v1[i + Q * 1] = qd1[i + Q * 1] * u10 + qd1[i + Q * 3] * u11 + qd1[i + Q * 4] * u12; - v1[i + Q * 2] = qd1[i + Q * 2] * u10 + qd1[i + Q * 4] * u11 + qd1[i + Q * 5] * u12; - - const CeedScalar u20 = u2[i + Q * 0]; - const CeedScalar u21 = u2[i + Q * 1]; - const CeedScalar u22 = u2[i + Q * 2]; - v2[i + Q * 0] = qd2[i + Q * 0] * u20 + qd2[i + Q * 1] * u21 + qd2[i + Q * 2] * u22; - v2[i + Q * 1] = qd2[i + Q * 1] * u20 + qd2[i + Q * 3] * u21 + qd2[i + Q * 4] * u22; - v2[i + Q * 2] = qd2[i + Q * 2] * u20 + qd2[i + Q * 4] * u21 + qd2[i + Q * 5] * u22; - } - return 0; -} - -CEED_QFUNCTION(f_apply_12)(void *, CeedInt Q, const CeedScalar *const *in, - CeedScalar *const *out) -{ - const CeedScalar *__restrict__ qd1 = in[0], *__restrict__ qd2 = in[0] + Q, - *__restrict__ u1 = in[1], *__restrict__ u2 = in[2]; - CeedScalar *__restrict__ v1 = out[0], *__restrict__ v2 = out[1]; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - v1[i] = qd1[i] * u1[i]; - - const CeedScalar u20 = u2[i + Q * 0]; - const CeedScalar u21 = u2[i + Q * 1]; - v2[i + Q * 0] = qd2[i + Q * 0] * u20 + qd2[i + Q * 1] * u21; - v2[i + Q * 1] = qd2[i + Q * 1] * u20 + qd2[i + Q * 2] * u21; - } - return 0; -} - 
-CEED_QFUNCTION(f_apply_13)(void *, CeedInt Q, const CeedScalar *const *in, - CeedScalar *const *out) -{ - const CeedScalar *__restrict__ qd1 = in[0], *__restrict__ qd2 = in[0] + Q, - *__restrict__ u1 = in[1], *__restrict__ u2 = in[2]; - CeedScalar *__restrict__ v1 = out[0], *__restrict__ v2 = out[1]; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - v1[i] = qd1[i] * u1[i]; - - const CeedScalar u20 = u2[i + Q * 0]; - const CeedScalar u21 = u2[i + Q * 1]; - const CeedScalar u22 = u2[i + Q * 2]; - v2[i + Q * 0] = qd2[i + Q * 0] * u20 + qd2[i + Q * 1] * u21 + qd2[i + Q * 2] * u22; - v2[i + Q * 1] = qd2[i + Q * 1] * u20 + qd2[i + Q * 3] * u21 + qd2[i + Q * 4] * u22; - v2[i + Q * 2] = qd2[i + Q * 2] * u20 + qd2[i + Q * 4] * u21 + qd2[i + Q * 5] * u22; - } - return 0; -} - -CEED_QFUNCTION(f_apply_21)(void *, CeedInt Q, const CeedScalar *const *in, - CeedScalar *const *out) -{ - const CeedScalar *__restrict__ qd1 = in[0], *__restrict__ qd2 = in[0] + 3 * Q, - *__restrict__ u1 = in[1], *__restrict__ u2 = in[2]; - CeedScalar *__restrict__ v1 = out[0], *__restrict__ v2 = out[1]; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - const CeedScalar u10 = u1[i + Q * 0]; - const CeedScalar u11 = u1[i + Q * 1]; - v1[i + Q * 0] = qd1[i + Q * 0] * u10 + qd1[i + Q * 1] * u11; - v1[i + Q * 1] = qd1[i + Q * 1] * u10 + qd1[i + Q * 2] * u11; - - v2[i] = qd2[i] * u2[i]; - } - return 0; -} - -CEED_QFUNCTION(f_apply_31)(void *, CeedInt Q, const CeedScalar *const *in, - CeedScalar *const *out) -{ - const CeedScalar *__restrict__ qd1 = in[0], *__restrict__ qd2 = in[0] + 6 * Q, - *__restrict__ u1 = in[1], *__restrict__ u2 = in[2]; - CeedScalar *__restrict__ v1 = out[0], *__restrict__ v2 = out[1]; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - const CeedScalar u10 = u1[i + Q * 0]; - const CeedScalar u11 = u1[i + Q * 1]; - const CeedScalar u12 = u1[i + Q * 2]; - v1[i + Q * 0] = qd1[i + Q * 0] * u10 + qd1[i + Q * 1] * u11 + qd1[i + Q * 2] * u12; - v1[i + Q * 1] = qd1[i + Q * 
1] * u10 + qd1[i + Q * 3] * u11 + qd1[i + Q * 4] * u12; - v1[i + Q * 2] = qd1[i + Q * 2] * u10 + qd1[i + Q * 4] * u11 + qd1[i + Q * 5] * u12; - - v2[i] = qd2[i] * u2[i]; - } - return 0; -} +#include "apply/apply_12_qf.h" +#include "apply/apply_13_qf.h" +#include "apply/apply_1_qf.h" +#include "apply/apply_21_qf.h" +#include "apply/apply_22_qf.h" +#include "apply/apply_2_qf.h" +#include "apply/apply_31_qf.h" +#include "apply/apply_33_qf.h" +#include "apply/apply_3_qf.h" #endif // PALACE_LIBCEED_APPLY_QF_H diff --git a/palace/fem/qfunctions/coeff/coeff_1_qf.h b/palace/fem/qfunctions/coeff/coeff_1_qf.h new file mode 100644 index 000000000..873f0ddfd --- /dev/null +++ b/palace/fem/qfunctions/coeff/coeff_1_qf.h @@ -0,0 +1,21 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_LIBCEED_COEFF_1_QF_H +#define PALACE_LIBCEED_COEFF_1_QF_H + +#include "coeff_qf.h" + +CEED_QFUNCTION_HELPER CeedScalar CoeffUnpack1(const CeedIntScalar *ctx, const CeedInt attr) +{ + const CeedInt k = AttrMat(ctx)[attr - 1].first; + return MatCoeff(ctx)[k].second; +} + +CEED_QFUNCTION_HELPER void CoeffUnpack1(const CeedIntScalar *ctx, const CeedInt attr, + CeedScalar coeff[1]) +{ + coeff[0] = CoeffUnpack1(ctx, attr); +} + +#endif // PALACE_LIBCEED_COEFF_1_QF_H diff --git a/palace/fem/qfunctions/coeff/coeff_2_qf.h b/palace/fem/qfunctions/coeff/coeff_2_qf.h new file mode 100644 index 000000000..1651cd81e --- /dev/null +++ b/palace/fem/qfunctions/coeff/coeff_2_qf.h @@ -0,0 +1,19 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_LIBCEED_COEFF_2_QF_H +#define PALACE_LIBCEED_COEFF_2_QF_H + +#include "coeff_qf.h" + +CEED_QFUNCTION_HELPER void CoeffUnpack2(const CeedIntScalar *ctx, const CeedInt attr, + CeedScalar coeff[3]) +{ + const CeedInt k = AttrMat(ctx)[attr - 1].first; + const CeedIntScalar *mat_coeff = MatCoeff(ctx); + coeff[0] = mat_coeff[3 * k + 0].second; + coeff[1] = mat_coeff[3 * k + 1].second; + coeff[2] = mat_coeff[3 * k + 2].second; +} + +#endif // PALACE_LIBCEED_COEFF_2_QF_H diff --git a/palace/fem/qfunctions/coeff/coeff_3_qf.h b/palace/fem/qfunctions/coeff/coeff_3_qf.h new file mode 100644 index 000000000..c2242864a --- /dev/null +++ b/palace/fem/qfunctions/coeff/coeff_3_qf.h @@ -0,0 +1,22 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_LIBCEED_COEFF_3_QF_H +#define PALACE_LIBCEED_COEFF_3_QF_H + +#include "coeff_qf.h" + +CEED_QFUNCTION_HELPER void CoeffUnpack3(const CeedIntScalar *ctx, const CeedInt attr, + CeedScalar coeff[6]) +{ + const CeedInt k = AttrMat(ctx)[attr - 1].first; + const CeedIntScalar *mat_coeff = MatCoeff(ctx); + coeff[0] = mat_coeff[6 * k + 0].second; + coeff[1] = mat_coeff[6 * k + 1].second; + coeff[2] = mat_coeff[6 * k + 2].second; + coeff[3] = mat_coeff[6 * k + 3].second; + coeff[4] = mat_coeff[6 * k + 4].second; + coeff[5] = mat_coeff[6 * k + 5].second; +} + +#endif // PALACE_LIBCEED_COEFF_3_QF_H diff --git a/palace/fem/qfunctions/coeff/coeff_qf.h b/palace/fem/qfunctions/coeff/coeff_qf.h new file mode 100644 index 000000000..2bee43e03 --- /dev/null +++ b/palace/fem/qfunctions/coeff/coeff_qf.h @@ -0,0 +1,38 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_LIBCEED_COEFF_QF_H +#define PALACE_LIBCEED_COEFF_QF_H + +union CeedIntScalar +{ + CeedInt first; + CeedScalar second; +}; + +// The first entry of ctx is the number of (1-based) attributes, followed by the entries of +// the attribute to material index array (these are 0-based). +// The next entry is the number of material property coefficients, followed by the +// coefficients. +// Pair coefficients are two coefficient contexts arranged contiguously in memory. + +CEED_QFUNCTION_HELPER const CeedIntScalar *AttrMat(const CeedIntScalar *ctx) +{ + return ctx + 1; +} + +CEED_QFUNCTION_HELPER const CeedIntScalar *MatCoeff(const CeedIntScalar *ctx) +{ + const CeedInt num_attr = ctx[0].first; + return ctx + 2 + num_attr; +} + +template <int DIM> +CEED_QFUNCTION_HELPER const CeedIntScalar *CoeffPairSecond(const CeedIntScalar *ctx) +{ + const CeedInt num_attr = ctx[0].first; + const CeedInt num_mat = ctx[1 + num_attr].first; + return ctx + 2 + num_attr + (DIM * (DIM + 1) / 2) * num_mat; +} + +#endif // PALACE_LIBCEED_COEFF_QF_H diff --git a/palace/fem/qfunctions/coeff_qf.h b/palace/fem/qfunctions/coeff_qf.h deleted file mode 100644 index 6c45c21b6..000000000 --- a/palace/fem/qfunctions/coeff_qf.h +++ /dev/null @@ -1,86 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: Apache-2.0 - -#ifndef PALACE_LIBCEED_COEFF_QF_H -#define PALACE_LIBCEED_COEFF_QF_H - -union CeedIntScalar -{ - CeedInt first; - CeedScalar second; -}; - -// The first entry of ctx is the number of (1-based) attributes, followed by the entries of -// the attribute to material index array (these are 0-based). -// The next entry is the number of material property coefficients, followed by the -// coefficients. -// Pair coefficients are two coefficient contexts arranged contiguously in memory. 
- -CEED_QFUNCTION_HELPER const CeedIntScalar *AttrMat(const CeedIntScalar *ctx) -{ - return ctx + 1; -} - -CEED_QFUNCTION_HELPER const CeedIntScalar *MatCoeff(const CeedIntScalar *ctx) -{ - const CeedInt num_attr = ctx[0].first; - return ctx + 2 + num_attr; -} - -CEED_QFUNCTION_HELPER CeedScalar CoeffUnpack1(const CeedIntScalar *ctx, const CeedInt attr) -{ - const CeedInt k = AttrMat(ctx)[attr - 1].first; - return MatCoeff(ctx)[k].second; -} - -CEED_QFUNCTION_HELPER void CoeffUnpack1(const CeedIntScalar *ctx, const CeedInt attr, - CeedScalar coeff[1]) -{ - coeff[0] = CoeffUnpack1(ctx, attr); -} - -CEED_QFUNCTION_HELPER void CoeffUnpack2(const CeedIntScalar *ctx, const CeedInt attr, - CeedScalar coeff[3]) -{ - const CeedInt k = AttrMat(ctx)[attr - 1].first; - const CeedIntScalar *mat_coeff = MatCoeff(ctx); - coeff[0] = mat_coeff[3 * k + 0].second; - coeff[1] = mat_coeff[3 * k + 1].second; - coeff[2] = mat_coeff[3 * k + 2].second; -} - -CEED_QFUNCTION_HELPER void CoeffUnpack3(const CeedIntScalar *ctx, const CeedInt attr, - CeedScalar coeff[6]) -{ - const CeedInt k = AttrMat(ctx)[attr - 1].first; - const CeedIntScalar *mat_coeff = MatCoeff(ctx); - coeff[0] = mat_coeff[6 * k + 0].second; - coeff[1] = mat_coeff[6 * k + 1].second; - coeff[2] = mat_coeff[6 * k + 2].second; - coeff[3] = mat_coeff[6 * k + 3].second; - coeff[4] = mat_coeff[6 * k + 4].second; - coeff[5] = mat_coeff[6 * k + 5].second; -} - -CEED_QFUNCTION_HELPER const CeedIntScalar *CoeffPairSecond1(const CeedIntScalar *ctx) -{ - const CeedInt num_attr = ctx[0].first; - const CeedInt num_mat = ctx[1 + num_attr].first; - return ctx + 2 + num_attr + num_mat; -} - -CEED_QFUNCTION_HELPER const CeedIntScalar *CoeffPairSecond2(const CeedIntScalar *ctx) -{ - const CeedInt num_attr = ctx[0].first; - const CeedInt num_mat = ctx[1 + num_attr].first; - return ctx + 2 + num_attr + 3 * num_mat; -} - -CEED_QFUNCTION_HELPER const CeedIntScalar *CoeffPairSecond3(const CeedIntScalar *ctx) -{ - const CeedInt num_attr = 
ctx[0].first; - const CeedInt num_mat = ctx[1 + num_attr].first; - return ctx + 2 + num_attr + 6 * num_mat; -} - -#endif // PALACE_LIBCEED_COEFF_QF_H diff --git a/palace/fem/qfunctions/geom_qf.h b/palace/fem/qfunctions/geom_qf.h index 8bcb85312..dfbc33817 100644 --- a/palace/fem/qfunctions/geom_qf.h +++ b/palace/fem/qfunctions/geom_qf.h @@ -4,9 +4,6 @@ #ifndef PALACE_LIBCEED_GEOM_QF_H #define PALACE_LIBCEED_GEOM_QF_H -#include "utils_geom_qf.h" -#include "utils_qf.h" - // libCEED QFunction for building geometry factors for integration and transformations. // At every quadrature point, compute qw * det(J) and adj(J)^T / |J| and store the result. // in[0] is quadrature weights, shape [Q] @@ -14,93 +11,9 @@ // out[0] is quadrature data, stored as {attribute, Jacobian determinant, (transpose) // adjugate Jacobian} quadrature data, shape [ncomp=2+space_dim*dim, Q] -CEED_QFUNCTION(f_build_geom_factor_22)(void *, CeedInt Q, const CeedScalar *const *in, - CeedScalar *const *out) -{ - const CeedScalar *qw = in[0], *J = in[1]; - CeedScalar *wdetJ = out[0] + Q, *adjJt = out[0] + 2 * Q; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - CeedScalar J_loc[4], adjJt_loc[4]; - MatUnpack22(J + i, Q, J_loc); - const CeedScalar detJ = AdjJt22(J_loc, adjJt_loc); - - wdetJ[i] = qw[i] * detJ; - adjJt[i + Q * 0] = adjJt_loc[0] / detJ; - adjJt[i + Q * 1] = adjJt_loc[1] / detJ; - adjJt[i + Q * 2] = adjJt_loc[2] / detJ; - adjJt[i + Q * 3] = adjJt_loc[3] / detJ; - } - return 0; -} - -CEED_QFUNCTION(f_build_geom_factor_33)(void *, CeedInt Q, const CeedScalar *const *in, - CeedScalar *const *out) -{ - const CeedScalar *qw = in[0], *J = in[1]; - CeedScalar *wdetJ = out[0] + Q, *adjJt = out[0] + 2 * Q; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - CeedScalar J_loc[9], adjJt_loc[9]; - MatUnpack33(J + i, Q, J_loc); - const CeedScalar detJ = AdjJt33(J_loc, adjJt_loc); - - wdetJ[i] = qw[i] * detJ; - adjJt[i + Q * 0] = adjJt_loc[0] / detJ; - adjJt[i + Q * 1] = adjJt_loc[1] / detJ; - 
adjJt[i + Q * 2] = adjJt_loc[2] / detJ; - adjJt[i + Q * 3] = adjJt_loc[3] / detJ; - adjJt[i + Q * 4] = adjJt_loc[4] / detJ; - adjJt[i + Q * 5] = adjJt_loc[5] / detJ; - adjJt[i + Q * 6] = adjJt_loc[6] / detJ; - adjJt[i + Q * 7] = adjJt_loc[7] / detJ; - adjJt[i + Q * 8] = adjJt_loc[8] / detJ; - } - return 0; -} - -CEED_QFUNCTION(f_build_geom_factor_21)(void *, CeedInt Q, const CeedScalar *const *in, - CeedScalar *const *out) -{ - const CeedScalar *qw = in[0], *J = in[1]; - CeedScalar *wdetJ = out[0] + Q, *adjJt = out[0] + 2 * Q; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - CeedScalar J_loc[2], adjJt_loc[2]; - MatUnpack21(J + i, Q, J_loc); - const CeedScalar detJ = AdjJt21(J_loc, adjJt_loc); - - wdetJ[i] = qw[i] * detJ; - adjJt[i + Q * 0] = adjJt_loc[0] / detJ; - adjJt[i + Q * 1] = adjJt_loc[1] / detJ; - } - return 0; -} - -CEED_QFUNCTION(f_build_geom_factor_32)(void *, CeedInt Q, const CeedScalar *const *in, - CeedScalar *const *out) -{ - const CeedScalar *qw = in[0], *J = in[1]; - CeedScalar *wdetJ = out[0] + Q, *adjJt = out[0] + 2 * Q; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - CeedScalar J_loc[6], adjJt_loc[6]; - MatUnpack32(J + i, Q, J_loc); - const CeedScalar detJ = AdjJt32(J_loc, adjJt_loc); - - wdetJ[i] = qw[i] * detJ; - adjJt[i + Q * 0] = adjJt_loc[0] / detJ; - adjJt[i + Q * 1] = adjJt_loc[1] / detJ; - adjJt[i + Q * 2] = adjJt_loc[2] / detJ; - adjJt[i + Q * 3] = adjJt_loc[3] / detJ; - adjJt[i + Q * 4] = adjJt_loc[4] / detJ; - adjJt[i + Q * 5] = adjJt_loc[5] / detJ; - } - return 0; -} +#include "21/geom_21_qf.h" +#include "22/geom_22_qf.h" +#include "32/geom_32_qf.h" +#include "33/geom_33_qf.h" #endif // PALACE_LIBCEED_GEOM_QF_H diff --git a/palace/fem/qfunctions/h1_build_qf.h b/palace/fem/qfunctions/h1_build_qf.h deleted file mode 100644 index 0d7fe5ac8..000000000 --- a/palace/fem/qfunctions/h1_build_qf.h +++ /dev/null @@ -1,65 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
-// SPDX-License-Identifier: Apache-2.0 - -#ifndef PALACE_LIBCEED_H1_BUILD_QF_H -#define PALACE_LIBCEED_H1_BUILD_QF_H - -#include "coeff_qf.h" - -// Build functions assemble the quadrature point data, stored as a symmetric matrix. - -CEED_QFUNCTION(f_build_h1_1)(void *__restrict__ ctx, CeedInt Q, const CeedScalar *const *in, - CeedScalar *const *out) -{ - const CeedScalar *attr = in[0], *wdetJ = in[0] + Q; - CeedScalar *qd = out[0]; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - const CeedScalar coeff = CoeffUnpack1((const CeedIntScalar *)ctx, (CeedInt)attr[i]); - - qd[i] = coeff * wdetJ[i]; - } - return 0; -} - -CEED_QFUNCTION(f_build_h1_2)(void *__restrict__ ctx, CeedInt Q, const CeedScalar *const *in, - CeedScalar *const *out) -{ - const CeedScalar *attr = in[0], *wdetJ = in[0] + Q; - CeedScalar *qd = out[0]; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - CeedScalar coeff[3]; - CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); - - qd[i + Q * 0] = wdetJ[i] * coeff[0]; - qd[i + Q * 1] = wdetJ[i] * coeff[1]; - qd[i + Q * 2] = wdetJ[i] * coeff[2]; - } - return 0; -} - -CEED_QFUNCTION(f_build_h1_3)(void *__restrict__ ctx, CeedInt Q, const CeedScalar *const *in, - CeedScalar *const *out) -{ - const CeedScalar *attr = in[0], *wdetJ = in[0] + Q; - CeedScalar *qd = out[0]; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - CeedScalar coeff[6]; - CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); - - qd[i + Q * 0] = wdetJ[i] * coeff[0]; - qd[i + Q * 1] = wdetJ[i] * coeff[1]; - qd[i + Q * 2] = wdetJ[i] * coeff[2]; - qd[i + Q * 3] = wdetJ[i] * coeff[3]; - qd[i + Q * 4] = wdetJ[i] * coeff[4]; - qd[i + Q * 5] = wdetJ[i] * coeff[5]; - } - return 0; -} - -#endif // PALACE_LIBCEED_H1_BUILD_QF_H diff --git a/palace/fem/qfunctions/h1_qf.h b/palace/fem/qfunctions/h1_qf.h index 43ea7754a..8de7cf6b2 100644 --- a/palace/fem/qfunctions/h1_qf.h +++ b/palace/fem/qfunctions/h1_qf.h @@ -4,66 +4,18 @@ #ifndef PALACE_LIBCEED_H1_QF_H 
#define PALACE_LIBCEED_H1_QF_H -#include "coeff_qf.h" - // libCEED QFunctions for H1 operators (Piola transformation u = ̂u). // in[0] is geometry quadrature data, shape [ncomp=2+space_dim*dim, Q] // in[1] is active vector, shape [ncomp=vdim, Q] // out[0] is active vector, shape [ncomp=vdim, Q] -CEED_QFUNCTION(f_apply_h1_1)(void *__restrict__ ctx, CeedInt Q, const CeedScalar *const *in, - CeedScalar *const *out) -{ - const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *u = in[1]; - CeedScalar *v = out[0]; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - const CeedScalar coeff = CoeffUnpack1((const CeedIntScalar *)ctx, (CeedInt)attr[i]); - - v[i] = coeff * wdetJ[i] * u[i]; - } - return 0; -} - -CEED_QFUNCTION(f_apply_h1_2)(void *__restrict__ ctx, CeedInt Q, const CeedScalar *const *in, - CeedScalar *const *out) -{ - const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *u = in[1]; - CeedScalar *v = out[0]; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - CeedScalar coeff[3]; - CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); - - const CeedScalar u0 = u[i + Q * 0]; - const CeedScalar u1 = u[i + Q * 1]; - v[i + Q * 0] = wdetJ[i] * (coeff[0] * u0 + coeff[1] * u1); - v[i + Q * 1] = wdetJ[i] * (coeff[1] * u0 + coeff[2] * u1); - } - return 0; -} - -CEED_QFUNCTION(f_apply_h1_3)(void *__restrict__ ctx, CeedInt Q, const CeedScalar *const *in, - CeedScalar *const *out) -{ - const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *u = in[1]; - CeedScalar *v = out[0]; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - CeedScalar coeff[6]; - CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); +// Build functions assemble the quadrature point data, stored as a symmetric matrix. 
- const CeedScalar u0 = u[i + Q * 0]; - const CeedScalar u1 = u[i + Q * 1]; - const CeedScalar u2 = u[i + Q * 2]; - v[i + Q * 0] = wdetJ[i] * (coeff[0] * u0 + coeff[1] * u1 + coeff[2] * u2); - v[i + Q * 1] = wdetJ[i] * (coeff[1] * u0 + coeff[3] * u1 + coeff[4] * u2); - v[i + Q * 2] = wdetJ[i] * (coeff[2] * u0 + coeff[4] * u1 + coeff[5] * u2); - } - return 0; -} +#include "1/h1_1_qf.h" +#include "1/h1_build_1_qf.h" +#include "2/h1_2_qf.h" +#include "2/h1_build_2_qf.h" +#include "3/h1_3_qf.h" +#include "3/h1_build_3_qf.h" #endif // PALACE_LIBCEED_H1_QF_H diff --git a/palace/fem/qfunctions/hcurl_build_qf.h b/palace/fem/qfunctions/hcurl_build_qf.h deleted file mode 100644 index bc25c1777..000000000 --- a/palace/fem/qfunctions/hcurl_build_qf.h +++ /dev/null @@ -1,93 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: Apache-2.0 - -#ifndef PALACE_LIBCEED_HCURL_BUILD_QF_H -#define PALACE_LIBCEED_HCURL_BUILD_QF_H - -#include "coeff_qf.h" -#include "utils_qf.h" - -// Build functions assemble the quadrature point data, stored as a symmetric matrix. 
- -CEED_QFUNCTION(f_build_hcurl_22)(void *__restrict__ ctx, CeedInt Q, - const CeedScalar *const *in, CeedScalar *const *out) -{ - const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q; - CeedScalar *qd = out[0]; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - CeedScalar coeff[3], adjJt_loc[4], qd_loc[3]; - CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); - MatUnpack22(adjJt + i, Q, adjJt_loc); - MultAtBA22(adjJt_loc, coeff, qd_loc); - - qd[i + Q * 0] = wdetJ[i] * qd_loc[0]; - qd[i + Q * 1] = wdetJ[i] * qd_loc[1]; - qd[i + Q * 2] = wdetJ[i] * qd_loc[2]; - } - return 0; -} - -CEED_QFUNCTION(f_build_hcurl_33)(void *__restrict__ ctx, CeedInt Q, - const CeedScalar *const *in, CeedScalar *const *out) -{ - const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q; - CeedScalar *qd = out[0]; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - CeedScalar coeff[6], adjJt_loc[9], qd_loc[6]; - CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); - MatUnpack33(adjJt + i, Q, adjJt_loc); - MultAtBA33(adjJt_loc, coeff, qd_loc); - - qd[i + Q * 0] = wdetJ[i] * qd_loc[0]; - qd[i + Q * 1] = wdetJ[i] * qd_loc[1]; - qd[i + Q * 2] = wdetJ[i] * qd_loc[2]; - qd[i + Q * 3] = wdetJ[i] * qd_loc[3]; - qd[i + Q * 4] = wdetJ[i] * qd_loc[4]; - qd[i + Q * 5] = wdetJ[i] * qd_loc[5]; - } - return 0; -} - -CEED_QFUNCTION(f_build_hcurl_21)(void *__restrict__ ctx, CeedInt Q, - const CeedScalar *const *in, CeedScalar *const *out) -{ - const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q; - CeedScalar *qd = out[0]; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - CeedScalar coeff[3], adjJt_loc[2], qd_loc[1]; - CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); - MatUnpack21(adjJt + i, Q, adjJt_loc); - MultAtBA21(adjJt_loc, coeff, qd_loc); - - qd[i + Q * 0] = wdetJ[i] * qd_loc[0]; - } - return 0; -} - -CEED_QFUNCTION(f_build_hcurl_32)(void *__restrict__ ctx, CeedInt Q, - const 
CeedScalar *const *in, CeedScalar *const *out) -{ - const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q; - CeedScalar *qd = out[0]; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - CeedScalar coeff[6], adjJt_loc[6], qd_loc[3]; - CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); - MatUnpack32(adjJt + i, Q, adjJt_loc); - MultAtBA32(adjJt_loc, coeff, qd_loc); - - qd[i + Q * 0] = wdetJ[i] * qd_loc[0]; - qd[i + Q * 1] = wdetJ[i] * qd_loc[1]; - qd[i + Q * 2] = wdetJ[i] * qd_loc[2]; - } - return 0; -} - -#endif // PALACE_LIBCEED_HCURL_BUILD_QF_H diff --git a/palace/fem/qfunctions/hcurl_qf.h b/palace/fem/qfunctions/hcurl_qf.h index cced70851..b01ea1e13 100644 --- a/palace/fem/qfunctions/hcurl_qf.h +++ b/palace/fem/qfunctions/hcurl_qf.h @@ -4,92 +4,20 @@ #ifndef PALACE_LIBCEED_HCURL_QF_H #define PALACE_LIBCEED_HCURL_QF_H -#include "coeff_qf.h" -#include "utils_qf.h" - // libCEED QFunctions for H(curl) operators (Piola transformation u = adj(J)^T / det(J) ̂u). 
// in[0] is geometry quadrature data, shape [ncomp=2+space_dim*dim, Q] // in[1] is active vector, shape [qcomp=dim, ncomp=1, Q] // out[0] is active vector, shape [qcomp=dim, ncomp=1, Q] -CEED_QFUNCTION(f_apply_hcurl_22)(void *__restrict__ ctx, CeedInt Q, - const CeedScalar *const *in, CeedScalar *const *out) -{ - const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1]; - CeedScalar *v = out[0]; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - const CeedScalar u_loc[2] = {u[i + Q * 0], u[i + Q * 1]}; - CeedScalar coeff[3], adjJt_loc[4], v_loc[2]; - CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); - MatUnpack22(adjJt + i, Q, adjJt_loc); - MultAtBCx22(adjJt_loc, coeff, adjJt_loc, u_loc, v_loc); - - v[i + Q * 0] = wdetJ[i] * v_loc[0]; - v[i + Q * 1] = wdetJ[i] * v_loc[1]; - } - return 0; -} - -CEED_QFUNCTION(f_apply_hcurl_33)(void *__restrict__ ctx, CeedInt Q, - const CeedScalar *const *in, CeedScalar *const *out) -{ - const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1]; - CeedScalar *v = out[0]; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - const CeedScalar u_loc[3] = {u[i + Q * 0], u[i + Q * 1], u[i + Q * 2]}; - CeedScalar coeff[6], adjJt_loc[9], v_loc[3]; - CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); - MatUnpack33(adjJt + i, Q, adjJt_loc); - MultAtBCx33(adjJt_loc, coeff, adjJt_loc, u_loc, v_loc); - - v[i + Q * 0] = wdetJ[i] * v_loc[0]; - v[i + Q * 1] = wdetJ[i] * v_loc[1]; - v[i + Q * 2] = wdetJ[i] * v_loc[2]; - } - return 0; -} - -CEED_QFUNCTION(f_apply_hcurl_21)(void *__restrict__ ctx, CeedInt Q, - const CeedScalar *const *in, CeedScalar *const *out) -{ - const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1]; - CeedScalar *v = out[0]; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - const CeedScalar u_loc[1] = {u[i + Q * 0]}; - CeedScalar coeff[3], adjJt_loc[2], v_loc[2]; - CoeffUnpack2((const 
CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); - MatUnpack21(adjJt + i, Q, adjJt_loc); - MultAtBCx21(adjJt_loc, coeff, adjJt_loc, u_loc, v_loc); - - v[i + Q * 0] = wdetJ[i] * v_loc[0]; - } - return 0; -} - -CEED_QFUNCTION(f_apply_hcurl_32)(void *__restrict__ ctx, CeedInt Q, - const CeedScalar *const *in, CeedScalar *const *out) -{ - const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1]; - CeedScalar *v = out[0]; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - const CeedScalar u_loc[2] = {u[i + Q * 0], u[i + Q * 1]}; - CeedScalar coeff[6], adjJt_loc[6], v_loc[3]; - CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); - MatUnpack32(adjJt + i, Q, adjJt_loc); - MultAtBCx32(adjJt_loc, coeff, adjJt_loc, u_loc, v_loc); +// Build functions assemble the quadrature point data, stored as a symmetric matrix. - v[i + Q * 0] = wdetJ[i] * v_loc[0]; - v[i + Q * 1] = wdetJ[i] * v_loc[1]; - } - return 0; -} +#include "21/hcurl_21_qf.h" +#include "21/hcurl_build_21_qf.h" +#include "22/hcurl_22_qf.h" +#include "22/hcurl_build_22_qf.h" +#include "32/hcurl_32_qf.h" +#include "32/hcurl_build_32_qf.h" +#include "33/hcurl_33_qf.h" +#include "33/hcurl_build_33_qf.h" #endif // PALACE_LIBCEED_HCURL_QF_H diff --git a/palace/fem/qfunctions/hcurlh1d_build_qf.h b/palace/fem/qfunctions/hcurlh1d_build_qf.h deleted file mode 100644 index 15dc8517a..000000000 --- a/palace/fem/qfunctions/hcurlh1d_build_qf.h +++ /dev/null @@ -1,99 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: Apache-2.0 - -#ifndef PALACE_LIBCEED_HCURL_H1D_BUILD_QF_H -#define PALACE_LIBCEED_HCURL_H1D_BUILD_QF_H - -#include "coeff_qf.h" -#include "utils_qf.h" - -// Build functions replace active vector output with quadrature point data and remove active -// vector input. 
- -CEED_QFUNCTION(f_build_hcurlh1d_22)(void *__restrict__ ctx, CeedInt Q, - const CeedScalar *const *in, CeedScalar *const *out) -{ - const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q; - CeedScalar *qd = out[0]; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - CeedScalar coeff[3], adjJt_loc[4], qd_loc[4]; - CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); - MatUnpack22(adjJt + i, Q, adjJt_loc); - MultBA22(adjJt_loc, coeff, qd_loc); - - qd[i + Q * 0] = wdetJ[i] * qd_loc[0]; - qd[i + Q * 1] = wdetJ[i] * qd_loc[1]; - qd[i + Q * 2] = wdetJ[i] * qd_loc[2]; - qd[i + Q * 3] = wdetJ[i] * qd_loc[3]; - } - return 0; -} - -CEED_QFUNCTION(f_build_hcurlh1d_33)(void *__restrict__ ctx, CeedInt Q, - const CeedScalar *const *in, CeedScalar *const *out) -{ - const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q; - CeedScalar *qd = out[0]; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - CeedScalar coeff[6], adjJt_loc[9], qd_loc[9]; - CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); - MatUnpack33(adjJt + i, Q, adjJt_loc); - MultBA33(adjJt_loc, coeff, qd_loc); - - qd[i + Q * 0] = wdetJ[i] * qd_loc[0]; - qd[i + Q * 1] = wdetJ[i] * qd_loc[1]; - qd[i + Q * 2] = wdetJ[i] * qd_loc[2]; - qd[i + Q * 3] = wdetJ[i] * qd_loc[3]; - qd[i + Q * 4] = wdetJ[i] * qd_loc[4]; - qd[i + Q * 5] = wdetJ[i] * qd_loc[5]; - qd[i + Q * 6] = wdetJ[i] * qd_loc[6]; - qd[i + Q * 7] = wdetJ[i] * qd_loc[7]; - qd[i + Q * 8] = wdetJ[i] * qd_loc[8]; - } - return 0; -} - -CEED_QFUNCTION(f_build_hcurlh1d_21)(void *__restrict__ ctx, CeedInt Q, - const CeedScalar *const *in, CeedScalar *const *out) -{ - const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q; - CeedScalar *qd = out[0]; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - CeedScalar coeff[3], adjJt_loc[2], qd_loc[1]; - CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); - MatUnpack21(adjJt + i, Q, adjJt_loc); - 
MultBA21(adjJt_loc, coeff, qd_loc); - - qd[i + Q * 0] = wdetJ[i] * qd_loc[0]; - } - return 0; -} - -CEED_QFUNCTION(f_build_hcurlh1d_32)(void *__restrict__ ctx, CeedInt Q, - const CeedScalar *const *in, CeedScalar *const *out) -{ - const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q; - CeedScalar *qd = out[0]; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - CeedScalar coeff[6], adjJt_loc[6], qd_loc[4]; - CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); - MatUnpack32(adjJt + i, Q, adjJt_loc); - MultBA32(adjJt_loc, coeff, qd_loc); - - qd[i + Q * 0] = wdetJ[i] * qd_loc[0]; - qd[i + Q * 1] = wdetJ[i] * qd_loc[1]; - qd[i + Q * 2] = wdetJ[i] * qd_loc[2]; - qd[i + Q * 3] = wdetJ[i] * qd_loc[3]; - } - return 0; -} - -#endif // PALACE_LIBCEED_HCURL_H1D_BUILD_QF_H diff --git a/palace/fem/qfunctions/hcurlh1d_qf.h b/palace/fem/qfunctions/hcurlh1d_qf.h index 71d2f0f16..e22354a6a 100644 --- a/palace/fem/qfunctions/hcurlh1d_qf.h +++ b/palace/fem/qfunctions/hcurlh1d_qf.h @@ -4,93 +4,21 @@ #ifndef PALACE_LIBCEED_HCURL_H1D_QF_H #define PALACE_LIBCEED_HCURL_H1D_QF_H -#include "coeff_qf.h" -#include "utils_qf.h" - // libCEED QFunctions for mixed H(curl)-(H1)ᵈ operators (Piola transformation u = // adj(J)^T / det(J) ̂u and u = ̂u) // in[0] is geometry quadrature data, shape [ncomp=2+space_dim*dim, Q] // in[1] is active vector, shape [qcomp=dim, ncomp=1, Q] // out[0] is active vector, shape [ncomp=space_dim, Q] -CEED_QFUNCTION(f_apply_hcurlh1d_22)(void *__restrict__ ctx, CeedInt Q, - const CeedScalar *const *in, CeedScalar *const *out) -{ - const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1]; - CeedScalar *v = out[0]; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - const CeedScalar u_loc[2] = {u[i + Q * 0], u[i + Q * 1]}; - CeedScalar coeff[3], adjJt_loc[4], v_loc[2]; - CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); - MatUnpack22(adjJt + i, Q, adjJt_loc); - 
MultBAx22(adjJt_loc, coeff, u_loc, v_loc); - - v[i + Q * 0] = wdetJ[i] * v_loc[0]; - v[i + Q * 1] = wdetJ[i] * v_loc[1]; - } - return 0; -} - -CEED_QFUNCTION(f_apply_hcurlh1d_33)(void *__restrict__ ctx, CeedInt Q, - const CeedScalar *const *in, CeedScalar *const *out) -{ - const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1]; - CeedScalar *v = out[0]; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - const CeedScalar u_loc[3] = {u[i + Q * 0], u[i + Q * 1], u[i + Q * 2]}; - CeedScalar coeff[6], adjJt_loc[9], v_loc[3]; - CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); - MatUnpack33(adjJt + i, Q, adjJt_loc); - MultBAx33(adjJt_loc, coeff, u_loc, v_loc); - - v[i + Q * 0] = wdetJ[i] * v_loc[0]; - v[i + Q * 1] = wdetJ[i] * v_loc[1]; - v[i + Q * 2] = wdetJ[i] * v_loc[2]; - } - return 0; -} - -CEED_QFUNCTION(f_apply_hcurlh1d_21)(void *__restrict__ ctx, CeedInt Q, - const CeedScalar *const *in, CeedScalar *const *out) -{ - const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1]; - CeedScalar *v = out[0]; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - const CeedScalar u_loc[1] = {u[i + Q * 0]}; - CeedScalar coeff[3], adjJt_loc[2], v_loc[1]; - CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); - MatUnpack21(adjJt + i, Q, adjJt_loc); - MultBAx21(adjJt_loc, coeff, u_loc, v_loc); - - v[i + Q * 0] = wdetJ[i] * v_loc[0]; - } - return 0; -} - -CEED_QFUNCTION(f_apply_hcurlh1d_32)(void *__restrict__ ctx, CeedInt Q, - const CeedScalar *const *in, CeedScalar *const *out) -{ - const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1]; - CeedScalar *v = out[0]; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - const CeedScalar u_loc[2] = {u[i + Q * 0], u[i + Q * 1]}; - CeedScalar coeff[6], adjJt_loc[6], v_loc[2]; - CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); - MatUnpack32(adjJt + i, Q, adjJt_loc); - 
MultBAx32(adjJt_loc, coeff, u_loc, v_loc); +// Build functions assemble the quadrature point data. - v[i + Q * 0] = wdetJ[i] * v_loc[0]; - v[i + Q * 1] = wdetJ[i] * v_loc[1]; - } - return 0; -} +#include "21/hcurlh1d_21_qf.h" +#include "21/hcurlh1d_build_21_qf.h" +#include "22/hcurlh1d_22_qf.h" +#include "22/hcurlh1d_build_22_qf.h" +#include "32/hcurlh1d_32_qf.h" +#include "32/hcurlh1d_build_32_qf.h" +#include "33/hcurlh1d_33_qf.h" +#include "33/hcurlh1d_build_33_qf.h" #endif // PALACE_LIBCEED_HCURL_H1D_QF_H diff --git a/palace/fem/qfunctions/hcurlhdiv_build_qf.h b/palace/fem/qfunctions/hcurlhdiv_build_qf.h deleted file mode 100644 index 8a5f2b876..000000000 --- a/palace/fem/qfunctions/hcurlhdiv_build_qf.h +++ /dev/null @@ -1,194 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: Apache-2.0 - -#ifndef PALACE_LIBCEED_HCURL_HDIV_BUILD_QF_H -#define PALACE_LIBCEED_HCURL_HDIV_BUILD_QF_H - -#include "coeff_qf.h" -#include "utils_geom_qf.h" -#include "utils_qf.h" - -// Build functions replace active vector output with quadrature point data and remove active -// vector input. 
- -CEED_QFUNCTION(f_build_hcurlhdiv_22)(void *__restrict__ ctx, CeedInt Q, - const CeedScalar *const *in, CeedScalar *const *out) -{ - const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q; - CeedScalar *qd = out[0]; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - CeedScalar coeff[3], adjJt_loc[4], J_loc[4], qd_loc[4]; - CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); - MatUnpack22(adjJt + i, Q, adjJt_loc); - AdjJt22(adjJt_loc, J_loc); - MultAtBC22(J_loc, coeff, adjJt_loc, qd_loc); - - qd[i + Q * 0] = wdetJ[i] * qd_loc[0]; - qd[i + Q * 1] = wdetJ[i] * qd_loc[1]; - qd[i + Q * 2] = wdetJ[i] * qd_loc[2]; - qd[i + Q * 3] = wdetJ[i] * qd_loc[3]; - } - return 0; -} - -CEED_QFUNCTION(f_build_hcurlhdiv_33)(void *__restrict__ ctx, CeedInt Q, - const CeedScalar *const *in, CeedScalar *const *out) -{ - const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q; - CeedScalar *qd = out[0]; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - CeedScalar coeff[6], adjJt_loc[9], J_loc[9], qd_loc[9]; - CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); - MatUnpack33(adjJt + i, Q, adjJt_loc); - AdjJt33(adjJt_loc, J_loc); - MultAtBC33(J_loc, coeff, adjJt_loc, qd_loc); - - qd[i + Q * 0] = wdetJ[i] * qd_loc[0]; - qd[i + Q * 1] = wdetJ[i] * qd_loc[1]; - qd[i + Q * 2] = wdetJ[i] * qd_loc[2]; - qd[i + Q * 3] = wdetJ[i] * qd_loc[3]; - qd[i + Q * 4] = wdetJ[i] * qd_loc[4]; - qd[i + Q * 5] = wdetJ[i] * qd_loc[5]; - qd[i + Q * 6] = wdetJ[i] * qd_loc[6]; - qd[i + Q * 7] = wdetJ[i] * qd_loc[7]; - qd[i + Q * 8] = wdetJ[i] * qd_loc[8]; - } - return 0; -} - -CEED_QFUNCTION(f_build_hcurlhdiv_21)(void *__restrict__ ctx, CeedInt Q, - const CeedScalar *const *in, CeedScalar *const *out) -{ - const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q; - CeedScalar *qd = out[0]; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - CeedScalar coeff[3], adjJt_loc[2], J_loc[2], qd_loc[1]; - 
CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); - MatUnpack21(adjJt + i, Q, adjJt_loc); - AdjJt21(adjJt_loc, J_loc); - MultAtBC21(J_loc, coeff, adjJt_loc, qd_loc); - - qd[i + Q * 0] = wdetJ[i] * qd_loc[0]; - } - return 0; -} - -CEED_QFUNCTION(f_build_hcurlhdiv_32)(void *__restrict__ ctx, CeedInt Q, - const CeedScalar *const *in, CeedScalar *const *out) -{ - const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q; - CeedScalar *qd = out[0]; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - CeedScalar coeff[6], adjJt_loc[6], J_loc[6], qd_loc[4]; - CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); - MatUnpack32(adjJt + i, Q, adjJt_loc); - AdjJt32(adjJt_loc, J_loc); - MultAtBC32(J_loc, coeff, adjJt_loc, qd_loc); - - qd[i + Q * 0] = wdetJ[i] * qd_loc[0]; - qd[i + Q * 1] = wdetJ[i] * qd_loc[1]; - qd[i + Q * 2] = wdetJ[i] * qd_loc[2]; - qd[i + Q * 3] = wdetJ[i] * qd_loc[3]; - } - return 0; -} - -CEED_QFUNCTION(f_build_hdivhcurl_22)(void *__restrict__ ctx, CeedInt Q, - const CeedScalar *const *in, CeedScalar *const *out) -{ - const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q; - CeedScalar *qd = out[0]; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - CeedScalar coeff[3], adjJt_loc[4], J_loc[4], qd_loc[4]; - CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); - MatUnpack22(adjJt + i, Q, adjJt_loc); - AdjJt22(adjJt_loc, J_loc); - MultAtBC22(adjJt_loc, coeff, J_loc, qd_loc); - - qd[i + Q * 0] = wdetJ[i] * qd_loc[0]; - qd[i + Q * 1] = wdetJ[i] * qd_loc[1]; - qd[i + Q * 2] = wdetJ[i] * qd_loc[2]; - qd[i + Q * 3] = wdetJ[i] * qd_loc[3]; - } - return 0; -} - -CEED_QFUNCTION(f_build_hdivhcurl_33)(void *__restrict__ ctx, CeedInt Q, - const CeedScalar *const *in, CeedScalar *const *out) -{ - const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q; - CeedScalar *qd = out[0]; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - CeedScalar coeff[6], 
adjJt_loc[9], J_loc[9], qd_loc[9]; - CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); - MatUnpack33(adjJt + i, Q, adjJt_loc); - AdjJt33(adjJt_loc, J_loc); - MultAtBC33(adjJt_loc, coeff, J_loc, qd_loc); - - qd[i + Q * 0] = wdetJ[i] * qd_loc[0]; - qd[i + Q * 1] = wdetJ[i] * qd_loc[1]; - qd[i + Q * 2] = wdetJ[i] * qd_loc[2]; - qd[i + Q * 3] = wdetJ[i] * qd_loc[3]; - qd[i + Q * 4] = wdetJ[i] * qd_loc[4]; - qd[i + Q * 5] = wdetJ[i] * qd_loc[5]; - qd[i + Q * 6] = wdetJ[i] * qd_loc[6]; - qd[i + Q * 7] = wdetJ[i] * qd_loc[7]; - qd[i + Q * 8] = wdetJ[i] * qd_loc[8]; - } - return 0; -} - -CEED_QFUNCTION(f_build_hdivhcurl_21)(void *__restrict__ ctx, CeedInt Q, - const CeedScalar *const *in, CeedScalar *const *out) -{ - const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q; - CeedScalar *qd = out[0]; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - CeedScalar coeff[3], adjJt_loc[2], J_loc[2], qd_loc[1]; - CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); - MatUnpack21(adjJt + i, Q, adjJt_loc); - AdjJt21(adjJt_loc, J_loc); - MultAtBC21(adjJt_loc, coeff, J_loc, qd_loc); - - qd[i + Q * 0] = wdetJ[i] * qd_loc[0]; - } - return 0; -} - -CEED_QFUNCTION(f_build_hdivhcurl_32)(void *__restrict__ ctx, CeedInt Q, - const CeedScalar *const *in, CeedScalar *const *out) -{ - const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q; - CeedScalar *qd = out[0]; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - CeedScalar coeff[6], adjJt_loc[6], J_loc[6], qd_loc[4]; - CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); - MatUnpack32(adjJt + i, Q, adjJt_loc); - AdjJt32(adjJt_loc, J_loc); - MultAtBC32(adjJt_loc, coeff, J_loc, qd_loc); - - qd[i + Q * 0] = wdetJ[i] * qd_loc[0]; - qd[i + Q * 1] = wdetJ[i] * qd_loc[1]; - qd[i + Q * 2] = wdetJ[i] * qd_loc[2]; - qd[i + Q * 3] = wdetJ[i] * qd_loc[3]; - } - return 0; -} - -#endif // PALACE_LIBCEED_HCURL_HDIV_BUILD_QF_H diff --git 
a/palace/fem/qfunctions/hcurlhdiv_qf.h b/palace/fem/qfunctions/hcurlhdiv_qf.h index 38bca163c..4658a456f 100644 --- a/palace/fem/qfunctions/hcurlhdiv_qf.h +++ b/palace/fem/qfunctions/hcurlhdiv_qf.h @@ -4,10 +4,6 @@ #ifndef PALACE_LIBCEED_HCURL_HDIV_QF_H #define PALACE_LIBCEED_HCURL_HDIV_QF_H -#include "coeff_qf.h" -#include "utils_geom_qf.h" -#include "utils_qf.h" - // libCEED QFunctions for mixed H(curl)-H(div) operators (Piola transformations u = // adj(J)^T / det(J) ̂u and u = J / det(J) ̂u). // Note: J / det(J) = adj(adj(J)^T / det(J))^T @@ -15,172 +11,16 @@ // in[1] is active vector, shape [qcomp=dim, ncomp=1, Q] // out[0] is active vector, shape [qcomp=dim, ncomp=1, Q] -CEED_QFUNCTION(f_apply_hcurlhdiv_22)(void *__restrict__ ctx, CeedInt Q, - const CeedScalar *const *in, CeedScalar *const *out) -{ - const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1]; - CeedScalar *v = out[0]; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - const CeedScalar u_loc[2] = {u[i + Q * 0], u[i + Q * 1]}; - CeedScalar coeff[3], adjJt_loc[4], J_loc[4], v_loc[2]; - CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); - MatUnpack22(adjJt + i, Q, adjJt_loc); - AdjJt22(adjJt_loc, J_loc); - MultAtBCx22(J_loc, coeff, adjJt_loc, u_loc, v_loc); - - v[i + Q * 0] = wdetJ[i] * v_loc[0]; - v[i + Q * 1] = wdetJ[i] * v_loc[1]; - } - return 0; -} - -CEED_QFUNCTION(f_apply_hcurlhdiv_33)(void *__restrict__ ctx, CeedInt Q, - const CeedScalar *const *in, CeedScalar *const *out) -{ - const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1]; - CeedScalar *v = out[0]; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - const CeedScalar u_loc[3] = {u[i + Q * 0], u[i + Q * 1], u[i + Q * 2]}; - CeedScalar coeff[6], adjJt_loc[9], J_loc[9], v_loc[3]; - CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); - MatUnpack33(adjJt + i, Q, adjJt_loc); - AdjJt33(adjJt_loc, J_loc); - MultAtBCx33(J_loc, coeff, 
adjJt_loc, u_loc, v_loc); - - v[i + Q * 0] = wdetJ[i] * v_loc[0]; - v[i + Q * 1] = wdetJ[i] * v_loc[1]; - v[i + Q * 2] = wdetJ[i] * v_loc[2]; - } - return 0; -} - -CEED_QFUNCTION(f_apply_hcurlhdiv_21)(void *__restrict__ ctx, CeedInt Q, - const CeedScalar *const *in, CeedScalar *const *out) -{ - const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1]; - CeedScalar *v = out[0]; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - const CeedScalar u_loc[1] = {u[i + Q * 0]}; - CeedScalar coeff[3], adjJt_loc[2], J_loc[2], v_loc[2]; - CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); - MatUnpack21(adjJt + i, Q, adjJt_loc); - AdjJt21(adjJt_loc, J_loc); - MultAtBCx21(J_loc, coeff, adjJt_loc, u_loc, v_loc); - - v[i + Q * 0] = wdetJ[i] * v_loc[0]; - } - return 0; -} - -CEED_QFUNCTION(f_apply_hcurlhdiv_32)(void *__restrict__ ctx, CeedInt Q, - const CeedScalar *const *in, CeedScalar *const *out) -{ - const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1]; - CeedScalar *v = out[0]; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - const CeedScalar u_loc[2] = {u[i + Q * 0], u[i + Q * 1]}; - CeedScalar coeff[6], adjJt_loc[6], J_loc[6], v_loc[3]; - CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); - MatUnpack32(adjJt + i, Q, adjJt_loc); - AdjJt32(adjJt_loc, J_loc); - MultAtBCx32(J_loc, coeff, adjJt_loc, u_loc, v_loc); - - v[i + Q * 0] = wdetJ[i] * v_loc[0]; - v[i + Q * 1] = wdetJ[i] * v_loc[1]; - } - return 0; -} - -CEED_QFUNCTION(f_apply_hdivhcurl_22)(void *__restrict__ ctx, CeedInt Q, - const CeedScalar *const *in, CeedScalar *const *out) -{ - const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1]; - CeedScalar *v = out[0]; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - const CeedScalar u_loc[2] = {u[i + Q * 0], u[i + Q * 1]}; - CeedScalar coeff[3], adjJt_loc[4], J_loc[4], v_loc[2]; - CoeffUnpack2((const CeedIntScalar *)ctx, 
(CeedInt)attr[i], coeff); - MatUnpack22(adjJt + i, Q, adjJt_loc); - AdjJt22(adjJt_loc, J_loc); - MultAtBCx22(adjJt_loc, coeff, J_loc, u_loc, v_loc); - - v[i + Q * 0] = wdetJ[i] * v_loc[0]; - v[i + Q * 1] = wdetJ[i] * v_loc[1]; - } - return 0; -} - -CEED_QFUNCTION(f_apply_hdivhcurl_33)(void *__restrict__ ctx, CeedInt Q, - const CeedScalar *const *in, CeedScalar *const *out) -{ - const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1]; - CeedScalar *v = out[0]; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - const CeedScalar u_loc[3] = {u[i + Q * 0], u[i + Q * 1], u[i + Q * 2]}; - CeedScalar coeff[6], adjJt_loc[9], J_loc[9], v_loc[3]; - CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); - MatUnpack33(adjJt + i, Q, adjJt_loc); - AdjJt33(adjJt_loc, J_loc); - MultAtBCx33(adjJt_loc, coeff, J_loc, u_loc, v_loc); - - v[i + Q * 0] = wdetJ[i] * v_loc[0]; - v[i + Q * 1] = wdetJ[i] * v_loc[1]; - v[i + Q * 2] = wdetJ[i] * v_loc[2]; - } - return 0; -} - -CEED_QFUNCTION(f_apply_hdivhcurl_21)(void *__restrict__ ctx, CeedInt Q, - const CeedScalar *const *in, CeedScalar *const *out) -{ - const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1]; - CeedScalar *v = out[0]; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - const CeedScalar u_loc[1] = {u[i + Q * 0]}; - CeedScalar coeff[3], adjJt_loc[2], J_loc[2], v_loc[2]; - CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); - MatUnpack21(adjJt + i, Q, adjJt_loc); - AdjJt21(adjJt_loc, J_loc); - MultAtBCx21(adjJt_loc, coeff, J_loc, u_loc, v_loc); - - v[i + Q * 0] = wdetJ[i] * v_loc[0]; - } - return 0; -} - -CEED_QFUNCTION(f_apply_hdivhcurl_32)(void *__restrict__ ctx, CeedInt Q, - const CeedScalar *const *in, CeedScalar *const *out) -{ - const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1]; - CeedScalar *v = out[0]; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - const CeedScalar u_loc[2] = 
{u[i + Q * 0], u[i + Q * 1]}; - CeedScalar coeff[6], adjJt_loc[6], J_loc[6], v_loc[3]; - CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); - MatUnpack32(adjJt + i, Q, adjJt_loc); - AdjJt32(adjJt_loc, J_loc); - MultAtBCx32(adjJt_loc, coeff, J_loc, u_loc, v_loc); - - v[i + Q * 0] = wdetJ[i] * v_loc[0]; - v[i + Q * 1] = wdetJ[i] * v_loc[1]; - } - return 0; -} +// Build functions assemble the quadrature point data, stored as a symmetric matrix where +// possible. + +#include "21/hcurlhdiv_21_qf.h" +#include "21/hcurlhdiv_build_21_qf.h" +#include "22/hcurlhdiv_22_qf.h" +#include "22/hcurlhdiv_build_22_qf.h" +#include "32/hcurlhdiv_32_qf.h" +#include "32/hcurlhdiv_build_32_qf.h" +#include "33/hcurlhdiv_33_qf.h" +#include "33/hcurlhdiv_build_33_qf.h" #endif // PALACE_LIBCEED_HCURL_HDIV_QF_H diff --git a/palace/fem/qfunctions/hcurlmass_build_qf.h b/palace/fem/qfunctions/hcurlmass_build_qf.h deleted file mode 100644 index 50b63125d..000000000 --- a/palace/fem/qfunctions/hcurlmass_build_qf.h +++ /dev/null @@ -1,121 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: Apache-2.0 - -#ifndef PALACE_LIBCEED_HCURL_MASS_BUILD_QF_H -#define PALACE_LIBCEED_HCURL_MASS_BUILD_QF_H - -#include "coeff_qf.h" -#include "utils_qf.h" - -// Build functions assemble the quadrature point data, stored as a symmetric matrix. 
- -CEED_QFUNCTION(f_build_hcurlmass_22)(void *__restrict__ ctx, CeedInt Q, - const CeedScalar *const *in, CeedScalar *const *out) -{ - const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q; - CeedScalar *__restrict__ qd1 = out[0], *__restrict__ qd2 = out[0] + Q; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - { - const CeedScalar coeff = CoeffUnpack1((const CeedIntScalar *)ctx, (CeedInt)attr[i]); - - qd1[i + Q * 0] = coeff * wdetJ[i]; - } - { - CeedScalar coeff[3], adjJt_loc[4], qd_loc[3]; - CoeffUnpack2(CoeffPairSecond1((const CeedIntScalar *)ctx), (CeedInt)attr[i], coeff); - MatUnpack22(adjJt + i, Q, adjJt_loc); - MultAtBA22(adjJt_loc, coeff, qd_loc); - - qd2[i + Q * 0] = wdetJ[i] * qd_loc[0]; - qd2[i + Q * 1] = wdetJ[i] * qd_loc[1]; - qd2[i + Q * 2] = wdetJ[i] * qd_loc[2]; - } - } - return 0; -} - -CEED_QFUNCTION(f_build_hcurlmass_33)(void *__restrict__ ctx, CeedInt Q, - const CeedScalar *const *in, CeedScalar *const *out) -{ - const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q; - CeedScalar *__restrict__ qd1 = out[0], *__restrict__ qd2 = out[0] + Q; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - { - const CeedScalar coeff = CoeffUnpack1((const CeedIntScalar *)ctx, (CeedInt)attr[i]); - - qd1[i + Q * 0] = coeff * wdetJ[i]; - } - { - CeedScalar coeff[6], adjJt_loc[9], qd_loc[6]; - CoeffUnpack3(CoeffPairSecond1((const CeedIntScalar *)ctx), (CeedInt)attr[i], coeff); - MatUnpack33(adjJt + i, Q, adjJt_loc); - MultAtBA33(adjJt_loc, coeff, qd_loc); - - qd2[i + Q * 0] = wdetJ[i] * qd_loc[0]; - qd2[i + Q * 1] = wdetJ[i] * qd_loc[1]; - qd2[i + Q * 2] = wdetJ[i] * qd_loc[2]; - qd2[i + Q * 3] = wdetJ[i] * qd_loc[3]; - qd2[i + Q * 4] = wdetJ[i] * qd_loc[4]; - qd2[i + Q * 5] = wdetJ[i] * qd_loc[5]; - } - } - return 0; -} - -CEED_QFUNCTION(f_build_hcurlmass_21)(void *__restrict__ ctx, CeedInt Q, - const CeedScalar *const *in, CeedScalar *const *out) -{ - const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = 
in[0] + 2 * Q; - CeedScalar *__restrict__ qd1 = out[0], *__restrict__ qd2 = out[0] + Q; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - { - const CeedScalar coeff = CoeffUnpack1((const CeedIntScalar *)ctx, (CeedInt)attr[i]); - - qd1[i + Q * 0] = coeff * wdetJ[i]; - } - { - CeedScalar coeff[3], adjJt_loc[2], qd_loc[1]; - CoeffUnpack2(CoeffPairSecond1((const CeedIntScalar *)ctx), (CeedInt)attr[i], coeff); - MatUnpack21(adjJt + i, Q, adjJt_loc); - MultAtBA21(adjJt_loc, coeff, qd_loc); - - qd2[i + Q * 0] = wdetJ[i] * qd_loc[0]; - } - } - return 0; -} - -CEED_QFUNCTION(f_build_hcurlmass_32)(void *__restrict__ ctx, CeedInt Q, - const CeedScalar *const *in, CeedScalar *const *out) -{ - const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q; - CeedScalar *__restrict__ qd1 = out[0], *__restrict__ qd2 = out[0] + Q; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - { - const CeedScalar coeff = CoeffUnpack1((const CeedIntScalar *)ctx, (CeedInt)attr[i]); - - qd1[i + Q * 0] = coeff * wdetJ[i]; - } - { - CeedScalar coeff[6], adjJt_loc[6], qd_loc[3]; - CoeffUnpack3(CoeffPairSecond1((const CeedIntScalar *)ctx), (CeedInt)attr[i], coeff); - MatUnpack32(adjJt + i, Q, adjJt_loc); - MultAtBA32(adjJt_loc, coeff, qd_loc); - - qd2[i + Q * 0] = wdetJ[i] * qd_loc[0]; - qd2[i + Q * 1] = wdetJ[i] * qd_loc[1]; - qd2[i + Q * 2] = wdetJ[i] * qd_loc[2]; - } - } - return 0; -} - -#endif // PALACE_LIBCEED_HCURL_MASS_BUILD_QF_H diff --git a/palace/fem/qfunctions/hcurlmass_qf.h b/palace/fem/qfunctions/hcurlmass_qf.h index cab010148..a28eebe7c 100644 --- a/palace/fem/qfunctions/hcurlmass_qf.h +++ b/palace/fem/qfunctions/hcurlmass_qf.h @@ -4,9 +4,6 @@ #ifndef PALACE_LIBCEED_HCURL_MASS_QF_H #define PALACE_LIBCEED_HCURL_MASS_QF_H -#include "coeff_qf.h" -#include "utils_qf.h" - // libCEED QFunctions for H(curl) + H1 mass operators (Piola transformation u = // adj(J)^T / det(J) ̂u and u = ̂u). 
// in[0] is geometry quadrature data, shape [ncomp=2+space_dim*dim, Q] @@ -15,116 +12,15 @@ // out[0] is active vector, shape [ncomp=1, Q] // out[1] is active vector gradient, shape [qcomp=dim, ncomp=1, Q] -CEED_QFUNCTION(f_apply_hcurlmass_22)(void *__restrict__ ctx, CeedInt Q, - const CeedScalar *const *in, CeedScalar *const *out) -{ - const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1], - *gradu = in[2]; - CeedScalar *__restrict__ v = out[0], *__restrict__ gradv = out[1]; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - { - const CeedScalar coeff = CoeffUnpack1((const CeedIntScalar *)ctx, (CeedInt)attr[i]); - - v[i] = coeff * wdetJ[i] * u[i]; - } - { - const CeedScalar u_loc[2] = {gradu[i + Q * 0], gradu[i + Q * 1]}; - CeedScalar coeff[3], adjJt_loc[4], v_loc[2]; - CoeffUnpack2(CoeffPairSecond1((const CeedIntScalar *)ctx), (CeedInt)attr[i], coeff); - MatUnpack22(adjJt + i, Q, adjJt_loc); - MultAtBCx22(adjJt_loc, coeff, adjJt_loc, u_loc, v_loc); - - gradv[i + Q * 0] = wdetJ[i] * v_loc[0]; - gradv[i + Q * 1] = wdetJ[i] * v_loc[1]; - } - } - return 0; -} - -CEED_QFUNCTION(f_apply_hcurlmass_33)(void *__restrict__ ctx, CeedInt Q, - const CeedScalar *const *in, CeedScalar *const *out) -{ - const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1], - *gradu = in[2]; - CeedScalar *__restrict__ v = out[0], *__restrict__ gradv = out[1]; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - { - const CeedScalar coeff = CoeffUnpack1((const CeedIntScalar *)ctx, (CeedInt)attr[i]); - - v[i] = coeff * wdetJ[i] * u[i]; - } - { - const CeedScalar u_loc[3] = {gradu[i + Q * 0], gradu[i + Q * 1], gradu[i + Q * 2]}; - CeedScalar coeff[6], adjJt_loc[9], v_loc[3]; - CoeffUnpack3(CoeffPairSecond1((const CeedIntScalar *)ctx), (CeedInt)attr[i], coeff); - MatUnpack33(adjJt + i, Q, adjJt_loc); - MultAtBCx33(adjJt_loc, coeff, adjJt_loc, u_loc, v_loc); - - gradv[i + Q * 0] = wdetJ[i] * v_loc[0]; - gradv[i + Q * 1] = 
wdetJ[i] * v_loc[1]; - gradv[i + Q * 2] = wdetJ[i] * v_loc[2]; - } - } - return 0; -} - -CEED_QFUNCTION(f_apply_hcurlmass_21)(void *__restrict__ ctx, CeedInt Q, - const CeedScalar *const *in, CeedScalar *const *out) -{ - const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1], - *gradu = in[2]; - CeedScalar *__restrict__ v = out[0], *__restrict__ gradv = out[1]; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - { - const CeedScalar coeff = CoeffUnpack1((const CeedIntScalar *)ctx, (CeedInt)attr[i]); - - v[i] = coeff * wdetJ[i] * u[i]; - } - { - const CeedScalar u_loc[1] = {gradu[i + Q * 0]}; - CeedScalar coeff[3], adjJt_loc[2], v_loc[2]; - CoeffUnpack2(CoeffPairSecond1((const CeedIntScalar *)ctx), (CeedInt)attr[i], coeff); - MatUnpack21(adjJt + i, Q, adjJt_loc); - MultAtBCx21(adjJt_loc, coeff, adjJt_loc, u_loc, v_loc); - - gradv[i + Q * 0] = wdetJ[i] * v_loc[0]; - } - } - return 0; -} - -CEED_QFUNCTION(f_apply_hcurlmass_32)(void *__restrict__ ctx, CeedInt Q, - const CeedScalar *const *in, CeedScalar *const *out) -{ - const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1], - *gradu = in[2]; - CeedScalar *__restrict__ v = out[0], *__restrict__ gradv = out[1]; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - { - const CeedScalar coeff = CoeffUnpack1((const CeedIntScalar *)ctx, (CeedInt)attr[i]); - - v[i] = coeff * wdetJ[i] * u[i]; - } - { - const CeedScalar u_loc[2] = {gradu[i + Q * 0], gradu[i + Q * 1]}; - CeedScalar coeff[6], adjJt_loc[6], v_loc[3]; - CoeffUnpack3(CoeffPairSecond1((const CeedIntScalar *)ctx), (CeedInt)attr[i], coeff); - MatUnpack32(adjJt + i, Q, adjJt_loc); - MultAtBCx32(adjJt_loc, coeff, adjJt_loc, u_loc, v_loc); +// Build functions assemble the quadrature point data, stored as a symmetric matrix. 
- gradv[i + Q * 0] = wdetJ[i] * v_loc[0]; - gradv[i + Q * 1] = wdetJ[i] * v_loc[1]; - } - } - return 0; -} +#include "21/hcurlmass_21_qf.h" +#include "21/hcurlmass_build_21_qf.h" +#include "22/hcurlmass_22_qf.h" +#include "22/hcurlmass_build_22_qf.h" +#include "32/hcurlmass_32_qf.h" +#include "32/hcurlmass_build_32_qf.h" +#include "33/hcurlmass_33_qf.h" +#include "33/hcurlmass_build_33_qf.h" #endif // PALACE_LIBCEED_HCURL_MASS_QF_H diff --git a/palace/fem/qfunctions/hdiv_build_qf.h b/palace/fem/qfunctions/hdiv_build_qf.h deleted file mode 100644 index 6da85ce42..000000000 --- a/palace/fem/qfunctions/hdiv_build_qf.h +++ /dev/null @@ -1,98 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: Apache-2.0 - -#ifndef PALACE_LIBCEED_HDIV_BUILD_QF_H -#define PALACE_LIBCEED_HDIV_BUILD_QF_H - -#include "coeff_qf.h" -#include "utils_geom_qf.h" -#include "utils_qf.h" - -// Build functions assemble the quadrature point data, stored as a symmetric matrix. 
- -CEED_QFUNCTION(f_build_hdiv_22)(void *__restrict__ ctx, CeedInt Q, - const CeedScalar *const *in, CeedScalar *const *out) -{ - const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q; - CeedScalar *qd = out[0]; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - CeedScalar coeff[3], adjJt_loc[4], J_loc[4], qd_loc[3]; - CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); - MatUnpack22(adjJt + i, Q, adjJt_loc); - AdjJt22(adjJt_loc, J_loc); - MultAtBA22(J_loc, coeff, qd_loc); - - qd[i + Q * 0] = wdetJ[i] * qd_loc[0]; - qd[i + Q * 1] = wdetJ[i] * qd_loc[1]; - qd[i + Q * 2] = wdetJ[i] * qd_loc[2]; - } - return 0; -} - -CEED_QFUNCTION(f_build_hdiv_33)(void *__restrict__ ctx, CeedInt Q, - const CeedScalar *const *in, CeedScalar *const *out) -{ - const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q; - CeedScalar *qd = out[0]; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - CeedScalar coeff[6], adjJt_loc[9], J_loc[9], qd_loc[6]; - CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); - MatUnpack33(adjJt + i, Q, adjJt_loc); - AdjJt33(adjJt_loc, J_loc); - MultAtBA33(J_loc, coeff, qd_loc); - - qd[i + Q * 0] = wdetJ[i] * qd_loc[0]; - qd[i + Q * 1] = wdetJ[i] * qd_loc[1]; - qd[i + Q * 2] = wdetJ[i] * qd_loc[2]; - qd[i + Q * 3] = wdetJ[i] * qd_loc[3]; - qd[i + Q * 4] = wdetJ[i] * qd_loc[4]; - qd[i + Q * 5] = wdetJ[i] * qd_loc[5]; - } - return 0; -} - -CEED_QFUNCTION(f_build_hdiv_21)(void *__restrict__ ctx, CeedInt Q, - const CeedScalar *const *in, CeedScalar *const *out) -{ - const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q; - CeedScalar *qd = out[0]; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - CeedScalar coeff[3], adjJt_loc[2], J_loc[2], qd_loc[1]; - CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); - MatUnpack21(adjJt + i, Q, adjJt_loc); - AdjJt21(adjJt_loc, J_loc); - MultAtBA21(J_loc, coeff, qd_loc); - - qd[i + Q * 0] = wdetJ[i] * 
qd_loc[0]; - } - return 0; -} - -CEED_QFUNCTION(f_build_hdiv_32)(void *__restrict__ ctx, CeedInt Q, - const CeedScalar *const *in, CeedScalar *const *out) -{ - const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q; - CeedScalar *qd = out[0]; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - CeedScalar coeff[6], adjJt_loc[6], J_loc[6], qd_loc[3]; - CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); - MatUnpack32(adjJt + i, Q, adjJt_loc); - AdjJt32(adjJt_loc, J_loc); - MultAtBA32(J_loc, coeff, qd_loc); - - qd[i + Q * 0] = wdetJ[i] * qd_loc[0]; - qd[i + Q * 1] = wdetJ[i] * qd_loc[1]; - qd[i + Q * 2] = wdetJ[i] * qd_loc[2]; - } - return 0; -} - -#endif // PALACE_LIBCEED_HDIV_BUILD_QF_H diff --git a/palace/fem/qfunctions/hdiv_qf.h b/palace/fem/qfunctions/hdiv_qf.h index 47ef5abf4..cbce03869 100644 --- a/palace/fem/qfunctions/hdiv_qf.h +++ b/palace/fem/qfunctions/hdiv_qf.h @@ -4,98 +4,21 @@ #ifndef PALACE_LIBCEED_HDIV_QF_H #define PALACE_LIBCEED_HDIV_QF_H -#include "coeff_qf.h" -#include "utils_geom_qf.h" -#include "utils_qf.h" - // libCEED QFunctions for H(div) operators (Piola transformation u = J / det(J) ̂u). 
// Note: J / det(J) = adj(adj(J)^T / det(J))^T // in[0] is geometry quadrature data, shape [ncomp=2+space_dim*dim, Q] // in[1] is active vector, shape [qcomp=dim, ncomp=1, Q] // out[0] is active vector, shape [qcomp=dim, ncomp=1, Q] -CEED_QFUNCTION(f_apply_hdiv_22)(void *__restrict__ ctx, CeedInt Q, - const CeedScalar *const *in, CeedScalar *const *out) -{ - const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1]; - CeedScalar *v = out[0]; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - const CeedScalar u_loc[2] = {u[i + Q * 0], u[i + Q * 1]}; - CeedScalar coeff[3], adjJt_loc[4], J_loc[4], v_loc[2]; - CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); - MatUnpack22(adjJt + i, Q, adjJt_loc); - AdjJt22(adjJt_loc, J_loc); - MultAtBCx22(J_loc, coeff, J_loc, u_loc, v_loc); - - v[i + Q * 0] = wdetJ[i] * v_loc[0]; - v[i + Q * 1] = wdetJ[i] * v_loc[1]; - } - return 0; -} - -CEED_QFUNCTION(f_apply_hdiv_33)(void *__restrict__ ctx, CeedInt Q, - const CeedScalar *const *in, CeedScalar *const *out) -{ - const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1]; - CeedScalar *v = out[0]; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - const CeedScalar u_loc[3] = {u[i + Q * 0], u[i + Q * 1], u[i + Q * 2]}; - CeedScalar coeff[6], adjJt_loc[9], J_loc[9], v_loc[3]; - CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); - MatUnpack33(adjJt + i, Q, adjJt_loc); - AdjJt33(adjJt_loc, J_loc); - MultAtBCx33(J_loc, coeff, J_loc, u_loc, v_loc); - - v[i + Q * 0] = wdetJ[i] * v_loc[0]; - v[i + Q * 1] = wdetJ[i] * v_loc[1]; - v[i + Q * 2] = wdetJ[i] * v_loc[2]; - } - return 0; -} - -CEED_QFUNCTION(f_apply_hdiv_21)(void *__restrict__ ctx, CeedInt Q, - const CeedScalar *const *in, CeedScalar *const *out) -{ - const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1]; - CeedScalar *v = out[0]; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - const 
CeedScalar u_loc[1] = {u[i + Q * 0]}; - CeedScalar coeff[3], adjJt_loc[2], J_loc[2], v_loc[2]; - CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); - MatUnpack21(adjJt + i, Q, adjJt_loc); - AdjJt21(adjJt_loc, J_loc); - MultAtBCx21(J_loc, coeff, J_loc, u_loc, v_loc); - - v[i + Q * 0] = wdetJ[i] * v_loc[0]; - } - return 0; -} - -CEED_QFUNCTION(f_apply_hdiv_32)(void *__restrict__ ctx, CeedInt Q, - const CeedScalar *const *in, CeedScalar *const *out) -{ - const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1]; - CeedScalar *v = out[0]; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - const CeedScalar u_loc[2] = {u[i + Q * 0], u[i + Q * 1]}; - CeedScalar coeff[6], adjJt_loc[6], J_loc[6], v_loc[3]; - CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); - MatUnpack32(adjJt + i, Q, adjJt_loc); - AdjJt32(adjJt_loc, J_loc); - MultAtBCx32(J_loc, coeff, J_loc, u_loc, v_loc); +// Build functions assemble the quadrature point data, stored as a symmetric matrix. - v[i + Q * 0] = wdetJ[i] * v_loc[0]; - v[i + Q * 1] = wdetJ[i] * v_loc[1]; - } - return 0; -} +#include "21/hdiv_21_qf.h" +#include "21/hdiv_build_21_qf.h" +#include "22/hdiv_22_qf.h" +#include "22/hdiv_build_22_qf.h" +#include "32/hdiv_32_qf.h" +#include "32/hdiv_build_32_qf.h" +#include "33/hdiv_33_qf.h" +#include "33/hdiv_build_33_qf.h" #endif // PALACE_LIBCEED_HDIV_QF_H diff --git a/palace/fem/qfunctions/hdivmass_build_qf.h b/palace/fem/qfunctions/hdivmass_build_qf.h deleted file mode 100644 index e802638ab..000000000 --- a/palace/fem/qfunctions/hdivmass_build_qf.h +++ /dev/null @@ -1,108 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
-// SPDX-License-Identifier: Apache-2.0 - -#ifndef PALACE_LIBCEED_HDIV_MASS_BUILD_QF_H -#define PALACE_LIBCEED_HDIV_MASS_BUILD_QF_H - -#include "coeff_qf.h" -#include "utils_geom_qf.h" -#include "utils_qf.h" - -// Build functions assemble the quadrature point data, stored as a symmetric matrix. - -CEED_QFUNCTION(f_build_hdivmass_22)(void *__restrict__ ctx, CeedInt Q, - const CeedScalar *const *in, CeedScalar *const *out) -{ - const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *qw = in[1]; - CeedScalar *__restrict__ qd1 = out[0], *__restrict__ qd2 = out[0] + 3 * Q; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - { - CeedScalar coeff[3], adjJt_loc[4], qd_loc[3]; - CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); - MatUnpack22(adjJt + i, Q, adjJt_loc); - MultAtBA22(adjJt_loc, coeff, qd_loc); - - qd1[i + Q * 0] = wdetJ[i] * qd_loc[0]; - qd1[i + Q * 1] = wdetJ[i] * qd_loc[1]; - qd1[i + Q * 2] = wdetJ[i] * qd_loc[2]; - } - { - const CeedScalar coeff = - CoeffUnpack1(CoeffPairSecond2((const CeedIntScalar *)ctx), (CeedInt)attr[i]); - - qd2[i] = coeff * qw[i] * qw[i] / wdetJ[i]; - } - } - return 0; -} - -CEED_QFUNCTION(f_build_hdivmass_33)(void *__restrict__ ctx, CeedInt Q, - const CeedScalar *const *in, CeedScalar *const *out) -{ - const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q; - CeedScalar *__restrict__ qd1 = out[0], *__restrict__ qd2 = out[0] + 6 * Q; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - { - CeedScalar coeff[6], adjJt_loc[9], qd_loc[6]; - CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); - MatUnpack33(adjJt + i, Q, adjJt_loc); - MultAtBA33(adjJt_loc, coeff, qd_loc); - - qd1[i + Q * 0] = wdetJ[i] * qd_loc[0]; - qd1[i + Q * 1] = wdetJ[i] * qd_loc[1]; - qd1[i + Q * 2] = wdetJ[i] * qd_loc[2]; - qd1[i + Q * 3] = wdetJ[i] * qd_loc[3]; - qd1[i + Q * 4] = wdetJ[i] * qd_loc[4]; - qd1[i + Q * 5] = wdetJ[i] * qd_loc[5]; - } - { - CeedScalar coeff[6], adjJt_loc[9], 
J_loc[9], qd_loc[6]; - CoeffUnpack3(CoeffPairSecond3((const CeedIntScalar *)ctx), (CeedInt)attr[i], coeff); - MatUnpack33(adjJt + i, Q, adjJt_loc); - AdjJt33(adjJt_loc, J_loc); - MultAtBA33(J_loc, coeff, qd_loc); - - qd2[i + Q * 0] = wdetJ[i] * qd_loc[0]; - qd2[i + Q * 1] = wdetJ[i] * qd_loc[1]; - qd2[i + Q * 2] = wdetJ[i] * qd_loc[2]; - qd2[i + Q * 3] = wdetJ[i] * qd_loc[3]; - qd2[i + Q * 4] = wdetJ[i] * qd_loc[4]; - qd2[i + Q * 5] = wdetJ[i] * qd_loc[5]; - } - } - return 0; -} - -CEED_QFUNCTION(f_build_hdivmass_32)(void *__restrict__ ctx, CeedInt Q, - const CeedScalar *const *in, CeedScalar *const *out) -{ - const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *qw = in[1]; - CeedScalar *__restrict__ qd1 = out[0], *__restrict__ qd2 = out[0] + 3 * Q; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - { - CeedScalar coeff[6], adjJt_loc[6], qd_loc[3]; - CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); - MatUnpack32(adjJt + i, Q, adjJt_loc); - MultAtBA32(adjJt_loc, coeff, qd_loc); - - qd1[i + Q * 0] = wdetJ[i] * qd_loc[0]; - qd1[i + Q * 1] = wdetJ[i] * qd_loc[1]; - qd1[i + Q * 2] = wdetJ[i] * qd_loc[2]; - } - { - const CeedScalar coeff = - CoeffUnpack1(CoeffPairSecond3((const CeedIntScalar *)ctx), (CeedInt)attr[i]); - - qd2[i] = coeff * qw[i] * qw[i] / wdetJ[i]; - } - } - return 0; -} - -#endif // PALACE_LIBCEED_CURLCURL_MASS_BUILD_QF_H diff --git a/palace/fem/qfunctions/hdivmass_qf.h b/palace/fem/qfunctions/hdivmass_qf.h index 0b4d08e88..181dd7b8c 100644 --- a/palace/fem/qfunctions/hdivmass_qf.h +++ b/palace/fem/qfunctions/hdivmass_qf.h @@ -4,10 +4,6 @@ #ifndef PALACE_LIBCEED_HDIV_MASS_QF_H #define PALACE_LIBCEED_HDIV_MASS_QF_H -#include "coeff_qf.h" -#include "utils_geom_qf.h" -#include "utils_qf.h" - // libCEED QFunctions for H(div) + H(curl) mass operators in 3D (Piola transformations u = // J / det(J) ̂u and u = adj(J)^T / det(J) ̂u). 
// Note: J / det(J) = adj(adj(J)^T / det(J))^T @@ -26,98 +22,13 @@ // out[0] is active vector, shape [qcomp=dim, ncomp=1, Q] // out[1] is active vector curl, shape [ncomp=1, Q] -CEED_QFUNCTION(f_apply_hdivmass_22)(void *__restrict__ ctx, CeedInt Q, - const CeedScalar *const *in, CeedScalar *const *out) -{ - const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *qw = in[1], - *u = in[2], *curlu = in[3]; - CeedScalar *__restrict__ v = out[0], *__restrict__ curlv = out[1]; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - { - const CeedScalar u_loc[2] = {u[i + Q * 0], u[i + Q * 1]}; - CeedScalar coeff[3], adjJt_loc[4], v_loc[2]; - CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); - MatUnpack22(adjJt + i, Q, adjJt_loc); - MultAtBCx22(adjJt_loc, coeff, adjJt_loc, u_loc, v_loc); - - v[i + Q * 0] = wdetJ[i] * v_loc[0]; - v[i + Q * 1] = wdetJ[i] * v_loc[1]; - } - { - const CeedScalar coeff = - CoeffUnpack1(CoeffPairSecond2((const CeedIntScalar *)ctx), (CeedInt)attr[i]); - - curlv[i] = (coeff * qw[i] * qw[i] / wdetJ[i]) * curlu[i]; - } - } - return 0; -} - -CEED_QFUNCTION(f_apply_hdivmass_33)(void *__restrict__ ctx, CeedInt Q, - const CeedScalar *const *in, CeedScalar *const *out) -{ - const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *u = in[1], - *curlu = in[2]; - CeedScalar *__restrict__ v = out[0], *__restrict__ curlv = out[1]; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - { - const CeedScalar u_loc[3] = {u[i + Q * 0], u[i + Q * 1], u[i + Q * 2]}; - CeedScalar coeff[6], adjJt_loc[9], v_loc[3]; - CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); - MatUnpack33(adjJt + i, Q, adjJt_loc); - MultAtBCx33(adjJt_loc, coeff, adjJt_loc, u_loc, v_loc); - - v[i + Q * 0] = wdetJ[i] * v_loc[0]; - v[i + Q * 1] = wdetJ[i] * v_loc[1]; - v[i + Q * 2] = wdetJ[i] * v_loc[2]; - } - { - const CeedScalar u_loc[3] = {curlu[i + Q * 0], curlu[i + Q * 1], curlu[i + Q * 2]}; - CeedScalar coeff[6], 
adjJt_loc[9], J_loc[9], v_loc[3]; - CoeffUnpack3(CoeffPairSecond3((const CeedIntScalar *)ctx), (CeedInt)attr[i], coeff); - MatUnpack33(adjJt + i, Q, adjJt_loc); - AdjJt33(adjJt_loc, J_loc); - MultAtBCx33(J_loc, coeff, J_loc, u_loc, v_loc); - - curlv[i + Q * 0] = wdetJ[i] * v_loc[0]; - curlv[i + Q * 1] = wdetJ[i] * v_loc[1]; - curlv[i + Q * 2] = wdetJ[i] * v_loc[2]; - } - } - return 0; -} - -CEED_QFUNCTION(f_apply_hdivmass_32)(void *__restrict__ ctx, CeedInt Q, - const CeedScalar *const *in, CeedScalar *const *out) -{ - const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *qw = in[1], - *u = in[2], *curlu = in[3]; - CeedScalar *__restrict__ v = out[0], *__restrict__ curlv = out[1]; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - { - const CeedScalar u_loc[2] = {u[i + Q * 0], u[i + Q * 1]}; - CeedScalar coeff[6], adjJt_loc[6], v_loc[3]; - CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); - MatUnpack32(adjJt + i, Q, adjJt_loc); - MultAtBCx32(adjJt_loc, coeff, adjJt_loc, u_loc, v_loc); - - v[i + Q * 0] = wdetJ[i] * v_loc[0]; - v[i + Q * 1] = wdetJ[i] * v_loc[1]; - } - { - const CeedScalar coeff = - CoeffUnpack1(CoeffPairSecond3((const CeedIntScalar *)ctx), (CeedInt)attr[i]); +// Build functions assemble the quadrature point data, stored as a symmetric matrix. - curlv[i] = (coeff * qw[i] * qw[i] / wdetJ[i]) * curlu[i]; - } - } - return 0; -} +#include "22/hdivmass_22_qf.h" +#include "22/hdivmass_build_22_qf.h" +#include "32/hdivmass_32_qf.h" +#include "32/hdivmass_build_32_qf.h" +#include "33/hdivmass_33_qf.h" +#include "33/hdivmass_build_33_qf.h" -#endif // PALACE_LIBCEED_CURLCURL_MASS_QF_H +#endif // PALACE_LIBCEED_HDIV_MASS_QF_H diff --git a/palace/fem/qfunctions/l2_build_qf.h b/palace/fem/qfunctions/l2_build_qf.h deleted file mode 100644 index 0f27ce70a..000000000 --- a/palace/fem/qfunctions/l2_build_qf.h +++ /dev/null @@ -1,67 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
-// SPDX-License-Identifier: Apache-2.0 - -#ifndef PALACE_LIBCEED_L2_BUILD_QF_H -#define PALACE_LIBCEED_L2_BUILD_QF_H - -#include "coeff_qf.h" - -// Build functions assemble the quadrature point data, stored as a symmetric matrix. - -CEED_QFUNCTION(f_build_l2_1)(void *__restrict__ ctx, CeedInt Q, const CeedScalar *const *in, - CeedScalar *const *out) -{ - const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *qw = in[1]; - CeedScalar *qd = out[0]; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - const CeedScalar coeff = CoeffUnpack1((const CeedIntScalar *)ctx, (CeedInt)attr[i]); - - qd[i] = coeff * qw[i] * qw[i] / wdetJ[i]; - } - return 0; -} - -CEED_QFUNCTION(f_build_l2_2)(void *__restrict__ ctx, CeedInt Q, const CeedScalar *const *in, - CeedScalar *const *out) -{ - const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *qw = in[1]; - CeedScalar *qd = out[0]; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - CeedScalar coeff[3]; - CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); - const CeedScalar w = qw[i] * qw[i] / wdetJ[i]; - - qd[i + Q * 0] = w * coeff[0]; - qd[i + Q * 1] = w * coeff[1]; - qd[i + Q * 2] = w * coeff[2]; - } - return 0; -} - -CEED_QFUNCTION(f_build_l2_3)(void *__restrict__ ctx, CeedInt Q, const CeedScalar *const *in, - CeedScalar *const *out) -{ - const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *qw = in[1]; - CeedScalar *qd = out[0]; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - CeedScalar coeff[6]; - CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); - const CeedScalar w = qw[i] * qw[i] / wdetJ[i]; - - qd[i + Q * 0] = w * coeff[0]; - qd[i + Q * 1] = w * coeff[1]; - qd[i + Q * 2] = w * coeff[2]; - qd[i + Q * 3] = w * coeff[3]; - qd[i + Q * 4] = w * coeff[4]; - qd[i + Q * 5] = w * coeff[5]; - } - return 0; -} - -#endif // PALACE_LIBCEED_L2_BUILD_QF_H diff --git a/palace/fem/qfunctions/l2_qf.h b/palace/fem/qfunctions/l2_qf.h index 071ee3371..6c8072a43 100644 --- 
a/palace/fem/qfunctions/l2_qf.h +++ b/palace/fem/qfunctions/l2_qf.h @@ -4,69 +4,19 @@ #ifndef PALACE_LIBCEED_L2_QF_H #define PALACE_LIBCEED_L2_QF_H -#include "coeff_qf.h" - // libCEED QFunctions for L2 operators (Piola transformation u = 1 / det(J) ̂u). // in[0] is geometry quadrature data, shape [ncomp=2+space_dim*dim, Q] // in[1] is quadrature weights, shape [Q] // in[2] is active vector, shape [ncomp=vdim, Q] // out[0] is active vector, shape [ncomp=vdim, Q] -CEED_QFUNCTION(f_apply_l2_1)(void *__restrict__ ctx, CeedInt Q, const CeedScalar *const *in, - CeedScalar *const *out) -{ - const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *qw = in[1], *u = in[2]; - CeedScalar *v = out[0]; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - const CeedScalar coeff = CoeffUnpack1((const CeedIntScalar *)ctx, (CeedInt)attr[i]); - - v[i] = (coeff * qw[i] * qw[i] / wdetJ[i]) * u[i]; - } - return 0; -} - -CEED_QFUNCTION(f_apply_l2_2)(void *__restrict__ ctx, CeedInt Q, const CeedScalar *const *in, - CeedScalar *const *out) -{ - const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *qw = in[1], *u = in[2]; - CeedScalar *v = out[0]; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - CeedScalar coeff[3]; - CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); - const CeedScalar w = qw[i] * qw[i] / wdetJ[i]; - - const CeedScalar u0 = u[i + Q * 0]; - const CeedScalar u1 = u[i + Q * 1]; - v[i + Q * 0] = w * (coeff[0] * u0 + coeff[1] * u1); - v[i + Q * 1] = w * (coeff[1] * u0 + coeff[2] * u1); - } - return 0; -} - -CEED_QFUNCTION(f_apply_l2_3)(void *__restrict__ ctx, CeedInt Q, const CeedScalar *const *in, - CeedScalar *const *out) -{ - const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *qw = in[1], *u = in[2]; - CeedScalar *v = out[0]; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - CeedScalar coeff[6]; - CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); - const CeedScalar w = qw[i] * qw[i] / wdetJ[i]; +// Build functions assemble the 
quadrature point data, stored as a symmetric matrix. - const CeedScalar u0 = u[i + Q * 0]; - const CeedScalar u1 = u[i + Q * 1]; - const CeedScalar u2 = u[i + Q * 2]; - v[i + Q * 0] = w * (coeff[0] * u0 + coeff[1] * u1 + coeff[2] * u2); - v[i + Q * 1] = w * (coeff[1] * u0 + coeff[3] * u1 + coeff[4] * u2); - v[i + Q * 2] = w * (coeff[2] * u0 + coeff[4] * u1 + coeff[5] * u2); - } - return 0; -} +#include "1/l2_1_qf.h" +#include "1/l2_build_1_qf.h" +#include "2/l2_2_qf.h" +#include "2/l2_build_2_qf.h" +#include "3/l2_3_qf.h" +#include "3/l2_build_3_qf.h" #endif // PALACE_LIBCEED_L2_QF_H diff --git a/palace/fem/qfunctions/l2mass_build_qf.h b/palace/fem/qfunctions/l2mass_build_qf.h deleted file mode 100644 index aec894643..000000000 --- a/palace/fem/qfunctions/l2mass_build_qf.h +++ /dev/null @@ -1,130 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: Apache-2.0 - -#ifndef PALACE_LIBCEED_L2_MASS_BUILD_QF_H -#define PALACE_LIBCEED_L2_MASS_BUILD_QF_H - -#include "coeff_qf.h" -#include "utils_geom_qf.h" -#include "utils_qf.h" - -// Build functions assemble the quadrature point data, stored as a symmetric matrix. 
- -CEED_QFUNCTION(f_build_l2mass_22)(void *__restrict__ ctx, CeedInt Q, - const CeedScalar *const *in, CeedScalar *const *out) -{ - const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *qw = in[1]; - CeedScalar *qd1 = out[0], *qd2 = out[0] + 3 * Q; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - { - CeedScalar coeff[3], adjJt_loc[4], J_loc[4], qd_loc[3]; - CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); - MatUnpack22(adjJt + i, Q, adjJt_loc); - AdjJt22(adjJt_loc, J_loc); - MultAtBA22(J_loc, coeff, qd_loc); - - qd1[i + Q * 0] = wdetJ[i] * qd_loc[0]; - qd1[i + Q * 1] = wdetJ[i] * qd_loc[1]; - qd1[i + Q * 2] = wdetJ[i] * qd_loc[2]; - } - { - const CeedScalar coeff = - CoeffUnpack1(CoeffPairSecond2((const CeedIntScalar *)ctx), (CeedInt)attr[i]); - - qd2[i] = coeff * qw[i] * qw[i] / wdetJ[i]; - } - } - return 0; -} - -CEED_QFUNCTION(f_build_l2mass_33)(void *__restrict__ ctx, CeedInt Q, - const CeedScalar *const *in, CeedScalar *const *out) -{ - const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *qw = in[1]; - CeedScalar *qd1 = out[0], *qd2 = out[0] + 6 * Q; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - { - CeedScalar coeff[6], adjJt_loc[9], J_loc[9], qd_loc[6]; - CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); - MatUnpack33(adjJt + i, Q, adjJt_loc); - AdjJt33(adjJt_loc, J_loc); - MultAtBA33(J_loc, coeff, qd_loc); - - qd1[i + Q * 0] = wdetJ[i] * qd_loc[0]; - qd1[i + Q * 1] = wdetJ[i] * qd_loc[1]; - qd1[i + Q * 2] = wdetJ[i] * qd_loc[2]; - qd1[i + Q * 3] = wdetJ[i] * qd_loc[3]; - qd1[i + Q * 4] = wdetJ[i] * qd_loc[4]; - qd1[i + Q * 5] = wdetJ[i] * qd_loc[5]; - } - { - const CeedScalar coeff = - CoeffUnpack1(CoeffPairSecond3((const CeedIntScalar *)ctx), (CeedInt)attr[i]); - - qd2[i] = coeff * qw[i] * qw[i] / wdetJ[i]; - } - } - return 0; -} - -CEED_QFUNCTION(f_build_l2mass_21)(void *__restrict__ ctx, CeedInt Q, - const CeedScalar *const *in, CeedScalar *const *out) -{ 
- const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *qw = in[1]; - CeedScalar *qd1 = out[0], *qd2 = out[0] + Q; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - { - CeedScalar coeff[3], adjJt_loc[2], J_loc[2], qd_loc[1]; - CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); - MatUnpack21(adjJt + i, Q, adjJt_loc); - AdjJt21(adjJt_loc, J_loc); - MultAtBA21(J_loc, coeff, qd_loc); - - qd1[i + Q * 0] = wdetJ[i] * qd_loc[0]; - } - { - const CeedScalar coeff = - CoeffUnpack1(CoeffPairSecond2((const CeedIntScalar *)ctx), (CeedInt)attr[i]); - - qd2[i] = coeff * qw[i] * qw[i] / wdetJ[i]; - } - } - return 0; -} - -CEED_QFUNCTION(f_build_l2mass_32)(void *__restrict__ ctx, CeedInt Q, - const CeedScalar *const *in, CeedScalar *const *out) -{ - const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *qw = in[1]; - CeedScalar *qd1 = out[0], *qd2 = out[0] + 3 * Q; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - { - CeedScalar coeff[6], adjJt_loc[6], J_loc[6], qd_loc[3]; - CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); - MatUnpack32(adjJt + i, Q, adjJt_loc); - AdjJt32(adjJt_loc, J_loc); - MultAtBA32(J_loc, coeff, qd_loc); - - qd1[i + Q * 0] = wdetJ[i] * qd_loc[0]; - qd1[i + Q * 1] = wdetJ[i] * qd_loc[1]; - qd1[i + Q * 2] = wdetJ[i] * qd_loc[2]; - } - { - const CeedScalar coeff = - CoeffUnpack1(CoeffPairSecond3((const CeedIntScalar *)ctx), (CeedInt)attr[i]); - - qd2[i] = coeff * qw[i] * qw[i] / wdetJ[i]; - } - } - return 0; -} - -#endif // PALACE_LIBCEED_L2_MASS_BUILD_QF_H diff --git a/palace/fem/qfunctions/l2mass_qf.h b/palace/fem/qfunctions/l2mass_qf.h index b6e16521c..d37672c0f 100644 --- a/palace/fem/qfunctions/l2mass_qf.h +++ b/palace/fem/qfunctions/l2mass_qf.h @@ -4,10 +4,6 @@ #ifndef PALACE_LIBCEED_L2_MASS_QF_H #define PALACE_LIBCEED_L2_MASS_QF_H -#include "coeff_qf.h" -#include "utils_geom_qf.h" -#include "utils_qf.h" - // libCEED QFunctions for L2 + H(div) mass operators (Piola 
transformations u = 1 / det(J) ̂u // and u = J / det(J) ̂u). // Note: J / det(J) = adj(adj(J)^T / det(J))^T @@ -18,124 +14,15 @@ // out[0] is active vector, shape [qcomp=dim, ncomp=1, Q] // out[1] is active vector divergence, shape [ncomp=1, Q] -CEED_QFUNCTION(f_apply_l2mass_22)(void *__restrict__ ctx, CeedInt Q, - const CeedScalar *const *in, CeedScalar *const *out) -{ - const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *qw = in[1], - *u = in[2], *divu = in[3]; - CeedScalar *__restrict__ v = out[0], *__restrict__ divv = out[1]; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - { - const CeedScalar u_loc[2] = {u[i + Q * 0], u[i + Q * 1]}; - CeedScalar coeff[3], adjJt_loc[4], J_loc[4], v_loc[2]; - CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); - MatUnpack22(adjJt + i, Q, adjJt_loc); - AdjJt22(adjJt_loc, J_loc); - MultAtBCx22(J_loc, coeff, J_loc, u_loc, v_loc); - - v[i + Q * 0] = wdetJ[i] * v_loc[0]; - v[i + Q * 1] = wdetJ[i] * v_loc[1]; - } - { - const CeedScalar coeff = - CoeffUnpack1(CoeffPairSecond2((const CeedIntScalar *)ctx), (CeedInt)attr[i]); - - divv[i] = (coeff * qw[i] * qw[i] / wdetJ[i]) * divu[i]; - } - } - return 0; -} - -CEED_QFUNCTION(f_apply_l2mass_33)(void *__restrict__ ctx, CeedInt Q, - const CeedScalar *const *in, CeedScalar *const *out) -{ - const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *qw = in[1], - *u = in[2], *divu = in[3]; - CeedScalar *__restrict__ v = out[0], *__restrict__ divv = out[1]; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - { - const CeedScalar u_loc[3] = {u[i + Q * 0], u[i + Q * 1], u[i + Q * 2]}; - CeedScalar coeff[6], adjJt_loc[9], J_loc[9], v_loc[3]; - CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); - MatUnpack33(adjJt + i, Q, adjJt_loc); - AdjJt33(adjJt_loc, J_loc); - MultAtBCx33(J_loc, coeff, J_loc, u_loc, v_loc); - - v[i + Q * 0] = wdetJ[i] * v_loc[0]; - v[i + Q * 1] = wdetJ[i] * v_loc[1]; - v[i + Q * 2] = 
wdetJ[i] * v_loc[2]; - } - { - const CeedScalar coeff = - CoeffUnpack1(CoeffPairSecond3((const CeedIntScalar *)ctx), (CeedInt)attr[i]); - - divv[i] = (coeff * qw[i] * qw[i] / wdetJ[i]) * divu[i]; - } - } - return 0; -} - -CEED_QFUNCTION(f_apply_l2mass_21)(void *__restrict__ ctx, CeedInt Q, - const CeedScalar *const *in, CeedScalar *const *out) -{ - const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *qw = in[1], - *u = in[2], *divu = in[3]; - CeedScalar *__restrict__ v = out[0], *__restrict__ divv = out[1]; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - { - const CeedScalar u_loc[1] = {u[i + Q * 0]}; - CeedScalar coeff[3], adjJt_loc[2], J_loc[2], v_loc[2]; - CoeffUnpack2((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); - MatUnpack21(adjJt + i, Q, adjJt_loc); - AdjJt21(adjJt_loc, J_loc); - MultAtBCx21(J_loc, coeff, J_loc, u_loc, v_loc); - - v[i + Q * 0] = wdetJ[i] * v_loc[0]; - } - { - const CeedScalar coeff = - CoeffUnpack1(CoeffPairSecond2((const CeedIntScalar *)ctx), (CeedInt)attr[i]); - - divv[i] = (coeff * qw[i] * qw[i] / wdetJ[i]) * divu[i]; - } - } - return 0; -} - -CEED_QFUNCTION(f_apply_l2mass_32)(void *__restrict__ ctx, CeedInt Q, - const CeedScalar *const *in, CeedScalar *const *out) -{ - const CeedScalar *attr = in[0], *wdetJ = in[0] + Q, *adjJt = in[0] + 2 * Q, *qw = in[1], - *u = in[2], *divu = in[3]; - CeedScalar *__restrict__ v = out[0], *__restrict__ divv = out[1]; - - CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - { - const CeedScalar u_loc[2] = {u[i + Q * 0], u[i + Q * 1]}; - CeedScalar coeff[6], adjJt_loc[6], J_loc[6], v_loc[3]; - CoeffUnpack3((const CeedIntScalar *)ctx, (CeedInt)attr[i], coeff); - MatUnpack32(adjJt + i, Q, adjJt_loc); - AdjJt32(adjJt_loc, J_loc); - MultAtBCx32(J_loc, coeff, J_loc, u_loc, v_loc); - - v[i + Q * 0] = wdetJ[i] * v_loc[0]; - v[i + Q * 1] = wdetJ[i] * v_loc[1]; - } - { - const CeedScalar coeff = - CoeffUnpack1(CoeffPairSecond3((const CeedIntScalar *)ctx), (CeedInt)attr[i]); 
+// Build functions assemble the quadrature point data, stored as a symmetric matrix. - divv[i] = (coeff * qw[i] * qw[i] / wdetJ[i]) * divu[i]; - } - } - return 0; -} +#include "21/l2mass_21_qf.h" +#include "21/l2mass_build_21_qf.h" +#include "22/l2mass_22_qf.h" +#include "22/l2mass_build_22_qf.h" +#include "32/l2mass_32_qf.h" +#include "32/l2mass_build_32_qf.h" +#include "33/l2mass_33_qf.h" +#include "33/l2mass_build_33_qf.h" #endif // PALACE_LIBCEED_L2_MASS_QF_H diff --git a/palace/fem/qfunctions/utils_geom_qf.h b/palace/fem/qfunctions/utils_geom_qf.h deleted file mode 100644 index b71970a0e..000000000 --- a/palace/fem/qfunctions/utils_geom_qf.h +++ /dev/null @@ -1,107 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: Apache-2.0 - -#ifndef PALACE_LIBCEED_UTILS_GEOM_QF_H -#define PALACE_LIBCEED_UTILS_GEOM_QF_H - -#include - -CEED_QFUNCTION_HELPER CeedScalar DetJ22(const CeedScalar J[4]) -{ - // J: 0 2 - // 1 3 - return J[0] * J[3] - J[1] * J[2]; -} - -CEED_QFUNCTION_HELPER CeedScalar DetJ33(const CeedScalar J[9]) -{ - // J: 0 3 6 - // 1 4 7 - // 2 5 8 - return J[0] * (J[4] * J[8] - J[5] * J[7]) - J[1] * (J[3] * J[8] - J[5] * J[6]) + - J[2] * (J[3] * J[7] - J[4] * J[6]); -} - -CEED_QFUNCTION_HELPER CeedScalar DetJ21(const CeedScalar J[2]) -{ - // J: 0 - // 1 - return sqrt(J[0] * J[0] + J[1] * J[1]); -} - -CEED_QFUNCTION_HELPER CeedScalar DetJ32(const CeedScalar J[6]) -{ - // J: 0 3 - // 1 4 - // 2 5 - const CeedScalar E = J[0] * J[0] + J[1] * J[1] + J[2] * J[2]; - const CeedScalar G = J[3] * J[3] + J[4] * J[4] + J[5] * J[5]; - const CeedScalar F = J[0] * J[3] + J[1] * J[4] + J[2] * J[5]; - return sqrt(E * G - F * F); -} - -template -CEED_QFUNCTION_HELPER CeedScalar AdjJt22(const CeedScalar J[4], CeedScalar adjJt[4]) -{ - // Compute adj(J)^T / det(J) and store the result. 
- // J: 0 2 adj(J): J22 -J12 - // 1 3 -J21 J11 - adjJt[0] = J[3]; - adjJt[1] = -J[2]; - adjJt[2] = -J[1]; - adjJt[3] = J[0]; - return ComputeDet ? (J[0] * J[3] - J[1] * J[2]) : 0.0; -} - -template -CEED_QFUNCTION_HELPER CeedScalar AdjJt33(const CeedScalar J[9], CeedScalar adjJt[9]) -{ - // Compute adj(J)^T / det(J) and store the result. - // J: 0 3 6 - // 1 4 7 - // 2 5 8 - adjJt[0] = J[4] * J[8] - J[7] * J[5]; - adjJt[3] = J[7] * J[2] - J[1] * J[8]; - adjJt[6] = J[1] * J[5] - J[4] * J[2]; - adjJt[1] = J[6] * J[5] - J[3] * J[8]; - adjJt[4] = J[0] * J[8] - J[6] * J[2]; - adjJt[7] = J[3] * J[2] - J[0] * J[5]; - adjJt[2] = J[3] * J[7] - J[6] * J[4]; - adjJt[5] = J[6] * J[1] - J[0] * J[7]; - adjJt[8] = J[0] * J[4] - J[3] * J[1]; - return ComputeDet ? (J[0] * adjJt[0] + J[1] * adjJt[1] + J[2] * adjJt[2]) : 0.0; -} - -template -CEED_QFUNCTION_HELPER CeedScalar AdjJt21(const CeedScalar J[2], CeedScalar adjJt[2]) -{ - // Compute adj(J)^T / det(J) and store the result. - // J: 0 adj(J): 1/sqrt(J^T J) J^T - // 1 - const CeedScalar d = sqrt(J[0] * J[0] + J[1] * J[1]); - adjJt[0] = J[0] / d; - adjJt[1] = J[1] / d; - return ComputeDet ? d : 0.0; -} - -template -CEED_QFUNCTION_HELPER CeedScalar AdjJt32(const CeedScalar J[6], CeedScalar adjJt[6]) -{ - // Compute adj(J)^T / det(J) and store the result. - // J: 0 3 - // 1 4 - // 2 5 - const CeedScalar E = J[0] * J[0] + J[1] * J[1] + J[2] * J[2]; - const CeedScalar G = J[3] * J[3] + J[4] * J[4] + J[5] * J[5]; - const CeedScalar F = J[0] * J[3] + J[1] * J[4] + J[2] * J[5]; - const CeedScalar d = sqrt(E * G - F * F); - adjJt[0] = (G * J[0] - F * J[3]) / d; - adjJt[1] = (G * J[1] - F * J[4]) / d; - adjJt[2] = (G * J[2] - F * J[5]) / d; - adjJt[3] = (E * J[3] - F * J[0]) / d; - adjJt[4] = (E * J[4] - F * J[1]) / d; - adjJt[5] = (E * J[5] - F * J[2]) / d; - return ComputeDet ? 
d : 0.0; -} - -#endif // PALACE_LIBCEED_UTILS_QF_H diff --git a/palace/fem/qfunctions/utils_qf.h b/palace/fem/qfunctions/utils_qf.h deleted file mode 100644 index 76b322e21..000000000 --- a/palace/fem/qfunctions/utils_qf.h +++ /dev/null @@ -1,398 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: Apache-2.0 - -#ifndef PALACE_LIBCEED_UTILS_QF_H -#define PALACE_LIBCEED_UTILS_QF_H - -CEED_QFUNCTION_HELPER void MatUnpack22(const CeedScalar *A, const CeedInt A_stride, - CeedScalar A_loc[4]) -{ - A_loc[0] = A[A_stride * 0]; - A_loc[1] = A[A_stride * 1]; - A_loc[2] = A[A_stride * 2]; - A_loc[3] = A[A_stride * 3]; -} - -CEED_QFUNCTION_HELPER void MatUnpack33(const CeedScalar *A, const CeedInt A_stride, - CeedScalar A_loc[9]) -{ - A_loc[0] = A[A_stride * 0]; - A_loc[1] = A[A_stride * 1]; - A_loc[2] = A[A_stride * 2]; - A_loc[3] = A[A_stride * 3]; - A_loc[4] = A[A_stride * 4]; - A_loc[5] = A[A_stride * 5]; - A_loc[6] = A[A_stride * 6]; - A_loc[7] = A[A_stride * 7]; - A_loc[8] = A[A_stride * 8]; -} - -CEED_QFUNCTION_HELPER void MatUnpack21(const CeedScalar *A, const CeedInt A_stride, - CeedScalar A_loc[2]) -{ - A_loc[0] = A[A_stride * 0]; - A_loc[1] = A[A_stride * 1]; -} - -CEED_QFUNCTION_HELPER void MatUnpack32(const CeedScalar *A, const CeedInt A_stride, - CeedScalar A_loc[6]) -{ - A_loc[0] = A[A_stride * 0]; - A_loc[1] = A[A_stride * 1]; - A_loc[2] = A[A_stride * 2]; - A_loc[3] = A[A_stride * 3]; - A_loc[4] = A[A_stride * 4]; - A_loc[5] = A[A_stride * 5]; -} - -CEED_QFUNCTION_HELPER void MultAtBCx22(const CeedScalar A[4], const CeedScalar B[3], - const CeedScalar C[4], const CeedScalar x[2], - CeedScalar y[2]) -{ - // A: 0 2 B: 0 1 C: 0 2 - // 1 3 1 2 1 3 - CeedScalar z[2]; - - y[0] = C[0] * x[0] + C[2] * x[1]; - y[1] = C[1] * x[0] + C[3] * x[1]; - - z[0] = B[0] * y[0] + B[1] * y[1]; - z[1] = B[1] * y[0] + B[2] * y[1]; - - y[0] = A[0] * z[0] + A[1] * z[1]; - y[1] = A[2] * z[0] + A[3] * z[1]; -} - 
-CEED_QFUNCTION_HELPER void MultAtBCx33(const CeedScalar A[9], const CeedScalar B[6], - const CeedScalar C[9], const CeedScalar x[3], - CeedScalar y[3]) -{ - // A: 0 3 6 B: 0 1 2 C: 0 3 6 - // 1 4 7 1 3 4 1 4 7 - // 2 5 8 2 4 5 2 5 8 - CeedScalar z[3]; - - y[0] = C[0] * x[0] + C[3] * x[1] + C[6] * x[2]; - y[1] = C[1] * x[0] + C[4] * x[1] + C[7] * x[2]; - y[2] = C[2] * x[0] + C[5] * x[1] + C[8] * x[2]; - - z[0] = B[0] * y[0] + B[1] * y[1] + B[2] * y[2]; - z[1] = B[1] * y[0] + B[3] * y[1] + B[4] * y[2]; - z[2] = B[2] * y[0] + B[4] * y[1] + B[5] * y[2]; - - y[0] = A[0] * z[0] + A[1] * z[1] + A[2] * z[2]; - y[1] = A[3] * z[0] + A[4] * z[1] + A[5] * z[2]; - y[2] = A[6] * z[0] + A[7] * z[1] + A[8] * z[2]; -} - -CEED_QFUNCTION_HELPER void MultAtBCx21(const CeedScalar A[2], const CeedScalar B[3], - const CeedScalar C[2], const CeedScalar x[1], - CeedScalar y[2]) -{ - // A: 0 B: 0 1 C: 0 - // 1 1 2 1 - CeedScalar z[2]; - - y[0] = C[0] * x[0]; - y[1] = C[1] * x[0]; - - z[0] = B[0] * y[0] + B[1] * y[1]; - z[1] = B[1] * y[0] + B[2] * y[1]; - - y[0] = A[0] * z[0] + A[1] * z[1]; - y[1] = 0.0; -} - -CEED_QFUNCTION_HELPER void MultAtBCx32(const CeedScalar A[6], const CeedScalar B[6], - const CeedScalar C[6], const CeedScalar x[2], - CeedScalar y[3]) -{ - // A: 0 3 B: 0 1 2 C: 0 3 - // 1 4 1 3 4 1 4 - // 2 5 2 4 5 2 5 - CeedScalar z[3]; - - y[0] = C[0] * x[0] + C[3] * x[1]; - y[1] = C[1] * x[0] + C[4] * x[1]; - y[2] = C[2] * x[0] + C[5] * x[1]; - - z[0] = B[0] * y[0] + B[1] * y[1] + B[2] * y[2]; - z[1] = B[1] * y[0] + B[3] * y[1] + B[4] * y[2]; - z[2] = B[2] * y[0] + B[4] * y[1] + B[5] * y[2]; - - y[0] = A[0] * z[0] + A[1] * z[1] + A[2] * z[2]; - y[1] = A[3] * z[0] + A[4] * z[1] + A[5] * z[2]; - y[2] = 0.0; -} - -CEED_QFUNCTION_HELPER void MultBAx22(const CeedScalar A[4], const CeedScalar B[3], - const CeedScalar x[2], CeedScalar y[2]) -{ - // A: 0 2 B: 0 1 - // 1 3 1 2 - CeedScalar z[2]; - - z[0] = A[0] * x[0] + A[2] * x[1]; - z[1] = A[1] * x[0] + A[3] * x[1]; - - y[0] = B[0] * 
z[0] + B[1] * z[1]; - y[1] = B[1] * z[0] + B[2] * z[1]; -} - -CEED_QFUNCTION_HELPER void MultBAx33(const CeedScalar A[9], const CeedScalar B[6], - const CeedScalar x[3], CeedScalar y[3]) -{ - // A: 0 3 6 B: 0 1 2 - // 1 4 7 1 3 4 - // 2 5 8 2 4 5 - CeedScalar z[3]; - - z[0] = A[0] * x[0] + A[3] * x[1] + A[6] * x[2]; - z[1] = A[1] * x[0] + A[4] * x[1] + A[7] * x[2]; - z[2] = A[2] * x[0] + A[5] * x[1] + A[8] * x[2]; - - y[0] = B[0] * z[0] + B[1] * z[1] + B[2] * z[2]; - y[1] = B[1] * z[0] + B[3] * z[1] + B[4] * z[2]; - y[2] = B[2] * z[0] + B[4] * z[1] + B[5] * z[2]; -} - -CEED_QFUNCTION_HELPER void MultBAx21(const CeedScalar A[2], const CeedScalar B[3], - const CeedScalar x[1], CeedScalar y[1]) -{ - // A: 0 B: 0 1 - // 1 1 2 - CeedScalar z[2]; - - z[0] = A[0] * x[0]; - z[1] = A[1] * x[0]; - - y[0] = B[0] * z[0] + B[1] * z[1]; - y[1] = B[1] * z[0] + B[2] * z[1]; -} - -CEED_QFUNCTION_HELPER void MultBAx32(const CeedScalar A[6], const CeedScalar B[6], - const CeedScalar x[2], CeedScalar y[2]) -{ - // A: 0 3 B: 0 1 2 - // 1 4 1 3 4 - // 2 5 2 4 5 - CeedScalar z[3]; - - z[0] = A[0] * x[0] + A[3] * x[1]; - z[1] = A[1] * x[0] + A[4] * x[1]; - z[2] = A[2] * x[0] + A[5] * x[1]; - - y[0] = B[0] * z[0] + B[1] * z[1] + B[2] * z[2]; - y[1] = B[1] * z[0] + B[3] * z[1] + B[4] * z[2]; - y[2] = B[2] * z[0] + B[4] * z[1] + B[5] * z[2]; -} - -CEED_QFUNCTION_HELPER void MultAtBA22(const CeedScalar A[4], const CeedScalar B[3], - CeedScalar C[3]) -{ - // A: 0 2 B: 0 1 C: 0 1 - // 1 3 1 2 1 2 - - // First compute entries of R = B A. 
- const CeedScalar R11 = B[0] * A[0] + B[1] * A[1]; - const CeedScalar R21 = B[1] * A[0] + B[2] * A[1]; - const CeedScalar R12 = B[0] * A[2] + B[1] * A[3]; - const CeedScalar R22 = B[1] * A[2] + B[2] * A[3]; - - C[0] = A[0] * R11 + A[1] * R21; - C[1] = A[0] * R12 + A[1] * R22; - C[2] = A[2] * R12 + A[3] * R22; -} - -CEED_QFUNCTION_HELPER void MultAtBA33(const CeedScalar A[9], const CeedScalar B[6], - CeedScalar C[6]) -{ - // A: 0 3 6 B: 0 1 2 C: 0 1 2 - // 1 4 7 1 3 4 1 3 4 - // 2 5 8 2 4 5 2 4 5 - - // First compute entries of R = B A. - const CeedScalar R11 = B[0] * A[0] + B[1] * A[1] + B[2] * A[2]; - const CeedScalar R21 = B[1] * A[0] + B[3] * A[1] + B[4] * A[2]; - const CeedScalar R31 = B[2] * A[0] + B[4] * A[1] + B[5] * A[2]; - const CeedScalar R12 = B[0] * A[3] + B[1] * A[4] + B[2] * A[5]; - const CeedScalar R22 = B[1] * A[3] + B[3] * A[4] + B[4] * A[5]; - const CeedScalar R32 = B[2] * A[3] + B[4] * A[4] + B[5] * A[5]; - const CeedScalar R13 = B[0] * A[6] + B[1] * A[7] + B[2] * A[8]; - const CeedScalar R23 = B[1] * A[6] + B[3] * A[7] + B[4] * A[8]; - const CeedScalar R33 = B[2] * A[6] + B[4] * A[7] + B[5] * A[8]; - - C[0] = A[0] * R11 + A[1] * R21 + A[2] * R31; - C[1] = A[0] * R12 + A[1] * R22 + A[2] * R32; - C[2] = A[0] * R13 + A[1] * R23 + A[2] * R33; - C[3] = A[3] * R12 + A[4] * R22 + A[5] * R32; - C[4] = A[3] * R13 + A[4] * R23 + A[5] * R33; - C[5] = A[6] * R13 + A[7] * R23 + A[8] * R33; -} - -CEED_QFUNCTION_HELPER void MultAtBA21(const CeedScalar A[2], const CeedScalar B[3], - CeedScalar C[1]) -{ - // A: 0 B: 0 1 C: 0 - // 1 1 2 - - // First compute entries of R = B A. - const CeedScalar R11 = B[0] * A[0] + B[1] * A[1]; - const CeedScalar R21 = B[1] * A[0] + B[2] * A[1]; - - C[0] = A[0] * R11 + A[1] * R21; -} - -CEED_QFUNCTION_HELPER void MultAtBA32(const CeedScalar A[6], const CeedScalar B[6], - CeedScalar C[3]) -{ - // A: 0 3 B: 0 1 2 C: 0 1 - // 1 4 1 3 4 1 2 - // 2 5 2 4 5 - - // First compute entries of R = B A. 
- const CeedScalar R11 = B[0] * A[0] + B[1] * A[1] + B[2] * A[2]; - const CeedScalar R21 = B[1] * A[0] + B[3] * A[1] + B[4] * A[2]; - const CeedScalar R31 = B[2] * A[0] + B[4] * A[1] + B[5] * A[2]; - const CeedScalar R12 = B[0] * A[3] + B[1] * A[4] + B[2] * A[5]; - const CeedScalar R22 = B[1] * A[3] + B[3] * A[4] + B[4] * A[5]; - const CeedScalar R32 = B[2] * A[3] + B[4] * A[4] + B[5] * A[5]; - - C[0] = A[0] * R11 + A[1] * R21 + A[2] * R31; - C[1] = A[0] * R12 + A[1] * R22 + A[2] * R32; - C[2] = A[3] * R12 + A[4] * R22 + A[5] * R32; -} - -CEED_QFUNCTION_HELPER void MultAtBC22(const CeedScalar A[4], const CeedScalar B[3], - const CeedScalar C[4], CeedScalar D[4]) -{ - // A, C: 0 2 B: 0 1 D: 0 2 - // 1 3 1 2 1 3 - - // First compute entries of R = B C. - const CeedScalar R11 = B[0] * C[0] + B[1] * C[1]; - const CeedScalar R21 = B[1] * C[0] + B[2] * C[1]; - const CeedScalar R12 = B[0] * C[2] + B[1] * C[3]; - const CeedScalar R22 = B[1] * C[2] + B[2] * C[3]; - - D[0] = A[0] * R11 + A[1] * R21; - D[1] = A[2] * R11 + A[3] * R21; - D[2] = A[0] * R12 + A[1] * R22; - D[3] = A[2] * R12 + A[3] * R22; -} - -CEED_QFUNCTION_HELPER void MultAtBC33(const CeedScalar A[9], const CeedScalar B[6], - const CeedScalar C[9], CeedScalar D[9]) -{ - // A, C: 0 3 6 B: 0 1 2 D: 0 3 6 - // 1 4 7 1 3 4 1 4 7 - // 2 5 8 2 4 5 2 5 8 - - // First compute entries of R = B C. 
- const CeedScalar R11 = B[0] * C[0] + B[1] * C[1] + B[2] * C[2]; - const CeedScalar R21 = B[1] * C[0] + B[3] * C[1] + B[4] * C[2]; - const CeedScalar R31 = B[2] * C[0] + B[4] * C[1] + B[5] * C[2]; - const CeedScalar R12 = B[0] * C[3] + B[1] * C[4] + B[2] * C[5]; - const CeedScalar R22 = B[1] * C[3] + B[3] * C[4] + B[4] * C[5]; - const CeedScalar R32 = B[2] * C[3] + B[4] * C[4] + B[5] * C[5]; - const CeedScalar R13 = B[0] * C[6] + B[1] * C[7] + B[2] * C[8]; - const CeedScalar R23 = B[1] * C[6] + B[3] * C[7] + B[4] * C[8]; - const CeedScalar R33 = B[2] * C[6] + B[4] * C[7] + B[5] * C[8]; - - D[0] = A[0] * R11 + A[1] * R21 + A[2] * R31; - D[1] = A[3] * R11 + A[4] * R21 + A[5] * R31; - D[2] = A[6] * R11 + A[7] * R21 + A[8] * R31; - D[3] = A[0] * R12 + A[1] * R22 + A[2] * R32; - D[4] = A[3] * R12 + A[4] * R22 + A[5] * R32; - D[5] = A[6] * R12 + A[7] * R22 + A[8] * R32; - D[6] = A[0] * R13 + A[1] * R23 + A[2] * R33; - D[7] = A[3] * R13 + A[4] * R23 + A[5] * R33; - D[8] = A[6] * R13 + A[7] * R23 + A[8] * R33; -} - -CEED_QFUNCTION_HELPER void MultAtBC21(const CeedScalar A[2], const CeedScalar B[3], - const CeedScalar C[2], CeedScalar D[1]) -{ - // A, C: 0 B: 0 1 D: 0 - // 1 1 2 - - // First compute entries of R = B C. - const CeedScalar R11 = B[0] * C[0] + B[1] * C[1]; - const CeedScalar R21 = B[1] * C[0] + B[2] * C[1]; - - D[0] = A[0] * R11 + A[1] * R21; -} - -CEED_QFUNCTION_HELPER void MultAtBC32(const CeedScalar A[6], const CeedScalar B[6], - const CeedScalar C[6], CeedScalar D[4]) -{ - // A, C: 0 3 B: 0 1 2 D: 0 2 - // 1 4 1 3 4 1 3 - // 2 5 2 4 5 - - // First compute entries of R = B C. 
- const CeedScalar R11 = B[0] * C[0] + B[1] * C[1] + B[2] * C[2]; - const CeedScalar R21 = B[1] * C[0] + B[3] * C[1] + B[4] * C[2]; - const CeedScalar R31 = B[2] * C[0] + B[4] * C[1] + B[5] * C[2]; - const CeedScalar R12 = B[0] * C[3] + B[1] * C[4] + B[2] * C[5]; - const CeedScalar R22 = B[1] * C[3] + B[3] * C[4] + B[4] * C[5]; - const CeedScalar R32 = B[2] * C[3] + B[4] * C[4] + B[5] * C[5]; - - D[0] = A[0] * R11 + A[1] * R21 + A[2] * R31; - D[1] = A[3] * R11 + A[4] * R21 + A[5] * R31; - D[2] = A[0] * R12 + A[1] * R22 + A[2] * R32; - D[3] = A[3] * R12 + A[4] * R22 + A[5] * R32; -} - -CEED_QFUNCTION_HELPER void MultBA22(const CeedScalar A[4], const CeedScalar B[3], - CeedScalar C[4]) -{ - // A: 0 2 B: 0 1 C: 0 2 - // 1 3 1 2 1 3 - C[0] = B[0] * A[0] + B[1] * A[1]; - C[1] = B[1] * A[0] + B[2] * A[1]; - C[2] = B[0] * A[2] + B[1] * A[3]; - C[3] = B[1] * A[2] + B[2] * A[3]; -} - -CEED_QFUNCTION_HELPER void MultBA33(const CeedScalar A[9], const CeedScalar B[6], - CeedScalar C[9]) -{ - // A: 0 3 6 B: 0 1 2 C: 0 3 6 - // 1 4 7 1 3 4 1 4 7 - // 2 5 8 2 4 5 2 5 8 - C[0] = B[0] * A[0] + B[1] * A[1] + B[2] * A[2]; - C[1] = B[1] * A[0] + B[3] * A[1] + B[4] * A[2]; - C[2] = B[2] * A[0] + B[4] * A[1] + B[5] * A[2]; - C[3] = B[0] * A[3] + B[1] * A[4] + B[2] * A[5]; - C[4] = B[1] * A[3] + B[3] * A[4] + B[4] * A[5]; - C[5] = B[2] * A[3] + B[4] * A[4] + B[5] * A[5]; - C[6] = B[0] * A[6] + B[1] * A[7] + B[2] * A[8]; - C[7] = B[1] * A[6] + B[3] * A[7] + B[4] * A[8]; - C[8] = B[2] * A[6] + B[4] * A[7] + B[5] * A[8]; -} - -CEED_QFUNCTION_HELPER void MultBA21(const CeedScalar A[2], const CeedScalar B[3], - CeedScalar C[2]) -{ - // A: 0 B: 0 1 C: 0 - // 1 1 2 1 - C[0] = B[0] * A[0] + B[1] * A[1]; - C[1] = B[1] * A[0] + B[2] * A[1]; -} - -CEED_QFUNCTION_HELPER void MultBA32(const CeedScalar A[6], const CeedScalar B[6], - CeedScalar C[6]) -{ - // A: 0 3 B: 0 1 2 C: 0 3 - // 1 4 1 3 4 1 4 - // 2 5 2 4 5 2 5 - C[0] = B[0] * A[0] + B[1] * A[1] + B[2] * A[2]; - C[1] = B[1] * A[0] + B[3] * A[1] 
+ B[4] * A[2]; - C[2] = B[2] * A[0] + B[4] * A[1] + B[5] * A[2]; - C[3] = B[0] * A[3] + B[1] * A[4] + B[2] * A[5]; - C[4] = B[1] * A[3] + B[3] * A[4] + B[4] * A[5]; - C[5] = B[2] * A[3] + B[4] * A[4] + B[5] * A[5]; -} - -#endif // PALACE_LIBCEED_UTILS_QF_H