diff --git a/src/cuda/fasten.hpp b/src/cuda/fasten.hpp
index 39927a4..d6a0834 100644
--- a/src/cuda/fasten.hpp
+++ b/src/cuda/fasten.hpp
@@ -253,7 +253,7 @@ template <size_t PPWI> class IMPL_CLS final : public Bude<PPWI> {
 
     Sample sample(PPWI, wgsize, p.nposes());
 
-    auto contextStart = now();
+    auto hostToDeviceStart = now();
     auto protein = allocate(p.protein);
     auto ligand = allocate(p.ligand);
     auto transforms_0 = allocate(p.poses[0]);
@@ -265,9 +265,9 @@ template <size_t PPWI> class IMPL_CLS final : public Bude<PPWI> {
     auto forcefield = allocate(p.forcefield);
     auto results = allocate<float>(sample.energies.size());
     checkError(cudaDeviceSynchronize());
-    auto contextEnd = now();
+    auto hostToDeviceEnd = now();
 
-    sample.contextTime = {contextStart, contextEnd};
+    sample.hostToDevice = {hostToDeviceStart, hostToDeviceEnd};
 
     size_t global = std::ceil(double(p.nposes()) / PPWI);
     global = std::ceil(double(global) / double(wgsize));
@@ -285,8 +285,13 @@ template <size_t PPWI> class IMPL_CLS final : public Bude<PPWI> {
       sample.kernelTimes.emplace_back(kernelStart, kernelEnd);
     }
 
+    auto deviceToHostStart = now(); 
+    
     checkError(
         cudaMemcpy(sample.energies.data(), results, sample.energies.size() * sizeof(float), cudaMemcpyDeviceToHost));
+    
+    auto deviceToHostEnd = now(); 
+    sample.deviceToHost = {deviceToHostStart, deviceToHostEnd};
 
     free(protein);
     free(ligand);