diff --git a/src/gpu_utils/gpu_utils.cu b/src/gpu_utils/gpu_utils.cu
index 08ae31ca..0270b648 100644
--- a/src/gpu_utils/gpu_utils.cu
+++ b/src/gpu_utils/gpu_utils.cu
@@ -30,3 +30,18 @@ extern "C" void gpu_update_monodomain(real *vm, float *b, real alpha, size_t n)
     kernel_update_monodomain<<<GRID, BLOCK_SIZE>>>(vm, b, alpha, n);
     cudaDeviceSynchronize();
 }
+
+__global__ void kernel_copy_vectors(real *dst, float *src, size_t n) {
+    // Copies src[i] into dst[i] for every i < n, one element per thread of a 1D launch; the float value is converted to real by the assignment.
+    size_t i = (size_t)blockIdx.x*blockDim.x + threadIdx.x;  // widened before the multiply so the index cannot wrap at 32 bits while n is a size_t
+    // Threads whose index falls at or beyond n do nothing.
+    if(i < n) {
+        dst[i] = src[i];
+    }
+}
+
+extern "C" void gpu_copy_vectors(real *dst, float *src, size_t n) {  // copies n elements from src to dst with kernel_copy_vectors, then waits for the device to finish
+    if(n == 0) return;  // nothing to copy; launching a grid of zero blocks is an invalid configuration and would leave a CUDA error pending
+    kernel_copy_vectors<<<(unsigned int)((n + BLOCK_SIZE - 1)/BLOCK_SIZE), BLOCK_SIZE>>>(dst, src, n);  // ceil(n / BLOCK_SIZE) blocks of BLOCK_SIZE threads
+    cudaDeviceSynchronize();
+}
diff --git a/src/gpu_utils/gpu_utils.h b/src/gpu_utils/gpu_utils.h
index 60068c35..ea8f3418 100644
--- a/src/gpu_utils/gpu_utils.h
+++ b/src/gpu_utils/gpu_utils.h
@@ -36,6 +36,7 @@ extern "C" {
 void gpu_vec_div_vec(real *vec1, real *vec2, real* res, size_t n);
 real gpu_ecg_integral(real *beta_im, real *distances, real *volumes, size_t vec_size);
 void gpu_update_monodomain(real *vm, float *b, real alpha, size_t n);
+void gpu_copy_vectors(real *dst, float *src, size_t n);
 void cuda_assert(int code, char const *const func, const char *const file, int const line, const char *api);
 #ifdef __cplusplus
 }
diff --git a/src/monodomain/monodomain_solver.c b/src/monodomain/monodomain_solver.c
index 1db06551..2d77dbc1 100644
--- a/src/monodomain/monodomain_solver.c
+++ b/src/monodomain/monodomain_solver.c
@@ -675,7 +675,8 @@ int solve_monodomain(struct monodomain_solver *the_monodomain_solver, struct ode
         if(save_to_file && (count % print_rate == 0) && (cur_time >= start_saving_after_dt)) {
             start_stop_watch(&write_time);
 
-            if(edp_gpu) {
+            //TODO: this is not needed if the ODE solver is running on the GPU
+            if(edp_gpu && count > 0) {
 
 #ifdef COMPILE_CUDA
                 cudaMemcpy(gpu_rhs, linear_system_solver_result, original_num_cells * sizeof(float), cudaMemcpyDeviceToHost);
@@ -1084,7 +1085,9 @@ bool update_ode_state_vector_and_check_for_activity(real_cpu vm_threshold, struc
                 check_cuda_error(cudaMemcpy(sv, vms, mem_size, cudaMemcpyHostToDevice));
                 free(vms);
             } else {
-                check_cuda_error(cudaMemcpy(sv, linear_solver_result, mem_size, cudaMemcpyDeviceToDevice));
+                //TODO: make the GPU linear system solver use the same size as the ODE solver
+                //check_cuda_error(cudaMemcpy(sv, linear_solver_result, mem_size, cudaMemcpyDeviceToDevice));
+                gpu_copy_vectors(sv, linear_solver_result, max_number_of_cells);
                 //TODO: make a kernel to check for activity
                 act = true;
             }