--- Begin Message ---
------------------------------------------------------------
revno: 6248
committer: Fredrik Valdmanis <fredrik@xxxxxxxxxxxxx>
branch nick: gpu
timestamp: Wed 2011-11-23 11:13:45 +0100
message:
Assembly seems to work on the GPU. Still needs a lot of cleaning and fixing
modified:
dolfin/la/PETScMatrix.cpp
dolfin/la/PETScMatrix.h
dolfin/la/PETScVector.cpp
dolfin/la/UnassembledMatrix.cpp
--
lp:~fredva/dolfin/dolfin-gpu
https://code.launchpad.net/~fredva/dolfin/dolfin-gpu
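
For anyone following the branch: the batch-assembly path touched below caches each local element tensor during add() and hands all of them to PETSc in one go via MatSetValuesBatch() when apply() is called, instead of issuing one MatSetValues() call per cell. A rough standalone sketch of that flow (illustrative only, not the DOLFIN API; it assumes square element blocks whose row and column indices coincide, which is what MatSetValuesBatch requires, and a PETSc build recent enough to provide that routine):

  // Sketch: insert nb square element blocks of size bs x bs in a single batch
  #include <petscmat.h>
  #include <vector>

  int main(int argc, char** argv)
  {
    PetscInitialize(&argc, &argv, PETSC_NULL, PETSC_NULL);

    const PetscInt nb = 2;   // number of cached element blocks (cells)
    const PetscInt bs = 3;   // rows/columns per block (local tensor dimension)

    // Caches that add() would fill: bs*bs values and bs row indices per block
    std::vector<PetscScalar> values(nb*bs*bs, 1.0);
    PetscInt rows[] = {0, 1, 2,  2, 3, 4};

    Mat A;
    MatCreate(PETSC_COMM_WORLD, &A);
    MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, 5, 5);
    MatSetType(A, MATSEQAIJ);                 // MATSEQAIJCUSP for the GPU backend
    MatSeqAIJSetPreallocation(A, 5, PETSC_NULL);

    // One call adds every cached block (row indices are reused as column indices)
    MatSetValuesBatch(A, nb, bs, rows, &values[0]);
    MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
    MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);

    MatDestroy(&A);
    PetscFinalize();
    return 0;
  }
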
=== modified file 'dolfin/la/PETScMatrix.cpp'
--- dolfin/la/PETScMatrix.cpp 2011-11-22 14:39:36 +0000
+++ dolfin/la/PETScMatrix.cpp 2011-11-23 10:13:45 +0000
@@ -48,7 +48,8 @@
("frobenius", NORM_FROBENIUS);
//-----------------------------------------------------------------------------
-PETScMatrix::PETScMatrix(bool use_gpu) : _use_gpu(use_gpu), _assembled(false)
+PETScMatrix::PETScMatrix(bool use_gpu) : _use_gpu(use_gpu), _assembled(false),
+ _batch_assembly(false)
{
#ifndef HAS_PETSC_CUSP
if (use_gpu)
@@ -63,7 +64,8 @@
}
//-----------------------------------------------------------------------------
PETScMatrix::PETScMatrix(boost::shared_ptr<Mat> A, bool use_gpu) :
- PETScBaseMatrix(A), _use_gpu(use_gpu), _assembled(false)
+ PETScBaseMatrix(A), _use_gpu(use_gpu), _assembled(false),
+ _batch_assembly(false)
{
#ifndef HAS_PETSC_CUSP
if (use_gpu)
@@ -77,7 +79,8 @@
// Do nothing else
}
//-----------------------------------------------------------------------------
-PETScMatrix::PETScMatrix(const PETScMatrix& A): _use_gpu(false), _assembled(false)
+PETScMatrix::PETScMatrix(const PETScMatrix& A): _use_gpu(false),
+ _assembled(false), _batch_assembly(false)
{
*this = A;
}
@@ -85,6 +88,7 @@
PETScMatrix::~PETScMatrix()
{
// Do nothing
+ //dolfin_debug("Destructing PETScmatrix");
}
//-----------------------------------------------------------------------------
bool PETScMatrix::distributed() const
@@ -149,7 +153,7 @@
//-----------------------------------------------------------------------------
void PETScMatrix::init(const GenericSparsityPattern& sparsity_pattern)
{
- info("PETScMatrix.cpp: Initializing PETSc matrix");
+ //dolfin_debug("PETScMatrix.cpp: Initializing PETSc matrix");
// Get global dimensions and local range
assert(sparsity_pattern.rank() == 2);
const uint M = sparsity_pattern.size(0);
@@ -172,6 +176,7 @@
// Initialize matrix
if (row_range.first == 0 && row_range.second == M)
{
+ //dolfin_debug("Inside serial init");
// Get number of nonzeros for each row from sparsity pattern
std::vector<uint> num_nonzeros(M);
sparsity_pattern.num_nonzeros_diagonal(num_nonzeros);
@@ -218,6 +223,7 @@
#endif
MatSetFromOptions(*A);
+ //dolfin_debug("Finished serial init");
}
else
{
@@ -266,12 +272,14 @@
if (_use_gpu &&
sparsity_pattern.local_size(0) == sparsity_pattern.local_size(1))
{
+ //dolfin_debug("Initializing UnassembledMatrix member ptr");
_batch_assembly = true;
_unassembledA.reset(new UnassembledMatrix);
_unassembledA->init(sparsity_pattern);
// TODO: issue warning if local tensor is not square
}
+ //dolfin_debug("Finished init");
}
//-----------------------------------------------------------------------------
PETScMatrix* PETScMatrix::copy() const
@@ -315,18 +323,23 @@
uint n, const uint* cols)
{
assert(A);
-
+
+ //dolfin_debug("Start of add");
+
if (_batch_assembly && !_assembled)
{
+ //dolfin_debug("Batch add");
// Cache the added block for batch assembly later
_unassembledA->add(block, m, rows, n, cols);
} else {
+ //dolfin_debug("Regular add");
// Perform regular incremental insertion
MatSetValues(*A,
static_cast<int>(m), reinterpret_cast<const int*>(rows),
static_cast<int>(n), reinterpret_cast<const int*>(cols),
block, ADD_VALUES);
}
+ //dolfin_debug("End of add");
}
//-----------------------------------------------------------------------------
void PETScMatrix::axpy(double a, const GenericMatrix& A,
@@ -485,12 +498,13 @@
{
assert(A);
- info("PETScMatrix.cpp: Applying");
+ dolfin_debug("Applying PETScMatrix");
// Perform batch assembly, only one insertion call needed
if (_batch_assembly && !_assembled)
{
- dolfin_debug2("Initializing unassembled matrix. Number of blocks, Block dim:",
- _unassembledA->num_blocks, _unassembledA->block_dim);
+ //dolfin_debug2("Initializing unassembled matrix. Number of blocks, Block dim:",
+ // _unassembledA->num_blocks, _unassembledA->block_dim);
+ dolfin_debug("Calling MatSetValuesBatch");
MatSetValuesBatch(*A, _unassembledA->num_blocks,
_unassembledA->block_dim,
reinterpret_cast<int*>(_unassembledA->indices.get()),
@@ -500,6 +514,7 @@
if (mode == "add")
{
+ dolfin_debug("Assembling with MatAssemblyBegin/End");
MatAssemblyBegin(*A, MAT_FINAL_ASSEMBLY);
MatAssemblyEnd(*A, MAT_FINAL_ASSEMBLY);
}
@@ -519,7 +534,7 @@
"apply changes to PETSc matrix",
"Unknown apply mode \"%s\"", mode.c_str());
}
- info("PETScMatrix.cpp: Finished applying");
+ //dolfin_debug("PETScMatrix.cpp: Finished applying");
}
//-----------------------------------------------------------------------------
void PETScMatrix::zero()
=== modified file 'dolfin/la/PETScMatrix.h'
--- dolfin/la/PETScMatrix.h 2011-11-22 14:39:36 +0000
+++ dolfin/la/PETScMatrix.h 2011-11-23 10:13:45 +0000
@@ -172,15 +172,15 @@
// PETSc matrix architecture
const bool _use_gpu;
+ // Indicates whether the matrix has been assembled or not
+ bool _assembled;
+
// Perform batch assembly?
bool _batch_assembly;
// Unassembled matrix pointer
boost::shared_ptr<UnassembledMatrix> _unassembledA;
- // Indicates whether the matrix has been assembled or not
- bool _assembled;
-
};
}
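
Side note on the header hunk above: C++ constructs data members in declaration order, not in the order written in a constructor's initializer list, so moving _assembled ahead of _batch_assembly presumably just keeps the declarations aligned with the _use_gpu(...), _assembled(false), _batch_assembly(false) initializer lists in PETScMatrix.cpp and avoids -Wreorder warnings. A tiny illustration of the rule (hypothetical class, nothing to do with DOLFIN):

  #include <iostream>

  // Reports when a member is constructed
  struct Tracer
  {
    explicit Tracer(const char* name) { std::cout << "constructing " << name << std::endl; }
  };

  struct Example
  {
    // The initializer list names second before first, but construction
    // still follows declaration order: first, then second.
    Example() : second("second"), first("first") {}
    Tracer first;
    Tracer second;
  };

  int main()
  {
    Example e;  // prints "constructing first", then "constructing second"
    return 0;
  }
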
=== modified file 'dolfin/la/PETScVector.cpp'
--- dolfin/la/PETScVector.cpp 2011-11-21 17:14:20 +0000
+++ dolfin/la/PETScVector.cpp 2011-11-23 10:13:45 +0000
@@ -355,7 +355,7 @@
void PETScVector::apply(std::string mode)
{
assert(x);
- info("PETScVector.cpp: Applying");
+ //dolfin_debug("PETScVector.cpp: Applying");
VecAssemblyBegin(*x);
VecAssemblyEnd(*x);
}
=== modified file 'dolfin/la/UnassembledMatrix.cpp'
--- dolfin/la/UnassembledMatrix.cpp 2011-11-22 14:39:36 +0000
+++ dolfin/la/UnassembledMatrix.cpp 2011-11-23 10:13:45 +0000
@@ -47,15 +47,18 @@
block_dim = sparsity_pattern.local_size(0);
// Allocate cache to have length equal to number of values to insert
+ //dolfin_debug("Resetting values and indices");
values.reset(new double[num_blocks*block_dim*block_dim]);
indices.reset(new uint[num_blocks*block_dim]);
+ //dolfin_debug("Values and indices reset");
// Save the global size of the matrix
- shape[0] = sparsity_pattern.size(0);
- shape[1] = sparsity_pattern.size(1);
-
- dolfin_debug2("Initializing unassembled matrix. Number of blocks, Block dim:",
- num_blocks, block_dim);
+ shape.clear();
+ shape.push_back(sparsity_pattern.size(0));
+ shape.push_back(sparsity_pattern.size(1));
+ //dolfin_debug("Written to shape");
+ //dolfin_debug2("Initializing unassembled matrix. Number of blocks, Block dim:",
+ // num_blocks, block_dim);
}
//----------------------------------------------------------------------------
dolfin::uint UnassembledMatrix::size(uint dim) const
@@ -93,8 +96,11 @@
}
// Copy values into the buffers
+ //dolfin_debug("Copying values into values");
memcpy(values.get()+offset_values, block, block_dim*block_dim*sizeof(double));
+ //dolfin_debug("Copying values into indices");
memcpy(indices.get()+offset_idx, rows, block_dim*sizeof(uint));
+ //dolfin_debug("Finished copying");
counter++;
}
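
The UnassembledMatrix changes come down to two flat caches: a values buffer of num_blocks*block_dim*block_dim doubles and an indices buffer of num_blocks*block_dim row indices, filled one block at a time with memcpy at offsets counter*block_dim*block_dim and counter*block_dim. A stripped-down sketch of that caching (standalone and illustrative; the member types in UnassembledMatrix.h are not shown in this diff, so boost::shared_array is an assumption here):

  #include <boost/shared_array.hpp>
  #include <cstring>

  // Minimal mirror of the values/indices block cache used above
  struct BlockCache
  {
    unsigned int num_blocks, block_dim, counter;
    boost::shared_array<double> values;         // num_blocks*block_dim*block_dim entries
    boost::shared_array<unsigned int> indices;  // num_blocks*block_dim entries

    void init(unsigned int nb, unsigned int bd)
    {
      num_blocks = nb; block_dim = bd; counter = 0;
      values.reset(new double[nb*bd*bd]);
      indices.reset(new unsigned int[nb*bd]);
    }

    // Cache one dense local tensor and its row indices (rows are assumed equal to cols)
    void add(const double* block, const unsigned int* rows)
    {
      const unsigned int offset_values = counter*block_dim*block_dim;
      const unsigned int offset_idx = counter*block_dim;
      std::memcpy(values.get() + offset_values, block, block_dim*block_dim*sizeof(double));
      std::memcpy(indices.get() + offset_idx, rows, block_dim*sizeof(unsigned int));
      counter++;
    }
  };

  int main()
  {
    BlockCache cache;
    cache.init(2, 3);                  // room for two 3x3 element blocks
    const double block[9] = {};        // dummy local tensor
    const unsigned int rows[3] = {0, 1, 2};
    cache.add(block, rows);
    return 0;
  }
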
--- End Message ---