Commit 13d79d11 authored by Lubomir Riha's avatar Lubomir Riha
Browse files

code cleaning

parent 72e5cd8c
This diff is collapsed.
......@@ -386,12 +386,13 @@ int solve_with_conjugate_gradient3D(cublasHandle_t const cublas_handle,
preconditioner->init(m, alpha, cublas_handle, cusparse_handle);
}
//delete
for (int r = 0; r < 10; r++) {
preconditioner->run(b, x);
}
return 0;
// delete - end
// LR: TODO: delete
// //delete
// for (int r = 0; r < 10; r++) {
// preconditioner->run(b, x);
// }
// return 0;
// // delete - end
device_memset<FT>(x, FT(0), n); // x = 0
FT *p, *r, *h, *Ax, *q;
......
No preview for this file type
......@@ -41,184 +41,85 @@ int main (int argc, char *argv[])
cublasCreate(&cublas_handle);
cusparseCreate(&cusparse_handle);
const double alpha = 1; //0.01f;
int max_iter = 2; //10000;
const double alpha = 0.01f; //0.01f;
int max_iter = 10000; //10000;
double tolerance = 0.00001;
// const int m = M; //1 * 1024; //32; //8 * 1024; // 2048;
//
// int experiment;
// experiment = atoi(argv[1]);
// printf("Running experiment: %d \n",experiment);
//
// int num_iter;
// double *b, *x;
//
// double *h_x, *h_b;
//
// h_x = (double * ) malloc ( (m*m)*sizeof(double) );
// h_b = (double * ) malloc ( (m*m)*sizeof(double) );
//
// for (int i = 0; i < M; i++) {
// for (int j = 0; j < M; j++)
// {
// if ( j == 0)
// h_b[i*M + j] = i+1.0;
// else
// h_b[i*M + j] = 0.0;
//
// h_x[i*M + j] = 0.0;
// }
// }
//
//
//
// cudaMalloc((void **) &b, (m*m)*sizeof(double));
// cudaMalloc((void **) &x, (m*m)*sizeof(double));
//
// device_memset_inc<double>(x, 0.0, m*m);
// cudaMemcpy( b, h_b, (m*m)*sizeof(double) , cudaMemcpyHostToDevice );
//
//
//
///*
// num_iter = solve_with_conjugate_gradient<double>(cublas_handle, cusparse_handle, m, alpha, b, x, max_iter, tolerance, NULL );
// cout << num_iter << " CG only - iterations" << endl;
//
// cudaMemcpy( h_x, x, (m*m)*sizeof(double) , cudaMemcpyDeviceToHost );
//
// //for (int i = 0; i < m*m; i++)
// // cout << h_x[i] << " ";
//
// cout << endl;
//*/
//
//
// if (experiment == 1 || experiment == 0) {
// ThomasPreconditioner<double> preconditioner_th(false);
// num_iter = solve_with_chebyshev_iteration<double>(cublas_handle, cusparse_handle, m, alpha, b, x, max_iter, tolerance, &preconditioner_th);
// cout << " Chebyschev - Thomas Prec w. cublas transpose. - iterations: " << num_iter << endl;
//
// // cudaMemcpy( h_x, x, (m*m)*sizeof(double) , cudaMemcpyDeviceToHost );
// // for (int i = 0; i < m*m; i++) {
// // if (i % m == 0) cout << endl;
// // cout << h_x[i] << "\t";
// // }
// // cout << endl;
// }
//
// if (experiment == 2 || experiment == 0) {
// ThomasPreconditioner<double> preconditioner_th_it(true);
// num_iter = solve_with_chebyshev_iteration<double>(cublas_handle, cusparse_handle, m, alpha, b, x, max_iter, tolerance, &preconditioner_th_it);
// cout << " Chebyschev - Thomas Prec. w implicit transpose - iterations: " << num_iter << endl;
// }
//
// if (experiment == 10 || experiment == 0) {
// SpikeThomasPreconditioner<double> preconditioner_sp4(4);
// num_iter = solve_with_chebyshev_iteration<double>(cublas_handle, cusparse_handle, m, alpha, b, x, max_iter, tolerance, &preconditioner_sp4);
// cout << " Chebyschev - Thomas-SPIKE(4) Prec. - iterations: " << num_iter << endl;
// }
//
// if (experiment == 11 || experiment == 0) {
// SpikeThomasPreconditioner<double> preconditioner_sp8(8);
// num_iter = solve_with_chebyshev_iteration<double>(cublas_handle, cusparse_handle, m, alpha, b, x, max_iter, tolerance, &preconditioner_sp8);
// cout << " Chebyschev - Thomas-SPIKE(8) Prec. - iterations: " << num_iter << endl;
// }
//
// if (experiment == 12 || experiment == 0) {
// SpikeThomasPreconditioner<double> preconditioner_sp16(16);
// num_iter = solve_with_chebyshev_iteration<double>(cublas_handle, cusparse_handle, m, alpha, b, x, max_iter, tolerance, &preconditioner_sp16);
// cout << " Chebyschev - Thomas-SPIKE(16) Prec. - iterations: " << num_iter << endl;
// }
//
// if (experiment == 20 || experiment == 0) {
// InvPreconditioner<double> preconditioner_in(false);
// num_iter = solve_with_chebyshev_iteration<double>(cublas_handle, cusparse_handle, m, alpha, b, x, max_iter, tolerance, &preconditioner_in);
// cout << " Chebyschev - inverse Matrix prec. w. cublas trans - iterations: " << num_iter << endl;
// }
//
//
//
//
//
//
//
//
///*
// SpikeThomasPreconditioner<double> preconditioner(8);
// num_iter = solve_with_conjugate_gradient<double>(cublas_handle, cusparse_handle, m, alpha, b, x, max_iter, tolerance, &preconditioner );
// cout << num_iter << " CG Spike-Thomas prec - iterations" << endl;
//
// cudaMemcpy( h_x, x, (m*m)*sizeof(double) , cudaMemcpyDeviceToHost );
// //for (int i = 0; i < m*m; i++)
// // cout << h_x[i] << " ";
// cout << endl;
//*/
//
//
///*
// ThomasPreconditioner<double> preconditioner2;
// num_iter = solve_with_conjugate_gradient<double>(cublas_handle, cusparse_handle, m, alpha, b, x, max_iter, tolerance, &preconditioner2 );
// cout << num_iter << " CG Thomas prec - iterations" << endl;
//
// cudaMemcpy( h_x, x, (m*m)*sizeof(double) , cudaMemcpyDeviceToHost );
// //for (int i = 0; i < m*m; i++)
// // cout << h_x[i] << " ";
//
// cout << endl;
//*/
//
//
//
//// InvPreconditioner<double> preconditionerInv;
//// num_iter = solve_with_conjugate_gradient<double>(cublas_handle, cusparse_handle, m, alpha, b, x, max_iter, tolerance, &preconditionerInv );
//// cout << num_iter << " CG InvM prec - iterations" << endl;
//
//// cudaMemcpy( h_x, x, (m*m)*sizeof(double) , cudaMemcpyDeviceToHost );
//// for (int i = 0; i < m*m; i++)
//// cout << h_x[i] << " ";
//
// // cout << endl;
//
int num_iter;
#define PREC double
PREC *b_3d, *x_3d;
int m_3d = M;
PREC * h_x_3d;
h_x_3d = (PREC * ) malloc ( (m_3d*m_3d*m_3d) * sizeof(PREC) );
cudaMalloc((void **) &b_3d, (m_3d*m_3d*m_3d)*sizeof(PREC));
cudaMalloc((void **) &x_3d, (m_3d*m_3d*m_3d)*sizeof(PREC));
for (int i = 0; i < m_3d*m_3d*m_3d; i++)
h_x_3d[i] = i;
cudaMemcpy( b_3d, h_x_3d, (m_3d*m_3d*m_3d)*sizeof(PREC) , cudaMemcpyHostToDevice );
//device_memset<double>(b_3d, 1.0, m_3d*m_3d*m_3d);
//Inv
ThomasPreconditioner3D<PREC> preconditioner_inv3d;
num_iter = solve_with_conjugate_gradient3D<PREC>(cublas_handle, cusparse_handle, m_3d, alpha, b_3d, x_3d, max_iter, tolerance, &preconditioner_inv3d); // NULL );
cout << num_iter << " CG 3D only - iterations" << endl;
cudaMemcpy( h_x_3d, b_3d, (m_3d*m_3d*m_3d)*sizeof(double) , cudaMemcpyDeviceToHost );
//for (int i = 0; i < m_3d*m_3d*m_3d; i++)
// cout << h_x_3d[i] << endl;
cout << endl;
// ThomasPreconditioner3D<double> preconditioner_3d;
// num_iter = solve_with_conjugate_gradient3D<double>(cublas_handle, cusparse_handle, m_3d, alpha, b_3d, x_3d, max_iter, tolerance, &preconditioner_3d);
// cout << num_iter << " CG 3D Thomas Prec - iterations" << endl;
const int m = M; //1 * 1024; //32; //8 * 1024; // 2048;
int experiment;
experiment = atoi(argv[1]);
printf("Running experiment: %d \n",experiment);
int num_iter;
double *b, *x;
double *h_x, *h_b;
h_x = (double * ) malloc ( (m*m)*sizeof(double) );
h_b = (double * ) malloc ( (m*m)*sizeof(double) );
for (int i = 0; i < M; i++) {
for (int j = 0; j < M; j++)
{
if ( j == 0)
h_b[i*M + j] = i+1.0;
else
h_b[i*M + j] = 0.0;
h_x[i*M + j] = 0.0;
}
}
cudaMalloc((void **) &b, (m*m)*sizeof(double));
cudaMalloc((void **) &x, (m*m)*sizeof(double));
device_memset_inc<double>(x, 0.0, m*m);
cudaMemcpy( b, h_b, (m*m)*sizeof(double) , cudaMemcpyHostToDevice );
if (experiment == 1 || experiment == 0) {
ThomasPreconditioner<double> preconditioner_th(false);
num_iter = solve_with_chebyshev_iteration<double>(cublas_handle, cusparse_handle, m, alpha, b, x, max_iter, tolerance, &preconditioner_th);
cout << " Chebyschev - Thomas Prec w. cublas transpose. - iterations: " << num_iter << endl;
}
if (experiment == 2 || experiment == 0) {
ThomasPreconditioner<double> preconditioner_th_it(true);
num_iter = solve_with_chebyshev_iteration<double>(cublas_handle, cusparse_handle, m, alpha, b, x, max_iter, tolerance, &preconditioner_th_it);
cout << " Chebyschev - Thomas Prec. w implicit transpose - iterations: " << num_iter << endl;
}
if (experiment == 10 || experiment == 0) {
SpikeThomasPreconditioner<double> preconditioner_sp4(4);
num_iter = solve_with_chebyshev_iteration<double>(cublas_handle, cusparse_handle, m, alpha, b, x, max_iter, tolerance, &preconditioner_sp4);
cout << " Chebyschev - Thomas-SPIKE(4) Prec. - iterations: " << num_iter << endl;
}
if (experiment == 11 || experiment == 0) {
SpikeThomasPreconditioner<double> preconditioner_sp8(8);
num_iter = solve_with_chebyshev_iteration<double>(cublas_handle, cusparse_handle, m, alpha, b, x, max_iter, tolerance, &preconditioner_sp8);
cout << " Chebyschev - Thomas-SPIKE(8) Prec. - iterations: " << num_iter << endl;
}
if (experiment == 12 || experiment == 0) {
SpikeThomasPreconditioner<double> preconditioner_sp16(16);
num_iter = solve_with_chebyshev_iteration<double>(cublas_handle, cusparse_handle, m, alpha, b, x, max_iter, tolerance, &preconditioner_sp16);
cout << " Chebyschev - Thomas-SPIKE(16) Prec. - iterations: " << num_iter << endl;
}
if (experiment == 30 || experiment == 0) {
SpikeThomasPreconditioner<double> preconditioner(8);
num_iter = solve_with_conjugate_gradient<double>(cublas_handle, cusparse_handle, m, alpha, b, x, max_iter, tolerance, &preconditioner );
cout << " CG - Thomas-SPIKE(8) Prec. - iterations: " << num_iter << endl;
}
// Print results
// cudaMemcpy( h_x, x, (m*m)*sizeof(double) , cudaMemcpyDeviceToHost );
// for (int i = 0; i < m*m; i++)
// cout << h_x[i] << "\n";
// cout << endl;
}
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment