Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Lubomir Riha
ETH-ADI
Commits
13d79d11
Commit
13d79d11
authored
Sep 12, 2016
by
Lubomir Riha
Browse files
code cleaning
parent
72e5cd8c
Changes
4
Expand all
Hide whitespace changes
Inline
Side-by-side
preconditioner.h
View file @
13d79d11
This diff is collapsed.
Click to expand it.
solver.h
View file @
13d79d11
...
...
@@ -386,12 +386,13 @@ int solve_with_conjugate_gradient3D(cublasHandle_t const cublas_handle,
preconditioner
->
init
(
m
,
alpha
,
cublas_handle
,
cusparse_handle
);
}
//delete
for
(
int
r
=
0
;
r
<
10
;
r
++
)
{
preconditioner
->
run
(
b
,
x
);
}
return
0
;
// delete - end
// LR: TODO: delete
// //delete
// for (int r = 0; r < 10; r++) {
// preconditioner->run(b, x);
// }
// return 0;
// // delete - end
device_memset
<
FT
>
(
x
,
FT
(
0
),
n
);
// x = 0
FT
*
p
,
*
r
,
*
h
,
*
Ax
,
*
q
;
...
...
test
View file @
13d79d11
No preview for this file type
test.cu
View file @
13d79d11
...
...
@@ -41,184 +41,85 @@ int main (int argc, char *argv[])
cublasCreate
(
&
cublas_handle
);
cusparseCreate
(
&
cusparse_handle
);
const
double
alpha
=
1
;
//0.01f;
int
max_iter
=
2
;
//10000;
const
double
alpha
=
0.01
f
;
//0.01f;
int
max_iter
=
10000
;
//10000;
double
tolerance
=
0.00001
;
// const int m = M; //1 * 1024; //32; //8 * 1024; // 2048;
//
// int experiment;
// experiment = atoi(argv[1]);
// printf("Running experiment: %d \n",experiment);
//
// int num_iter;
// double *b, *x;
//
// double *h_x, *h_b;
//
// h_x = (double * ) malloc ( (m*m)*sizeof(double) );
// h_b = (double * ) malloc ( (m*m)*sizeof(double) );
//
// for (int i = 0; i < M; i++) {
// for (int j = 0; j < M; j++)
// {
// if ( j == 0)
// h_b[i*M + j] = i+1.0;
// else
// h_b[i*M + j] = 0.0;
//
// h_x[i*M + j] = 0.0;
// }
// }
//
//
//
// cudaMalloc((void **) &b, (m*m)*sizeof(double));
// cudaMalloc((void **) &x, (m*m)*sizeof(double));
//
// device_memset_inc<double>(x, 0.0, m*m);
// cudaMemcpy( b, h_b, (m*m)*sizeof(double) , cudaMemcpyHostToDevice );
//
//
//
///*
// num_iter = solve_with_conjugate_gradient<double>(cublas_handle, cusparse_handle, m, alpha, b, x, max_iter, tolerance, NULL );
// cout << num_iter << " CG only - iterations" << endl;
//
// cudaMemcpy( h_x, x, (m*m)*sizeof(double) , cudaMemcpyDeviceToHost );
//
// //for (int i = 0; i < m*m; i++)
// // cout << h_x[i] << " ";
//
// cout << endl;
//*/
//
//
// if (experiment == 1 || experiment == 0) {
// ThomasPreconditioner<double> preconditioner_th(false);
// num_iter = solve_with_chebyshev_iteration<double>(cublas_handle, cusparse_handle, m, alpha, b, x, max_iter, tolerance, &preconditioner_th);
// cout << " Chebyschev - Thomas Prec w. cublas transpose. - iterations: " << num_iter << endl;
//
// // cudaMemcpy( h_x, x, (m*m)*sizeof(double) , cudaMemcpyDeviceToHost );
// // for (int i = 0; i < m*m; i++) {
// // if (i % m == 0) cout << endl;
// // cout << h_x[i] << "\t";
// // }
// // cout << endl;
// }
//
// if (experiment == 2 || experiment == 0) {
// ThomasPreconditioner<double> preconditioner_th_it(true);
// num_iter = solve_with_chebyshev_iteration<double>(cublas_handle, cusparse_handle, m, alpha, b, x, max_iter, tolerance, &preconditioner_th_it);
// cout << " Chebyschev - Thomas Prec. w implicit transpose - iterations: " << num_iter << endl;
// }
//
// if (experiment == 10 || experiment == 0) {
// SpikeThomasPreconditioner<double> preconditioner_sp4(4);
// num_iter = solve_with_chebyshev_iteration<double>(cublas_handle, cusparse_handle, m, alpha, b, x, max_iter, tolerance, &preconditioner_sp4);
// cout << " Chebyschev - Thomas-SPIKE(4) Prec. - iterations: " << num_iter << endl;
// }
//
// if (experiment == 11 || experiment == 0) {
// SpikeThomasPreconditioner<double> preconditioner_sp8(8);
// num_iter = solve_with_chebyshev_iteration<double>(cublas_handle, cusparse_handle, m, alpha, b, x, max_iter, tolerance, &preconditioner_sp8);
// cout << " Chebyschev - Thomas-SPIKE(8) Prec. - iterations: " << num_iter << endl;
// }
//
// if (experiment == 12 || experiment == 0) {
// SpikeThomasPreconditioner<double> preconditioner_sp16(16);
// num_iter = solve_with_chebyshev_iteration<double>(cublas_handle, cusparse_handle, m, alpha, b, x, max_iter, tolerance, &preconditioner_sp16);
// cout << " Chebyschev - Thomas-SPIKE(16) Prec. - iterations: " << num_iter << endl;
// }
//
// if (experiment == 20 || experiment == 0) {
// InvPreconditioner<double> preconditioner_in(false);
// num_iter = solve_with_chebyshev_iteration<double>(cublas_handle, cusparse_handle, m, alpha, b, x, max_iter, tolerance, &preconditioner_in);
// cout << " Chebyschev - inverse Matrix prec. w. cublas trans - iterations: " << num_iter << endl;
// }
//
//
//
//
//
//
//
//
///*
// SpikeThomasPreconditioner<double> preconditioner(8);
// num_iter = solve_with_conjugate_gradient<double>(cublas_handle, cusparse_handle, m, alpha, b, x, max_iter, tolerance, &preconditioner );
// cout << num_iter << " CG Spike-Thomas prec - iterations" << endl;
//
// cudaMemcpy( h_x, x, (m*m)*sizeof(double) , cudaMemcpyDeviceToHost );
// //for (int i = 0; i < m*m; i++)
// // cout << h_x[i] << " ";
// cout << endl;
//*/
//
//
///*
// ThomasPreconditioner<double> preconditioner2;
// num_iter = solve_with_conjugate_gradient<double>(cublas_handle, cusparse_handle, m, alpha, b, x, max_iter, tolerance, &preconditioner2 );
// cout << num_iter << " CG Thomas prec - iterations" << endl;
//
// cudaMemcpy( h_x, x, (m*m)*sizeof(double) , cudaMemcpyDeviceToHost );
// //for (int i = 0; i < m*m; i++)
// // cout << h_x[i] << " ";
//
// cout << endl;
//*/
//
//
//
//// InvPreconditioner<double> preconditionerInv;
//// num_iter = solve_with_conjugate_gradient<double>(cublas_handle, cusparse_handle, m, alpha, b, x, max_iter, tolerance, &preconditionerInv );
//// cout << num_iter << " CG InvM prec - iterations" << endl;
//
//// cudaMemcpy( h_x, x, (m*m)*sizeof(double) , cudaMemcpyDeviceToHost );
//// for (int i = 0; i < m*m; i++)
//// cout << h_x[i] << " ";
//
// // cout << endl;
//
int
num_iter
;
#define PREC double
PREC
*
b_3d
,
*
x_3d
;
int
m_3d
=
M
;
PREC
*
h_x_3d
;
h_x_3d
=
(
PREC
*
)
malloc
(
(
m_3d
*
m_3d
*
m_3d
)
*
sizeof
(
PREC
)
);
cudaMalloc
((
void
**
)
&
b_3d
,
(
m_3d
*
m_3d
*
m_3d
)
*
sizeof
(
PREC
));
cudaMalloc
((
void
**
)
&
x_3d
,
(
m_3d
*
m_3d
*
m_3d
)
*
sizeof
(
PREC
));
for
(
int
i
=
0
;
i
<
m_3d
*
m_3d
*
m_3d
;
i
++
)
h_x_3d
[
i
]
=
i
;
cudaMemcpy
(
b_3d
,
h_x_3d
,
(
m_3d
*
m_3d
*
m_3d
)
*
sizeof
(
PREC
)
,
cudaMemcpyHostToDevice
);
//device_memset<double>(b_3d, 1.0, m_3d*m_3d*m_3d);
//Inv
ThomasPreconditioner3D
<
PREC
>
preconditioner_inv3d
;
num_iter
=
solve_with_conjugate_gradient3D
<
PREC
>
(
cublas_handle
,
cusparse_handle
,
m_3d
,
alpha
,
b_3d
,
x_3d
,
max_iter
,
tolerance
,
&
preconditioner_inv3d
);
// NULL );
cout
<<
num_iter
<<
" CG 3D only - iterations"
<<
endl
;
cudaMemcpy
(
h_x_3d
,
b_3d
,
(
m_3d
*
m_3d
*
m_3d
)
*
sizeof
(
double
)
,
cudaMemcpyDeviceToHost
);
//for (int i = 0; i < m_3d*m_3d*m_3d; i++)
// cout << h_x_3d[i] << endl;
cout
<<
endl
;
// ThomasPreconditioner3D<double> preconditioner_3d;
// num_iter = solve_with_conjugate_gradient3D<double>(cublas_handle, cusparse_handle, m_3d, alpha, b_3d, x_3d, max_iter, tolerance, &preconditioner_3d);
// cout << num_iter << " CG 3D Thomas Prec - iterations" << endl;
const
int
m
=
M
;
//1 * 1024; //32; //8 * 1024; // 2048;
int
experiment
;
experiment
=
atoi
(
argv
[
1
]);
printf
(
"Running experiment: %d
\n
"
,
experiment
);
int
num_iter
;
double
*
b
,
*
x
;
double
*
h_x
,
*
h_b
;
h_x
=
(
double
*
)
malloc
(
(
m
*
m
)
*
sizeof
(
double
)
);
h_b
=
(
double
*
)
malloc
(
(
m
*
m
)
*
sizeof
(
double
)
);
for
(
int
i
=
0
;
i
<
M
;
i
++
)
{
for
(
int
j
=
0
;
j
<
M
;
j
++
)
{
if
(
j
==
0
)
h_b
[
i
*
M
+
j
]
=
i
+
1.0
;
else
h_b
[
i
*
M
+
j
]
=
0.0
;
h_x
[
i
*
M
+
j
]
=
0.0
;
}
}
cudaMalloc
((
void
**
)
&
b
,
(
m
*
m
)
*
sizeof
(
double
));
cudaMalloc
((
void
**
)
&
x
,
(
m
*
m
)
*
sizeof
(
double
));
device_memset_inc
<
double
>
(
x
,
0.0
,
m
*
m
);
cudaMemcpy
(
b
,
h_b
,
(
m
*
m
)
*
sizeof
(
double
)
,
cudaMemcpyHostToDevice
);
if
(
experiment
==
1
||
experiment
==
0
)
{
ThomasPreconditioner
<
double
>
preconditioner_th
(
false
);
num_iter
=
solve_with_chebyshev_iteration
<
double
>
(
cublas_handle
,
cusparse_handle
,
m
,
alpha
,
b
,
x
,
max_iter
,
tolerance
,
&
preconditioner_th
);
cout
<<
" Chebyschev - Thomas Prec w. cublas transpose. - iterations: "
<<
num_iter
<<
endl
;
}
if
(
experiment
==
2
||
experiment
==
0
)
{
ThomasPreconditioner
<
double
>
preconditioner_th_it
(
true
);
num_iter
=
solve_with_chebyshev_iteration
<
double
>
(
cublas_handle
,
cusparse_handle
,
m
,
alpha
,
b
,
x
,
max_iter
,
tolerance
,
&
preconditioner_th_it
);
cout
<<
" Chebyschev - Thomas Prec. w implicit transpose - iterations: "
<<
num_iter
<<
endl
;
}
if
(
experiment
==
10
||
experiment
==
0
)
{
SpikeThomasPreconditioner
<
double
>
preconditioner_sp4
(
4
);
num_iter
=
solve_with_chebyshev_iteration
<
double
>
(
cublas_handle
,
cusparse_handle
,
m
,
alpha
,
b
,
x
,
max_iter
,
tolerance
,
&
preconditioner_sp4
);
cout
<<
" Chebyschev - Thomas-SPIKE(4) Prec. - iterations: "
<<
num_iter
<<
endl
;
}
if
(
experiment
==
11
||
experiment
==
0
)
{
SpikeThomasPreconditioner
<
double
>
preconditioner_sp8
(
8
);
num_iter
=
solve_with_chebyshev_iteration
<
double
>
(
cublas_handle
,
cusparse_handle
,
m
,
alpha
,
b
,
x
,
max_iter
,
tolerance
,
&
preconditioner_sp8
);
cout
<<
" Chebyschev - Thomas-SPIKE(8) Prec. - iterations: "
<<
num_iter
<<
endl
;
}
if
(
experiment
==
12
||
experiment
==
0
)
{
SpikeThomasPreconditioner
<
double
>
preconditioner_sp16
(
16
);
num_iter
=
solve_with_chebyshev_iteration
<
double
>
(
cublas_handle
,
cusparse_handle
,
m
,
alpha
,
b
,
x
,
max_iter
,
tolerance
,
&
preconditioner_sp16
);
cout
<<
" Chebyschev - Thomas-SPIKE(16) Prec. - iterations: "
<<
num_iter
<<
endl
;
}
if
(
experiment
==
30
||
experiment
==
0
)
{
SpikeThomasPreconditioner
<
double
>
preconditioner
(
8
);
num_iter
=
solve_with_conjugate_gradient
<
double
>
(
cublas_handle
,
cusparse_handle
,
m
,
alpha
,
b
,
x
,
max_iter
,
tolerance
,
&
preconditioner
);
cout
<<
" CG - Thomas-SPIKE(8) Prec. - iterations: "
<<
num_iter
<<
endl
;
}
// Print results
// cudaMemcpy( h_x, x, (m*m)*sizeof(double) , cudaMemcpyDeviceToHost );
// for (int i = 0; i < m*m; i++)
// cout << h_x[i] << "\n";
// cout << endl;
}
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment