• Tidak ada hasil yang ditemukan

Membersihkan semua memori yang masih teralokasi setelah proses inpainting selesai.

7) Tampilkan Gambar

8) Simpan Gambar

outpuinpainting.convertTo(outpuinpainting, CV_8UC1, 255.0); imwrite("outputGPU.png", outpuinpainting);

Kode Menyimpan gambar

Untuk menyimpan gambar hasil inpainting dalam format yang benar, data terlebih dahulu dikonversi ke tipe CV_8UC1. Parameter 255.0 pada convertTo adalah faktor skala: setiap nilai piksel bertipe double pada rentang [0, 1] dikalikan 255 sehingga menempati rentang 0–255 yang dipakai citra 8-bit. Setelah format terpenuhi, data disimpan ke berkas dengan perintah imwrite.

Lampiran 5. Kode Lengkap Inpainting Berbasis CPU #include <iostream> #include <iomanip> #include <chrono> #include <math.h> #include "opencv2/imgproc/imgproc.hpp" #include "opencv2/highgui/highgui.hpp" #include "opencv2/features2d/features2d.hpp" const double PI = 3.1415; using namespace std; using namespace cv; Mat src; Mat u; Mat mask;

// Window title string. It is not referenced by any code in the visible CPU listing.
char window_name[] = "Cahn Hilliard Inpainting ";

// Forward declarations of the helpers defined after main().
// ComputeCurve: returns a rows x cols matrix filled from finite differences of src.
// psnr: 10 * log10(255^2 / mse) of the two images.
Mat ComputeCurve(Mat src, int rows, int cols, int vartype); double psnr(Mat & img_src, Mat & img_compressed);

// mse: mean of the squared per-pixel differences; pixels are read as double.
double mse(Mat & img1, Mat & img2);

// ssim: block-wise structural similarity averaged over block_size x block_size tiles.
// show_progress defaults to false.
double ssim(Mat & img_src, Mat & img_compressed, int block_size, bool show_progress = false);

// sigma: standard deviation of the block_size x block_size tile whose top-left pixel is (i, j).
double sigma(Mat & m, int i, int j, int block_size);

// cov: covariance of the same tile taken from m1 and m2.
double cov(Mat & m1, Mat & m2, int i, int j, int block_size);

int main(void) {

double maxs = 0, mins = 0, maxm = 0, minm = 0, maxg = 0, ming = 0; Mat lambda, g;

int vartype = CV_64FC1;

src = imread("input.png", CV_LOAD_IMAGE_GRAYSCALE);

mask = imread("input-mask_i.png", CV_LOAD_IMAGE_GRAYSCALE); src.convertTo(src, vartype);

mask.convertTo(mask, vartype); minMaxLoc(src, &mins, &maxs); minMaxLoc(mask, &minm, &maxm); src = src / maxs;

mask = 1-mask / maxm; lambda = mask.clone(); g = src.clone();

minMaxLoc(g, &ming, &maxg); divide(g, maxg, g);

//parameter

int dt = 100;

double epsilon = 0.005;

double ep2 = pow(epsilon , 2); int lpower = 2; double lambda0 = 100; lambda = lambda.mul(lambda0); double c1 = 200; double c2 = 100; int Itermax = 2000; int rows = g.rows; int cols = g.cols;

u = g;

imshow("Lambda", lambda); imshow("Original Image", g); waitKey(1);

Mat Lambda1 = Mat::zeros(rows, rows, vartype); Mat Lambda2 = Mat::zeros(cols, cols, vartype); for (int j = 0; j<rows; j++){

Lambda1.at<double>(j, j) = 2 * (cos(2 * j * PI / rows) - 1); }

for (int j = 0; j<cols; j++){

Lambda2.at<double>(j, j) = 2 * (cos(2 * j * PI / cols) - 1); }

int h1 = 1; int h2 = 1;

Mat Denominator = 1 / (h1*h1)*(Lambda1*Mat::ones(rows, cols, vartype)) + 1 / (h2*h2)*(Mat::ones(rows, cols, vartype)*Lambda2);

Mat ubar, uabar, LUbar, LU0bar, LapCurvbar,curvbar; Mat curv,alpha,LU,tesfft,ivtes;

Mat complexLapCurvbar[] = { Mat::zeros(u.size(), vartype), Mat::zeros(u.size(), vartype) };

Mat complexubar[] = { Mat::zeros(u.size(), vartype), Mat::zeros(u.size(), vartype) };

dft(u, ubar, DFT_COMPLEX_OUTPUT);

dft(u.mul(lambda), LU0bar, DFT_COMPLEX_OUTPUT); LUbar = LU0bar.clone();

//LUbar = LU0bar;

std::chrono::time_point<std::chrono::system_clock> start, end; start = std::chrono::system_clock::now();

for (int i = 0; i<200; i++) {

curv = ComputeCurve(u, src.rows, src.cols, vartype); dft(curv, curvbar,DFT_COMPLEX_OUTPUT);

split(curvbar, complexLapCurvbar); complexLapCurvbar[0] = complexLapCurvbar[0].mul(Denominator); complexLapCurvbar[1] = complexLapCurvbar[1].mul(Denominator); merge(complexLapCurvbar, 2, LapCurvbar); alpha = 1 + c2*dt + dt*c1*Denominator.mul(Denominator); split(ubar, complexubar); complexubar[0] = complexubar[0].mul(alpha); complexubar[1] = complexubar[1].mul(alpha); merge(complexubar, 2, uabar);

ubar = uabar - dt*LapCurvbar + dt*(LU0bar - LUbar); tesfft = dt*LapCurvbar;

split(ubar, complexubar);

divide( complexubar[0],alpha, complexubar[0]); divide(complexubar[1], alpha, complexubar[1]); merge(complexubar, 2, ubar);

idft(ubar, u, DFT_SCALE | DFT_REAL_OUTPUT); // Applying IDFT

LU = u.mul(lambda);

//imshow("Cahn Hilliard high order Inpainting", u); //waitKey(1);

cout << "Iterasi - " << i + 1 << endl; }

end = std::chrono::system_clock::now(); u.convertTo(u, CV_8UC1, 255.0);

imshow("Cahn Hilliard high order Inpainting", u); imwrite("output.png", u);

std::chrono::duration<double> elapsed_seconds = end - start; time_t end_time = std::chrono::system_clock::to_time_t(end); struct tm timeinfo;

localtime_s(&timeinfo, &end_time);

cout << "Finished computation in " << put_time(&timeinfo, " %d-%m-%Y at %H:%M:%S") << endl;

cout << "Elapsed time: " << elapsed_seconds.count() << "s\n"; u.convertTo(u, CV_64FC1);

cout << "\nQuality Measurement" << endl; cout << "MSE : " << mse(g, u) << endl; cout << "PSNR: " << psnr(g, u) << endl;

cout << "SSIM: " << ssim(g, u, 8, false) << endl; waitKey(0);

return 0; }

/*
 * Computes the regularised curvature term of src with central differences.
 *
 * For every pixel the value is
 *   (u_xx (ep2 + u_y^2) - 2 u_x u_y u_xy + u_yy (ep2 + u_x^2)) / (ep2 + u_x^2 + u_y^2)^1.5
 * with ep2 = 0.005^2. Neighbours outside the image are replaced by the nearest
 * pixel inside it (indices are clamped).
 *
 * src     : image read as double (at<double>), so it must be CV_64F.
 * rows    : number of rows to process.
 * cols    : number of columns to process.
 * vartype : OpenCV type of the result; values are written with at<double>.
 * Returns a rows x cols matrix holding the curvature term.
 *
 * Fix: the quotient num / den is now actually computed; the previous body
 * assigned an undefined identifier `curve`.
 */
Mat ComputeCurve(Mat src, int rows, int cols, int vartype) {
    Mat result = Mat::zeros(rows, cols, vartype);
    const double epsilon = 0.005;
    const double ep2 = pow(epsilon, 2);

    for (int i = 0; i < rows; i++) {
        // Clamped row neighbours (north / south).
        const int n = (i - 1 <= 0) ? 0 : i - 1;
        const int s = (i + 1 > rows - 1) ? rows - 1 : i + 1;

        for (int j = 0; j < cols; j++) {
            // Clamped column neighbours (west / east).
            const int w = (j - 1 <= 0) ? 0 : j - 1;
            const int e = (j + 1 > cols - 1) ? cols - 1 : j + 1;

            const double centre = src.at<double>(i, j);
            const double x = (src.at<double>(i, e) - src.at<double>(i, w)) / 2;
            const double y = (src.at<double>(s, j) - src.at<double>(n, j)) / 2;
            const double xx = src.at<double>(i, w) + src.at<double>(i, e) - 2 * centre;
            const double yy = src.at<double>(s, j) + src.at<double>(n, j) - 2 * centre;
            const double dp = src.at<double>(n, w) + src.at<double>(s, e);
            const double dm = src.at<double>(n, e) + src.at<double>(s, w);
            const double xy = (dp - dm) / 4;

            const double num = xx * (ep2 + y * y) - 2 * x * y * xy + yy * (ep2 + x * x);
            const double den = pow((ep2 + x * x + y * y), 1.5);

            result.at<double>(i, j) = num / den;
        }
    }

    return result;
}

/*
 * Peak signal-to-noise ratio in decibels for a peak value of 255:
 * 10 * log10(255^2 / mse(img_src, img_compressed)).
 */
double psnr(Mat & img_src, Mat & img_compressed) {
    const int peak = 255;
    const double error = mse(img_src, img_compressed);
    return (10 * log10((peak * peak) / error));
}

/*
 * Mean squared error between img1 and img2.
 * Both images are read with at<double>; the loop bounds come from img1.
 */
double mse(Mat & img1, Mat & img2) {
    const int height = img1.rows;
    const int width = img1.cols;

    double total = 0;
    for (int r = 0; r < height; ++r) {
        for (int c = 0; c < width; ++c) {
            const double diff = img1.at<double>(r, c) - img2.at<double>(r, c);
            total += diff * diff;
        }
    }

    total /= height * width;
    return total;
}

/*
 * Block-wise structural similarity (SSIM) between two images.
 *
 * The images are tiled into non-overlapping block_size x block_size blocks
 * (typically 8); the per-block SSIM values are averaged. The stabilising
 * constants are (0.01 * 255)^2 and (0.03 * 255)^2.
 *
 * img_src        : reference image.
 * img_compressed : image under test.
 * block_size     : tile edge length in pixels.
 * show_progress  : when true, prints a progress line per block row and the
 *                  final value.
 * Returns the mean SSIM over all complete blocks.
 *
 * Fixes:
 *  - the block means were taken over Range(k, k + block_size) /
 *    Range(l, l + block_size), i.e. indexed by block number instead of by the
 *    pixel offsets m / n used for sigma() and cov(), so means and variances
 *    came from different regions;
 *  - the progress line was printed even when show_progress was false.
 */
double ssim(Mat & img_src, Mat & img_compressed, int block_size, bool show_progress) {
    double ssim = 0;

    const float C1 = (float)(0.01 * 255 * 0.01 * 255);
    const float C2 = (float)(0.03 * 255 * 0.03 * 255);
    const int nbBlockPerHeight = img_src.rows / block_size;
    const int nbBlockPerWidth = img_src.cols / block_size;

    for (int k = 0; k < nbBlockPerHeight; k++) {
        for (int l = 0; l < nbBlockPerWidth; l++) {
            // Pixel coordinates of the block's top-left corner.
            const int m = k * block_size;
            const int n = l * block_size;
            const Range rowRange(m, m + block_size);
            const Range colRange(n, n + block_size);

            const double avg_o = mean(img_src(rowRange, colRange))[0];
            const double avg_r = mean(img_compressed(rowRange, colRange))[0];
            const double sigma_o = sigma(img_src, m, n, block_size);
            const double sigma_r = sigma(img_compressed, m, n, block_size);
            const double sigma_ro = cov(img_src, img_compressed, m, n, block_size);

            ssim += ((2 * avg_o * avg_r + C1) * (2 * sigma_ro + C2)) /
                    ((avg_o * avg_o + avg_r * avg_r + C1) * (sigma_o * sigma_o + sigma_r * sigma_r + C2));
        }

        // Progress
        if (show_progress) {
            cout << "\r>>SSIM [" << (int)((((double)k) / nbBlockPerHeight) * 100) << "%]";
        }
    }

    ssim /= nbBlockPerHeight * nbBlockPerWidth;

    if (show_progress) {
        cout << "\r>>SSIM [100%]" << endl;
        cout << "SSIM : " << ssim << endl;
    }

    return ssim;
}

/*
 * Standard deviation of the block_size x block_size tile of m whose top-left
 * pixel is (i, j), computed as sqrt(E(x^2) - E(x)^2).
 *
 * Fix: on (nearly) constant tiles rounding can make E(x^2) - E(x)^2 slightly
 * negative, and sqrt() then returned NaN, which poisoned the whole SSIM sum.
 * The variance is now clamped at zero before the square root.
 */
double sigma(Mat & m, int i, int j, int block_size) {
    Mat m_tmp = m(Range(i, i + block_size), Range(j, j + block_size));
    Mat m_squared(block_size, block_size, CV_64F);

    multiply(m_tmp, m_tmp, m_squared);

    // E(x)
    const double avg = mean(m_tmp)[0];
    // E(x^2)
    const double avg_2 = mean(m_squared)[0];

    const double variance = avg_2 - avg * avg;
    return sqrt(variance < 0 ? 0.0 : variance);
}

/*
 * Covariance E(XY) - E(X)E(Y) of the block_size x block_size tiles of m1 and
 * m2 whose top-left pixel is (i, j).
 */
double cov(Mat & m1, Mat & m2, int i, int j, int block_size) {
    const Range rowSpan(i, i + block_size);
    const Range colSpan(j, j + block_size);

    Mat tileA = m1(rowSpan, colSpan);
    Mat tileB = m2(rowSpan, colSpan);

    Mat product = Mat::zeros(block_size, block_size, m1.depth());
    multiply(tileA, tileB, product);

    const double meanProduct = mean(product)[0]; // E(XY)
    const double meanA = mean(tileA)[0];         // E(X)
    const double meanB = mean(tileB)[0];         // E(Y)

    return meanProduct - meanB * meanA;          // E(XY) - E(X)E(Y)
}

Lampiran 6. Kode Lengkap Inpainting Pemrosesan Paralel untuk CPU #include "opencv2\core\core.hpp" #include "opencv2\opencv.hpp" #include "opencv2\highgui\highgui.hpp" #include "cuda_runtime.h" #include <stdio.h> #include <math.h> const double PI = 3.1415; using namespace std; using namespace cv;

// Host-side entry points implemented in the CUDA translation unit.

// Converts the 8-bit image and mask to double on the GPU and copies both back.
// The last two parameters are (numRows, numCols), in that order: this matches
// the definition and the call in main(). The previous declaration named them
// (cols, rows), which invited swapped arguments.
void gpu_image2double(unsigned char *h_src, unsigned char *h_lamda, double* h_outsrc, double *h_outlamda, int numRows, int numCols);

// NOTE(review): no definition of this function is visible in the listing; confirm it is still needed.
void gpu_createvariabel(double *h_src, double * h_out, int cols, int rows);

// Runs `iteration` inpainting steps on the GPU.
// h_output receives the final image and h_curve a diagnostic field.
void gpu_inpaintinghighorder(double *h_src, double *h_lamda, double *denominator, double* h_output, double * h_curve, int iteration, float *elapsedtime, int numRows, int numCols);

void main() {

printf(" CUDA Inpainting High Order Started \n"); int vartype = CV_64FC1;

//Read Data

Mat src = imread("original.png", CV_LOAD_IMAGE_COLOR); Mat mask = imread("mask.png", CV_LOAD_IMAGE_COLOR); Mat lambda, u, g;

int rows = src.rows; int cols = src.cols;

//Init Parameter

int Itermax = 1000;

Mat Lambda1 = Mat::zeros(rows, rows, vartype); Mat Lambda2 = Mat::zeros(cols, cols, vartype); for (int j = 0; j<rows; j++){

Lambda1.at<double>(j, j) = 2 * (cos(2 *j * PI / rows) - 1); }

for (int j = 0; j<cols; j++){

Lambda2.at<double>(j, j) = 2 * (cos(2 *j * PI / cols) - 1); }

int h1 = 1; int h2 = 1;

Mat Denominator = 1 / (h1*h1)*(Lambda1*Mat::ones(rows,cols, vartype)) + 1 / (h2*h2)*(Mat::ones(rows, cols, vartype)*Lambda2);

cvtColor(src, src, CV_BGR2GRAY); cvtColor(mask, mask, CV_BGR2GRAY);

//GPU JADI

unsigned char * h_src = (unsigned char*)src.data; unsigned char * h_lamda = (unsigned char*)mask.data; double *h_denominator = (double*)Denominator.data; double *h_lambda1 = (double*)Lambda1.data;

double *h_lambda2 = (double*)Lambda2.data;

double *h_doubleSrc = (double*)malloc(sizeof(double)*src.cols*src.rows); double *h_doubleLamda = (double*)malloc(sizeof(double)*src.cols*src.rows);

double *h_out = (double*)malloc(sizeof(double)*src.cols*src.rows); double *h_curve = (double*)malloc(sizeof(double)*src.cols*src.rows); float *elapsedtime = 0;

gpu_image2double(h_src, h_lamda, h_doubleSrc, h_doubleLamda, rows, cols); gpu_inpaintinghighorder(h_doubleSrc, h_doubleLamda, h_denominator,

h_out,h_curve, 2000, elapsedtime, rows,cols);

Mat outputsrc(src.rows, src.cols, vartype, h_doubleSrc); Mat outpuinpainting(src.rows, src.cols, vartype, h_out); Mat outputcurve(src.rows, src.cols, vartype, h_curve); imshow("lambda asli", mask);

printf("Print Inpainting 10 first data\n"); for (int j = 0; j < 10; j++)

{

cout << outpuinpainting.at<double>(0, j)<<" "; }

printf("\n\nPrint Src 10 first data\n"); for (int j = 0; j < 10; j++)

{

cout << outputsrc.at<double>(0, j) << " "; }

printf("\n\nPrint Curve 10 first data\n"); for (int j = 0; j < 10; j++)

{

cout << outputcurve.at<double>(0, j) << " "; }

imshow("soruce", outputsrc);

imshow("inpainting", outpuinpainting);

outpuinpainting.convertTo(outpuinpainting, CV_8UC1, 255.0); imwrite("outputGPU.png", outpuinpainting);

imshow("Curve", outputcurve);

//cout << "Elapsed time: " << elapsedtime << "s\n";

waitKey(); }

Lampiran 7. Kode Lengkap Inpainting Pemrosesan Paralel GPU #include "cuda_runtime.h" #include "cufft.h" #include "utils.h" #include <stdio.h> #include <algorithm> #include "assert.h" __constant__ int dt = 100;

// Model parameters placed in constant memory for the kernels below.
// epsilon is not read by any kernel in the visible listing; ep2 carries its square.
__constant__ double epsilon = 0.005;

// epsilon squared (0.005^2); regularises the curvature term in ComputeCurve.
__constant__ double ep2 =0.000025; //ep2 = epsilon^2;

// Exponent used to derive lambda0 below; left disabled.
//__constant__ int lpower = 2;

// Fidelity weight applied to the inverted mask in Inverse2double.
__constant__ double lambda0 = 100; //lambda0 = 10 ^ lpower;

// Read by CahnHilliardHighOrder; CahnHilliardHighOrderComplex uses a local copy with the same value.
__constant__ double c1 = 200; //1 / epsilon;

// Read by CahnHilliardHighOrder; equal to lambda0.
__constant__ double c2 = 100; //c2 = lambda0;

/*
 * Per-block maximum of a numRows x numCols row-major array.
 *
 * Launch: 1D or 2D grid of 1D or 2D blocks covering the array.
 * Dynamic shared memory: blockDim.x * blockDim.y * sizeof(double).
 * Output: d_out[blockIdx.y * gridDim.x + blockIdx.x] receives the maximum of
 * the elements covered by that block (for a 1D grid this is d_out[blockIdx.x]).
 *
 * Fixes:
 *  - threads outside the array no longer return before __syncthreads(); they
 *    load a clamped (duplicate) in-range element, which cannot change a maximum,
 *    so every shared slot is initialised and every thread reaches each barrier;
 *  - the shared-memory slot is the linear thread index, so 2D blocks no longer
 *    race on sdata[threadIdx.x];
 *  - the partner read sdata[tid + s] is bounds-checked for block sizes that
 *    are not a power of two;
 *  - the output index includes blockIdx.y.
 */
__global__
void maxreduce(double * d_in, double * d_out, int numRows, int numCols) {
    extern __shared__ double sdata[];

    // Uniform across the block, so returning here cannot split a barrier.
    if (numRows <= 0 || numCols <= 0) {
        return;
    }

    const unsigned int threadsInBlock = blockDim.x * blockDim.y;
    const unsigned int tid = threadIdx.y * blockDim.x + threadIdx.x;

    unsigned int row = blockIdx.y * blockDim.y + threadIdx.y; // current row
    unsigned int col = blockIdx.x * blockDim.x + threadIdx.x; // current column
    if (row >= (unsigned int)numRows) row = (unsigned int)numRows - 1;
    if (col >= (unsigned int)numCols) col = (unsigned int)numCols - 1;

    // each thread loads one element from global to shared mem
    sdata[tid] = d_in[col + (unsigned int)numCols * row];
    __syncthreads();

    // do reduction in shared mem
    for (unsigned int s = 1; s < threadsInBlock; s *= 2) {
        if (tid % (2 * s) == 0 && tid + s < threadsInBlock) {
            sdata[tid] = max(sdata[tid], sdata[tid + s]);
        }
        __syncthreads();
    }

    // write result for this block to global mem
    if (tid == 0) {
        d_out[blockIdx.y * gridDim.x + blockIdx.x] = sdata[0];
    }
}

/*
 * Converts an 8-bit image to double, dividing every pixel by 255.
 * Launch: 2D grid of 2D blocks covering numCols x numRows; one thread per pixel.
 */
__global__
void Convert2double(unsigned char* const inputChannel, double* const outputChannel, int numRows, int numCols)
{
    const int r = blockIdx.y * blockDim.y + threadIdx.y; // current row
    const int c = blockIdx.x * blockDim.x + threadIdx.x; // current column

    if (r < numRows && c < numCols) {
        const int pixel = c + numCols * r; // current pixel index
        const double value = inputChannel[pixel];
        outputChannel[pixel] = value / 255;
    }
}

/*
 * Writes lambda .* u into a complex array (imaginary part 0).
 * Launch: 2D grid of 2D blocks covering numCols x numRows; one thread per pixel.
 *
 * Fix: the function reads blockIdx / threadIdx and therefore has to be a
 * kernel; the __global__ qualifier was missing.
 */
__global__
void computelubarcomplex(double *u, double* lambda, cufftDoubleComplex *clubar, int numRows, int numCols)
{
    const int row = blockIdx.y * blockDim.y + threadIdx.y; // current row
    const int col = blockIdx.x * blockDim.x + threadIdx.x; // current column

    if ((row >= numRows) || (col >= numCols)) {
        return;
    }

    const int index = col + numCols * row; // current pixel index

    clubar[index].x = u[index] * lambda[index];
    clubar[index].y = 0.0;
}

/*
 * Stores the element-wise product lambda .* u as a complex array whose
 * imaginary part is zero.
 * Launch: 2D grid of 2D blocks covering numCols x numRows; one thread per pixel.
 */
__global__
void computeLU(double *u, double* lambda, cufftDoubleComplex* lubar, int numRows, int numCols)
{
    const int r = blockIdx.y * blockDim.y + threadIdx.y; // current row
    const int c = blockIdx.x * blockDim.x + threadIdx.x; // current column

    if (r < numRows && c < numCols) {
        const int pixel = c + numCols * r; // current pixel index
        const double weighted = u[pixel] * lambda[pixel];
        lubar[pixel].x = weighted;
        lubar[pixel].y = 0;
    }
}

/*
 * Multiplies each complex sample of `curve` by the real factor in
 * `Denominator` and stores the product in `result`.
 * Launch: 2D grid of 2D blocks covering numCols x numRows; one thread per sample.
 */
__global__
void computelapcurvbar(cufftDoubleComplex * curve, double *Denominator, cufftDoubleComplex * result, int numRows, int numCols)
{
    const int r = blockIdx.y * blockDim.y + threadIdx.y; // current row
    const int c = blockIdx.x * blockDim.x + threadIdx.x; // current column

    if (r < numRows && c < numCols) {
        const int pixel = c + numCols * r; // current pixel index
        const double re = curve[pixel].x * Denominator[pixel];
        const double im = curve[pixel].y * Denominator[pixel];
        result[pixel].x = re;
        result[pixel].y = im;
    }
}

/*
 * Element-wise copy of a complex array. The FIRST argument is the source and
 * the SECOND the destination, despite the parameter names.
 * Launch: 2D grid of 2D blocks covering numCols x numRows; one thread per sample.
 */
__global__
void copyData(cufftDoubleComplex * lu0bar, cufftDoubleComplex * lubar, int numRows, int numCols)
{
    const int r = blockIdx.y * blockDim.y + threadIdx.y; // current row
    const int c = blockIdx.x * blockDim.x + threadIdx.x; // current column

    if (r < numRows && c < numCols) {
        const int pixel = c + numCols * r;
        lubar[pixel] = lu0bar[pixel];
    }
}

/*
 * Builds the fidelity weight from an 8-bit mask:
 * output = (1 - pixel / 255) * lambda0, so a pixel of 255 maps to 0 and a
 * pixel of 0 maps to lambda0.
 * Launch: 2D grid of 2D blocks covering numCols x numRows; one thread per pixel.
 */
__global__
void Inverse2double(unsigned char* const inputChannel, double* const outputChannel, int numRows, int numCols)
{
    const int r = blockIdx.y * blockDim.y + threadIdx.y; // current row
    const int c = blockIdx.x * blockDim.x + threadIdx.x; // current column

    if (r < numRows && c < numCols) {
        const int pixel = c + numCols * r; // current pixel index
        const double value = inputChannel[pixel];
        outputChannel[pixel] = (1 - value / 255) * lambda0; // lambda = lambda .* lambda0
    }
}

/*
 * Copies a real array into the real part of a complex array and zeroes the
 * imaginary part.
 * Launch: 2D grid of 2D blocks covering numCols x numRows; one thread per sample.
 */
__global__ void real2complex(double * in, cufftDoubleComplex *out, int numRows, int numCols)
{
    const int r = blockIdx.y * blockDim.y + threadIdx.y; // current row
    const int c = blockIdx.x * blockDim.x + threadIdx.x; // current column

    if (r < numRows && c < numCols) {
        const int pixel = c + numCols * r;
        out[pixel].x = in[pixel];
        out[pixel].y = 0.0;
    }
}

/*
 * Extracts the real part of a complex array and divides it by
 * numRows * numCols (the scaling an unnormalised inverse FFT still needs).
 * Launch: 2D grid of 2D blocks covering numCols x numRows; one thread per sample.
 */
__global__ void complex2real(cufftDoubleComplex *in, double * out, int numRows, int numCols)
{
    const int r = blockIdx.y * blockDim.y + threadIdx.y; // current row
    const int c = blockIdx.x * blockDim.x + threadIdx.x; // current column

    if (r < numRows && c < numCols) {
        const int pixel = c + numCols * r;
        out[pixel] = in[pixel].x / ((double)numRows*(double)numCols);
    }
}

// Returns s * a for a complex value a and a real scale s.
static __device__ __host__ inline cufftDoubleComplex ComplexScale(cufftDoubleComplex a, double s)
{
    cufftDoubleComplex scaled;
    scaled.x = s * a.x;
    scaled.y = s * a.y;
    return scaled;
}

// Returns the component-wise difference a - b of two complex values.
static __device__ __host__ inline cufftDoubleComplex ComplexSub(cufftDoubleComplex a, cufftDoubleComplex b)
{
    cufftDoubleComplex diff;
    diff.x = a.x - b.x;
    diff.y = a.y - b.y;
    return diff;
}

// Returns the component-wise sum a + b of two complex values.
// Fix: the closing brace of this function was missing.
static __device__ __host__ inline cufftDoubleComplex ComplexAdd(cufftDoubleComplex a, cufftDoubleComplex b)
{
    cufftDoubleComplex c;
    c.x = a.x + b.x;
    c.y = a.y + b.y;
    return c;
}

// Returns a / s for a complex value a and a real divisor s.
static __device__ __host__ inline cufftDoubleComplex ComplexScaleDiv(cufftDoubleComplex a, double s)
{
    cufftDoubleComplex c;
    c.x = a.x / s;
    c.y = a.y / s;
    return c;
}

/*
 * Regularised curvature term of u, one thread per pixel:
 *   (u_xx (ep2 + u_y^2) - 2 u_x u_y u_xy + u_yy (ep2 + u_x^2)) / (ep2 + u_x^2 + u_y^2)^1.5
 * The result is written twice: as a complex value (imaginary part 0) to `curv`
 * and as a plain double to `cur`.
 *
 * Launch: 2D grid of 2D blocks covering numCols x numRows.
 * `size` is kept for interface compatibility and is no longer needed.
 *
 * Fixes:
 *  - the border handling for row 0 had been swallowed by a `////` comment,
 *    leaving an `else` without an `if`; neighbours are now obtained by clamping
 *    row and column indices, which is what the CPU ComputeCurve does and is
 *    also correct for images with a single row or column;
 *  - u_x was (left - right) / 2, the opposite sign of the CPU reference
 *    (right - left) / 2, which flipped the sign of the mixed term;
 *  - powf() truncated the double argument to float; pow() keeps double precision;
 *  - the trailing __syncthreads() followed a divergent early return and
 *    protected nothing, so it was removed.
 */
__global__
void ComputeCurve(double *u, cufftDoubleComplex *curv, double *cur, int size, int numRows, int numCols)
{
    (void)size;

    const int row = blockIdx.y * blockDim.y + threadIdx.y; // current row
    const int col = blockIdx.x * blockDim.x + threadIdx.x; // current column
    if ((row >= numRows) || (col >= numCols)) {
        return;
    }

    // Neighbour coordinates, clamped to the image (replicated border).
    const int rowUp = (row > 0) ? row - 1 : 0;
    const int rowDown = (row < numRows - 1) ? row + 1 : numRows - 1;
    const int colLeft = (col > 0) ? col - 1 : 0;
    const int colRight = (col < numCols - 1) ? col + 1 : numCols - 1;

    const int index = col + numCols * row;

    const double centre = u[index];
    const double left = u[colLeft + numCols * row];
    const double right = u[colRight + numCols * row];
    const double up = u[col + numCols * rowUp];
    const double down = u[col + numCols * rowDown];
    const double upLeft = u[colLeft + numCols * rowUp];
    const double upRight = u[colRight + numCols * rowUp];
    const double downLeft = u[colLeft + numCols * rowDown];
    const double downRight = u[colRight + numCols * rowDown];

    // Central differences.
    const double x = (right - left) / 2;
    const double y = (down - up) / 2;
    const double xx = left + right - 2 * centre;
    const double yy = up + down - 2 * centre;
    const double Dp = upLeft + downRight;
    const double Dm = upRight + downLeft;
    const double xy = (Dp - Dm) / 4;

    const double Num = xx * (ep2 + y * y) - 2 * x * y * xy + yy * (ep2 + x * x);
    const double Den = pow((ep2 + x * x + y * y), 1.5);
    const double Curv = Num / Den;

    // Curv in complex format for the FFT, plus a real copy.
    curv[index].x = Curv;
    curv[index].y = 0;
    cur[index] = Curv;
}

/*
 * Writes the real part of din divided by numRows * numCols into dout.
 * Launch: 2D grid of 2D blocks covering numCols x numRows; one thread per sample.
 * The divisor is formed in double so that large images cannot overflow int.
 */
__global__
void normalize(cufftDoubleComplex *din, double * dout, int numRows, int numCols)
{
    const int row = blockIdx.y * blockDim.y + threadIdx.y; // current row
    const int col = blockIdx.x * blockDim.x + threadIdx.x; // current column
    if ((row >= numRows) || (col >= numCols)) {
        return;
    }

    const int index = col + numCols * row;
    dout[index] = din[index].x / ((double)numCols * (double)numRows);
}

/*
 * Real-valued form of the semi-implicit update, one thread per sample:
 *   alpha   = 1 + c2*dt + dt*c1*Denominator^2
 *   newubar = (alpha*ubar - dt*lapcurvbar + dt*(lu0bar - lubar)) / alpha
 * dt, c1 and c2 are the file-level __constant__ values.
 * Launch: 2D grid of 2D blocks covering numCols x numRows.
 */
__global__
void CahnHilliardHighOrder(double * ubar, double * Denominator, double * lapcurvbar, double * lu0bar, double * lubar, double *newubar, int numRows, int numCols)
{
    const int r = blockIdx.y * blockDim.y + threadIdx.y; // current row
    const int c = blockIdx.x * blockDim.x + threadIdx.x; // current column

    if (r < numRows && c < numCols) {
        const int pixel = c + numCols * r;

        const double numerator =
            (1 + c2*dt + dt*c1*Denominator[pixel]*Denominator[pixel])*ubar[pixel]
            - dt*lapcurvbar[pixel]
            + dt*(lu0bar[pixel] - lubar[pixel]);
        const double divisor = 1 + dt*c2 + dt*c1*Denominator[pixel] * Denominator[pixel];

        newubar[pixel] = numerator / divisor;
    }
}

/*
 * Complex form of the semi-implicit update, one thread per frequency sample:
 *   alpha   = 1 + c2*dt + dt*c1*Denominator^2
 *   newubar = (alpha*ubar - (dt*lapcurvbar + dt*(lubar - lu0bar))) / alpha
 *   temp    = lubar - lu0bar
 * dt, c1 and c2 are local doubles (100, 200, 100) that shadow the file-level
 * constants of the same names.
 * Launch: 2D grid of 2D blocks covering numCols x numRows.
 */
__global__
void CahnHilliardHighOrderComplex(cufftDoubleComplex * ubar, double * Denominator, cufftDoubleComplex * lapcurvbar, cufftDoubleComplex * lu0bar, cufftDoubleComplex * lubar, cufftDoubleComplex *newubar, cufftDoubleComplex *temp, int numRows, int numCols)
{
    double dt = 100;
    double c1 = 200;
    double c2 = 100;

    const int r = blockIdx.y * blockDim.y + threadIdx.y; // current row
    const int c = blockIdx.x * blockDim.x + threadIdx.x; // current column

    if (r < numRows && c < numCols) {
        const int pixel = c + numCols * r;

        const double alpha = 1 + c2*dt + dt*c1*(Denominator[pixel] * Denominator[pixel]);

        // lubar - lu0bar is used by the update and also exported through temp.
        const cufftDoubleComplex fidelity = ComplexSub(lubar[pixel], lu0bar[pixel]);

        const cufftDoubleComplex scaledU = ComplexScale(ubar[pixel], alpha);
        const cufftDoubleComplex correction =
            ComplexAdd(ComplexScale(lapcurvbar[pixel], dt), ComplexScale(fidelity, dt));

        newubar[pixel] = ComplexScaleDiv(ComplexSub(scaledU, correction), alpha);
        temp[pixel] = fidelity;
    }
}

/*
 * Converts the 8-bit image and mask to double on the GPU.
 *
 * h_src      : numRows x numCols 8-bit image (host, contiguous row-major).
 * h_lamda    : numRows x numCols 8-bit mask (host, contiguous row-major).
 * h_outsrc   : receives h_src / 255 (host, numRows * numCols doubles).
 * h_outlamda : receives (1 - h_lamda / 255) * lambda0 (host, same size).
 *
 * Fixes: kernel launches and cudaFree calls are now error-checked, byte counts
 * are computed in size_t, the grid uses a ceiling division, and empty images
 * return immediately instead of attempting zero-sized work. The cudaMemset
 * calls were dropped because both kernels write every in-range element.
 */
void gpu_image2double(unsigned char *h_src, unsigned char *h_lamda, double* h_outsrc, double *h_outlamda, int numRows, int numCols)
{
    if (numRows <= 0 || numCols <= 0) {
        return;
    }

    const size_t count = (size_t)numRows * (size_t)numCols;
    const size_t bytesIn = sizeof(unsigned char) * count;
    const size_t bytesOut = sizeof(double) * count;

    const dim3 blockSize(32, 32, 1);
    const dim3 gridSize((numCols + 31) / 32, (numRows + 31) / 32, 1);

    unsigned char *d_src = 0, *d_lamda = 0;
    double *d_outsrc = 0, *d_outlamda = 0;

    checkCudaErrors(cudaMalloc(&d_src, bytesIn));
    checkCudaErrors(cudaMalloc(&d_lamda, bytesIn));
    checkCudaErrors(cudaMalloc(&d_outlamda, bytesOut));
    checkCudaErrors(cudaMalloc(&d_outsrc, bytesOut));

    checkCudaErrors(cudaMemcpy(d_src, h_src, bytesIn, cudaMemcpyHostToDevice));
    checkCudaErrors(cudaMemcpy(d_lamda, h_lamda, bytesIn, cudaMemcpyHostToDevice));

    Convert2double << <gridSize, blockSize >> >(d_src, d_outsrc, numRows, numCols);
    checkCudaErrors(cudaGetLastError());
    Inverse2double << <gridSize, blockSize >> >(d_lamda, d_outlamda, numRows, numCols);
    checkCudaErrors(cudaGetLastError());

    // The blocking copies also surface any asynchronous kernel failure.
    checkCudaErrors(cudaMemcpy(h_outsrc, d_outsrc, bytesOut, cudaMemcpyDeviceToHost));
    checkCudaErrors(cudaMemcpy(h_outlamda, d_outlamda, bytesOut, cudaMemcpyDeviceToHost));

    checkCudaErrors(cudaFree(d_src));
    checkCudaErrors(cudaFree(d_lamda));
    checkCudaErrors(cudaFree(d_outlamda));
    checkCudaErrors(cudaFree(d_outsrc));
}

// Device buffers owned by one gpu_inpaintinghighorder() call.
struct InpaintDeviceBuffers {
    cufftDoubleReal *u;             // current image estimate
    cufftDoubleReal *lambda;        // fidelity weight
    cufftDoubleReal *denominator;   // Laplacian symbol
    cufftDoubleReal *curve;         // real diagnostic output
    cufftDoubleComplex *cu;         // estimate in complex form
    cufftDoubleComplex *cubar;      // FFT of the estimate
    cufftDoubleComplex *cLU0bar;    // FFT of lambda .* u0
    cufftDoubleComplex *cLU;        // lambda .* u
    cufftDoubleComplex *cLUbar;     // FFT of lambda .* u
    cufftDoubleComplex *ccurve;     // curvature term
    cufftDoubleComplex *ccurvebar;  // FFT of the curvature term
    cufftDoubleComplex *lapcurvbar; // Denominator .* ccurvebar
    cufftDoubleComplex *cresult;    // updated spectrum
    cufftDoubleComplex *cresult2;   // lubar - lu0bar of the last iteration
};

// Frees every buffer in b; cudaFree accepts null pointers, so partially filled structs are fine.
static void releaseInpaintBuffers(InpaintDeviceBuffers &b)
{
    cudaFree(b.u);
    cudaFree(b.lambda);
    cudaFree(b.denominator);
    cudaFree(b.curve);
    cudaFree(b.cu);
    cudaFree(b.cubar);
    cudaFree(b.cLU0bar);
    cudaFree(b.cLU);
    cudaFree(b.cLUbar);
    cudaFree(b.ccurve);
    cudaFree(b.ccurvebar);
    cudaFree(b.lapcurvbar);
    cudaFree(b.cresult);
    cudaFree(b.cresult2);
}

// Runs one Z2Z transform; prints `message` to stderr and returns false on failure.
static bool execInpaintFFT(cufftHandle plan, cufftDoubleComplex *in, cufftDoubleComplex *out, int direction, const char *message)
{
    if (cufftExecZ2Z(plan, in, out, direction) != CUFFT_SUCCESS) {
        fprintf(stderr, "%s", message);
        return false;
    }
    return true;
}

// Executes the initial transforms and `iteration` update steps on the buffers in b.
// Records `start` before the loop and `stop` after the final inverse transform.
// Returns false as soon as a cuFFT call fails.
static bool runInpaintIterations(cufftHandle plan, const InpaintDeviceBuffers &b, int iteration, cudaEvent_t start, cudaEvent_t stop, int numRows, int numCols)
{
    const dim3 blockSize(32, 32, 1);
    const dim3 gridSize((numCols + 31) / 32, (numRows + 31) / 32, 1);
    const int sizedata = numRows * numCols;

    // LU0bar = FFT(lambda .* u0); copyData(src, dst) keeps it for the fidelity term.
    computeLU << <gridSize, blockSize >> >(b.u, b.lambda, b.cLU, numRows, numCols);
    checkCudaErrors(cudaGetLastError());
    if (!execInpaintFFT(plan, b.cLU, b.cLUbar, CUFFT_FORWARD, "CUFFT error: Exec FFT lubar to Complex lubar failed")) {
        return false;
    }
    copyData << <gridSize, blockSize >> >(b.cLUbar, b.cLU0bar, numRows, numCols);
    checkCudaErrors(cudaGetLastError());

    // ubar = FFT(u0). The complex copy of u has to be filled first; previously
    // an uninitialised buffer was transformed.
    real2complex << <gridSize, blockSize >> >(b.u, b.cu, numRows, numCols);
    checkCudaErrors(cudaGetLastError());
    if (!execInpaintFFT(plan, b.cu, b.cubar, CUFFT_FORWARD, "CUFFT error: Exec FFT u to Complex ubar failed")) {
        return false;
    }

    checkCudaErrors(cudaEventRecord(start));

    for (int i = 0; i < iteration; i++) {
        ComputeCurve << <gridSize, blockSize >> >(b.u, b.ccurve, b.curve, sizedata, numRows, numCols);
        checkCudaErrors(cudaGetLastError());
        if (!execInpaintFFT(plan, b.ccurve, b.ccurvebar, CUFFT_FORWARD, "CUFFT error: Exec FFT curv to Complex failed")) {
            return false;
        }

        // lapcurvbar = Denominator .* fft2(curv)
        computelapcurvbar << <gridSize, blockSize >> >(b.ccurvebar, b.denominator, b.lapcurvbar, numRows, numCols);
        checkCudaErrors(cudaGetLastError());

        CahnHilliardHighOrderComplex << <gridSize, blockSize >> >(b.cubar, b.denominator, b.lapcurvbar, b.cLU0bar, b.cLUbar, b.cresult, b.cresult2, numRows, numCols);
        checkCudaErrors(cudaGetLastError());

        // u = real(ifft2(ubar)) / (rows * cols)
        if (!execInpaintFFT(plan, b.cresult, b.cu, CUFFT_INVERSE, "CUFFT error: Exec IFFT curv to Complex failed")) {
            return false;
        }
        copyData << <gridSize, blockSize >> >(b.cresult, b.cubar, numRows, numCols);
        checkCudaErrors(cudaGetLastError());
        complex2real << <gridSize, blockSize >> >(b.cu, b.u, numRows, numCols);
        checkCudaErrors(cudaGetLastError());

        // LUbar = FFT(lambda .* u) for the next step.
        computeLU << <gridSize, blockSize >> >(b.u, b.lambda, b.cLU, numRows, numCols);
        checkCudaErrors(cudaGetLastError());
        if (!execInpaintFFT(plan, b.cLU, b.cLUbar, CUFFT_FORWARD, "CUFFT error: Exec IFFT lubar to Complex failed")) {
            return false;
        }
    }

    // Diagnostic field: inverse transform of the last lubar - lu0bar, in place.
    if (!execInpaintFFT(plan, b.cresult2, b.cresult2, CUFFT_INVERSE, "CUFFT error: Exec IFFT u to Complex result failed")) {
        return false;
    }

    checkCudaErrors(cudaEventRecord(stop));

    normalize << <gridSize, blockSize >> >(b.cresult2, b.curve, numRows, numCols);
    checkCudaErrors(cudaGetLastError());
    return true;
}

/*
 * Runs `iteration` steps of the high-order inpainting scheme on the GPU.
 *
 * h_src         : initial image, numRows * numCols doubles (host).
 * h_lamda       : fidelity weight, same size (host).
 * h_denominator : Laplacian symbol, same size (host).
 * h_output      : receives the final image (host).
 * h_curve       : receives the normalised inverse FFT of the last
 *                 lubar - lu0bar difference (host); a diagnostic field.
 * iteration     : number of update steps.
 * elapsedTime   : if non-null, receives the GPU time of the loop in
 *                 milliseconds; the same value is printed.
 *
 * On a cuFFT failure a message is printed to stderr and the host outputs are
 * left untouched.
 *
 * Fixes:
 *  - the timing events are declared (they were used undeclared) and destroyed;
 *  - the complex copy of u is initialised before its forward FFT;
 *  - nine device buffers that were allocated but never used are gone;
 *  - all device memory, the plan and the events are released on every path;
 *  - byte counts use size_t, kernel launches are error-checked, and
 *    *elapsedTime is actually written;
 *  - one cuFFT plan serves both directions, since the direction is an
 *    argument of cufftExecZ2Z.
 */
void gpu_inpaintinghighorder(double *h_src, double *h_lamda, double *h_denominator, double* h_output, double* h_curve, int iteration, float *elapsedTime, int numRows, int numCols)
{
    if (numRows <= 0 || numCols <= 0) {
        return;
    }

    const size_t sizedata = (size_t)numRows * (size_t)numCols;
    const size_t sizedouble = sizeof(cufftDoubleReal) * sizedata;
    const size_t sizecomplex = sizeof(cufftDoubleComplex) * sizedata;

    InpaintDeviceBuffers b = InpaintDeviceBuffers(); // all pointers start as null

    checkCudaErrors(cudaMalloc(&b.u, sizedouble));
    checkCudaErrors(cudaMalloc(&b.lambda, sizedouble));
    checkCudaErrors(cudaMalloc(&b.denominator, sizedouble));
    checkCudaErrors(cudaMalloc(&b.curve, sizedouble));
    checkCudaErrors(cudaMalloc(&b.cu, sizecomplex));
    checkCudaErrors(cudaMalloc(&b.cubar, sizecomplex));
    checkCudaErrors(cudaMalloc(&b.cLU0bar, sizecomplex));
    checkCudaErrors(cudaMalloc(&b.cLU, sizecomplex));
    checkCudaErrors(cudaMalloc(&b.cLUbar, sizecomplex));
    checkCudaErrors(cudaMalloc(&b.ccurve, sizecomplex));
    checkCudaErrors(cudaMalloc(&b.ccurvebar, sizecomplex));
    checkCudaErrors(cudaMalloc(&b.lapcurvbar, sizecomplex));
    checkCudaErrors(cudaMalloc(&b.cresult, sizecomplex));
    checkCudaErrors(cudaMalloc(&b.cresult2, sizecomplex));

    // With zero iterations cresult2 is never written by a kernel; keep it defined.
    checkCudaErrors(cudaMemset(b.cresult2, 0, sizecomplex));

    checkCudaErrors(cudaMemcpy(b.u, h_src, sizedouble, cudaMemcpyHostToDevice));
    checkCudaErrors(cudaMemcpy(b.lambda, h_lamda, sizedouble, cudaMemcpyHostToDevice));
    checkCudaErrors(cudaMemcpy(b.denominator, h_denominator, sizedouble, cudaMemcpyHostToDevice));

    cufftHandle plan;
    if (cufftPlan2d(&plan, numRows, numCols, CUFFT_Z2Z) != CUFFT_SUCCESS) {
        fprintf(stderr, "CUFFT error:01.FFT Plan creation Real 2 Complex failed");
        releaseInpaintBuffers(b);
        return;
    }

    cudaEvent_t start, stop;
    checkCudaErrors(cudaEventCreate(&start));
    checkCudaErrors(cudaEventCreate(&stop));

    if (runInpaintIterations(plan, b, iteration, start, stop, numRows, numCols)) {
        // The blocking copies wait for all queued work and surface kernel faults.
        checkCudaErrors(cudaMemcpy(h_output, b.u, sizedouble, cudaMemcpyDeviceToHost));
        checkCudaErrors(cudaMemcpy(h_curve, b.curve, sizedouble, cudaMemcpyDeviceToHost));

        checkCudaErrors(cudaEventSynchronize(stop));
        float elapsedTime1 = 0;
        checkCudaErrors(cudaEventElapsedTime(&elapsedTime1, start, stop));
        printf("GPU: %f ms", elapsedTime1);
        if (elapsedTime != 0) {
            *elapsedTime = elapsedTime1;
        }
    }

    cudaEventDestroy(start);
    cudaEventDestroy(stop);
    cufftDestroy(plan);
    releaseInpaintBuffers(b);
}

Dokumen terkait