diff --git a/include/algos.hpp b/include/algos.hpp index d6c9f0c..ba3b8d0 100644 --- a/include/algos.hpp +++ b/include/algos.hpp @@ -24,7 +24,7 @@ void bgr2gray_opencv(const cv::Mat& src, cv::Mat& dst); void boxFilter_halide(uint16_t* src, uint16_t* dst, int height, int width); void boxFilter_opencv(const cv::Mat& src, cv::Mat& dst); void ascii_art_ref(const uint8_t* src, uint8_t* dst, int height, int width); -void ascii_art_halide(uint8_t* src, uint8_t* dst, int input_height, int input_width); +void ascii_art_halide(uint8_t* src, float* dst, int input_height, int input_width); void julia_ref(uint8_t* dst, int height, int width); void halide_julia(uint8_t* dst, int height, int width); diff --git a/perf/perf_ascii.cpp b/perf/perf_ascii.cpp index a614801..4647944 100644 --- a/perf/perf_ascii.cpp +++ b/perf/perf_ascii.cpp @@ -28,10 +28,10 @@ PERF_TEST(ascii_art, halide) { const int height = 1080; const std::string grey_scale = "$@B%8&WM#*OAHKDPQWMRZO0QLCJUYXVJFT/|()1{}[]?-_+~<>i!lI;:,^`'. "; - Mat src(height, width, CV_8UC1), dst(height/ry, width/rx, CV_8U); + Mat src(height, width, CV_8UC1), dst(height/ry, width/rx, CV_32F); randu(src, 0, 256); PERF_SAMPLE_BEGIN() - ascii_art_halide(src.ptr(), dst.ptr(), src.rows, src.cols); + ascii_art_halide(src.ptr(), dst.ptr(), src.rows, src.cols); PERF_SAMPLE_END() SANITY_CHECK_NOTHING(); diff --git a/src/ASCII-ART.cpp b/src/ASCII-ART.cpp index ad090f3..6e7470c 100644 --- a/src/ASCII-ART.cpp +++ b/src/ASCII-ART.cpp @@ -22,13 +22,13 @@ static const int norm_h = 3; static const int rx = 15; static const int ry = 19; -void ascii_art_halide(uint8_t* src, uint8_t* dst, int input_height, int input_width) { +void ascii_art_halide(uint8_t* src, float* dst, int input_height, int input_width) { int output_width = input_width / rx; int output_height = input_height / ry; - - + + Buffer input(src, {input_width, input_height}); - Buffer output(dst, {input_width / rx, input_height / ry}); + Buffer output(dst, {input_width / rx, input_height / ry}); #ifdef __riscv ascii_art(input, output); #else @@ -38,17 +38,24 @@ void ascii_art_halide(uint8_t* src, uint8_t* dst, int input_height, int input_wi Var x("x"), y("y"); RDom r(0, rx, 0, ry); - Expr s = sum(cast(input(x*rx + r.x, y*ry + r.y))); - + + Func casted; + casted(x, y) = cast(input(x, y)); + + Expr s = sum(casted(x*rx + r.x, y*ry + r.y)); + //s = Halide::clamp(s/(rx*ry),0,255); - ascii(x, y) = cast(s/(rx*ry)); - // ascii.realize(output); + ascii(x, y) = s/(rx*ry); + + casted.compute_root(); + ascii.vectorize(x, 4); + // Compile Target target; target.os = Target::OS::Linux; target.arch = Target::Arch::RISCV; target.bits = 64; - //target.vector_bits = factor * sizeof(uint8_t) * 8; + target.vector_bits = 128; // Tested XuanTie C906 has 128-bit vector unit CV_Assert(target.vector_bits <= 128); @@ -76,11 +83,11 @@ void ascii_art_ref(const uint8_t* src, uint8_t* dst, int input_height, int input float lum = 0; for(int j = 0; j < ry; ++j){ for(int i = 0; i < rx; ++i){ - lum += src[ (y * ry + j) * input_width + x * rx +i]; - } + lum += src[ (y * ry + j) * input_width + x * rx +i]; + } } dst[y * output_width + x] = static_cast(lum/(rx*ry)); lum = 0; - } + } } } diff --git a/test/test_ascii.cpp b/test/test_ascii.cpp index 6af04ef..b29a16e 100644 --- a/test/test_ascii.cpp +++ b/test/test_ascii.cpp @@ -10,12 +10,10 @@ TEST(ascii_art, halide){ const std::string grey_scale = "$@B%8&WM#*OAHKDPQWMRZO0QLCJUYXVJFT/|()1{}[]?-_+~<>i!lI;:,^`'. "; cv::Mat src = imread("cat.jpeg", cv::IMREAD_GRAYSCALE); - cv::Mat dst(src.rows/ry, src.cols/rx, CV_8U), + cv::Mat dst(src.rows/ry, src.cols/rx, CV_32F), render(src.rows, src.cols, CV_8U, cv::Scalar(255)); - ascii_art_halide(src.ptr(), dst.ptr(), src.rows, src.cols); - char *s; - s = (char*)dst.ptr(); + ascii_art_halide(src.ptr(), dst.ptr(), src.rows, src.cols); std::vector> lums={}; std::vector symbols(256); float lum_min = 255; @@ -42,14 +40,16 @@ TEST(ascii_art, halide){ for(int i = 0; i < dst.rows; i++) for(int j = 0; j < dst.cols; j++){ - uint8_t lum = dst.at(i, j); - int index = (static_cast(lum)/255)*(lums.size()-1); + float lum = dst.at(i, j); + int index = (min(lum, 255.0f)/255.0f)*(lums.size()-1); + std::cout << lum << std::endl; + CV_Assert(index < lums.size()); cv::Mat roi = render.colRange(j*rx, (j+1)*rx).rowRange(i*ry, (i+1)*ry); cv::putText(roi, std::string(1, grey_scale[index]), cv::Point(1, ry-1), cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0), 1, cv::LINE_AA); } - imwrite("res_cat_ascii_halide.png", render); - imwrite("src_cat_ascii_halide.png", src); - ASSERT_EQ(true, true); + // imwrite("res_cat_ascii_halide.png", render); + // imwrite("src_cat_ascii_halide.png", src); + // ASSERT_EQ(true, true); }