From c049115ea477581d30328ea43489269d3c928787 Mon Sep 17 00:00:00 2001 From: zoziha Date: Fri, 1 Sep 2023 14:03:43 +0800 Subject: [PATCH 1/8] reduce the buffer size in getline --- src/fpm_filesystem.F90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fpm_filesystem.F90 b/src/fpm_filesystem.F90 index 177ee85fea..840ffc139c 100644 --- a/src/fpm_filesystem.F90 +++ b/src/fpm_filesystem.F90 @@ -678,7 +678,7 @@ subroutine getline(unit, line, iostat, iomsg) !> Error message character(len=:), allocatable, optional :: iomsg - integer, parameter :: BUFFER_SIZE = 32768 + integer, parameter :: BUFFER_SIZE = 1024 character(len=BUFFER_SIZE) :: buffer character(len=256) :: msg integer :: size From a6da02b5b3541ee05275508027515cc2e6957f2d Mon Sep 17 00:00:00 2001 From: zoziha Date: Fri, 1 Sep 2023 19:52:49 +0800 Subject: [PATCH 2/8] improve read_lines: use binary reading --- src/fpm_filesystem.F90 | 53 +++++++++++++++++++++++++++++++------- src/fpm_source_parsing.f90 | 4 +-- 2 files changed, 45 insertions(+), 12 deletions(-) diff --git a/src/fpm_filesystem.F90 b/src/fpm_filesystem.F90 index 840ffc139c..b3bf67b58c 100644 --- a/src/fpm_filesystem.F90 +++ b/src/fpm_filesystem.F90 @@ -2,6 +2,7 @@ !! module fpm_filesystem use,intrinsic :: iso_fortran_env, only : stdin=>input_unit, stdout=>output_unit, stderr=>error_unit + use,intrinsic :: iso_c_binding, only: c_new_line use fpm_environment, only: get_os_type, & OS_UNKNOWN, OS_LINUX, OS_MACOS, OS_WINDOWS, & OS_CYGWIN, OS_SOLARIS, OS_FREEBSD, OS_OPENBSD @@ -50,6 +51,8 @@ end function c_is_dir end interface #endif + integer, parameter :: max_line = 100000 !! maximum number of lines in a text file + contains !> Extract filename from path with/without suffix @@ -307,13 +310,27 @@ function read_lines_expanded(fh) result(lines) type(string_t), allocatable :: lines(:) integer :: i - integer :: iostat - character(len=:),allocatable :: line_buffer_read + integer :: length, count + character(len=:), allocatable :: content + integer, save :: idx(max_line) = 1 + + inquire (fh, size=length) + allocate (character(len=length) :: content) + + ! read file into a single string + read (fh) content + count = 0 + do i = 1, length + if (content(i:i) == c_new_line) then + count = count + 1 + idx(count + 1) = i + 1 + end if + end do - allocate(lines(number_of_rows(fh))) - do i = 1, size(lines) - call getline(fh, line_buffer_read, iostat) - lines(i)%s = dilate(line_buffer_read) + ! allocate lines from file content string + allocate (lines(count)) + do i = 1, count + allocate(lines(i)%s, source=dilate(content(idx(i):idx(i + 1) - 1))) end do end function read_lines_expanded @@ -324,11 +341,27 @@ function read_lines(fh) result(lines) type(string_t), allocatable :: lines(:) integer :: i - integer :: iostat + integer :: length, count + character(len=:), allocatable :: content + integer, save :: idx(max_line) = 1 + + inquire (fh, size=length) + allocate (character(len=length) :: content) + + ! read file into a single string + read (fh) content + count = 0 + do i = 1, length + if (content(i:i) == c_new_line) then + count = count + 1 + idx(count + 1) = i + 1 + end if + end do - allocate(lines(number_of_rows(fh))) - do i = 1, size(lines) - call getline(fh, lines(i)%s, iostat) + ! allocate lines from file content string + allocate (lines(count)) + do i = 1, count + allocate(lines(i)%s, source=content(idx(i):idx(i + 1) - 1)) end do end function read_lines diff --git a/src/fpm_source_parsing.f90 b/src/fpm_source_parsing.f90 index f303a1c2cf..36f2d58ce2 100644 --- a/src/fpm_source_parsing.f90 +++ b/src/fpm_source_parsing.f90 @@ -82,7 +82,7 @@ function parse_f_source(f_filename,error) result(f_source) f_source%file_name = f_filename - open(newunit=fh,file=f_filename,status='old') + open(newunit=fh,file=f_filename,status='old',access="stream",form="unformatted") file_lines = read_lines_expanded(fh) close(fh) @@ -427,7 +427,7 @@ function parse_c_source(c_filename,error) result(c_source) allocate(c_source%modules_provided(0)) allocate(c_source%parent_modules(0)) - open(newunit=fh,file=c_filename,status='old') + open(newunit=fh,file=c_filename,status='old',access="stream",form="unformatted") file_lines = read_lines(fh) close(fh) From ea84821ea5ecbe6ff4c9121b189903446ea96f9a Mon Sep 17 00:00:00 2001 From: zoziha Date: Fri, 1 Sep 2023 20:17:53 +0800 Subject: [PATCH 3/8] fix read_lines in list_files --- src/fpm_filesystem.F90 | 2 +- src/fpm_source_parsing.f90 | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/fpm_filesystem.F90 b/src/fpm_filesystem.F90 index b3bf67b58c..9a278e8c76 100644 --- a/src/fpm_filesystem.F90 +++ b/src/fpm_filesystem.F90 @@ -513,7 +513,7 @@ recursive subroutine list_files(dir, files, recurse) call fpm_stop(2,'*list_files*:directory listing failed') end if - open (newunit=fh, file=temp_file, status='old') + open (newunit=fh, file=temp_file, status='old',access='stream',form='unformatted') files = read_lines(fh) close(fh,status="delete") diff --git a/src/fpm_source_parsing.f90 b/src/fpm_source_parsing.f90 index 36f2d58ce2..3f83327386 100644 --- a/src/fpm_source_parsing.f90 +++ b/src/fpm_source_parsing.f90 @@ -82,7 +82,7 @@ function parse_f_source(f_filename,error) result(f_source) f_source%file_name = f_filename - open(newunit=fh,file=f_filename,status='old',access="stream",form="unformatted") + open(newunit=fh,file=f_filename,status='old',access='stream',form='unformatted') file_lines = read_lines_expanded(fh) close(fh) @@ -427,7 +427,7 @@ function parse_c_source(c_filename,error) result(c_source) allocate(c_source%modules_provided(0)) allocate(c_source%parent_modules(0)) - open(newunit=fh,file=c_filename,status='old',access="stream",form="unformatted") + open(newunit=fh,file=c_filename,status='old',access='stream',form='unformatted') file_lines = read_lines(fh) close(fh) From 1feebe62fb54ec3e74638790862a66f035e52d03 Mon Sep 17 00:00:00 2001 From: zoziha Date: Fri, 1 Sep 2023 23:45:39 +0800 Subject: [PATCH 4/8] read_lines uses the same static array idx --- src/fpm_filesystem.F90 | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/fpm_filesystem.F90 b/src/fpm_filesystem.F90 index 9a278e8c76..03823a1911 100644 --- a/src/fpm_filesystem.F90 +++ b/src/fpm_filesystem.F90 @@ -52,6 +52,7 @@ end function c_is_dir #endif integer, parameter :: max_line = 100000 !! maximum number of lines in a text file + integer :: idx(max_line) = 1 !! indexes for read_lines contains @@ -312,7 +313,6 @@ function read_lines_expanded(fh) result(lines) integer :: i integer :: length, count character(len=:), allocatable :: content - integer, save :: idx(max_line) = 1 inquire (fh, size=length) allocate (character(len=length) :: content) @@ -343,7 +343,6 @@ function read_lines(fh) result(lines) integer :: i integer :: length, count character(len=:), allocatable :: content - integer, save :: idx(max_line) = 1 inquire (fh, size=length) allocate (character(len=length) :: content) From c0b8643db7d5d3e96757658445f5635d8e15ffbc Mon Sep 17 00:00:00 2001 From: zoziha Date: Tue, 5 Sep 2023 16:38:41 +0800 Subject: [PATCH 5/8] fix CRLF --- src/fpm_strings.f90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fpm_strings.f90 b/src/fpm_strings.f90 index 55f57537e5..47eb1e1a40 100644 --- a/src/fpm_strings.f90 +++ b/src/fpm_strings.f90 @@ -1269,7 +1269,7 @@ subroutine remove_newline_characters(string) integer :: feed,length - character(*), parameter :: CRLF = new_line('a')//achar(13) + character(*), parameter :: CRLF = achar(13)//new_line('a') character(*), parameter :: SPACE = ' ' call remove_characters_in_set(string%s,set=CRLF,replace_with=SPACE) From 92b6e5083f41cdecbdd950b0ee8f813fdfaf7f16 Mon Sep 17 00:00:00 2001 From: zoziha Date: Tue, 5 Sep 2023 16:39:25 +0800 Subject: [PATCH 6/8] add split_first_last --- src/fpm_strings.f90 | 71 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 70 insertions(+), 1 deletion(-) diff --git a/src/fpm_strings.f90 b/src/fpm_strings.f90 index 47eb1e1a40..58f14f8cef 100644 --- a/src/fpm_strings.f90 +++ b/src/fpm_strings.f90 @@ -13,6 +13,8 @@ !! - [[LOWER]] Changes a string to lowercase over optional specified column range !!### Parsing and joining !! - [[SPLIT]] parse string on delimiter characters and store tokens into an allocatable array +!! - [[SPLIT_FIRST_LAST]] Computes the first and last indices of tokens in input string, delimited by the characters in set, +!! and stores them into first and last output arrays. !! - [[STRING_CAT]] Concatenate an array of **type(string_t)** into a single **CHARACTER** variable !! - [[JOIN]] append an array of **CHARACTER** variables into a single **CHARACTER** variable !!### Testing @@ -40,7 +42,7 @@ module fpm_strings implicit none private -public :: f_string, lower, split, str_ends_with, string_t, str_begins_with_str +public :: f_string, lower, split, split_first_last, str_ends_with, string_t, str_begins_with_str public :: to_fortran_name, is_fortran_name public :: string_array_contains, string_cat, len_trim, operator(.in.), fnv_1a public :: replace, resize, str, join, glob @@ -463,6 +465,73 @@ subroutine split(input_line,array,delimiters,order,nulls) enddo end subroutine split +!! Author: Milan Curcic +!! Computes the first and last indices of tokens in input string, delimited +!! by the characters in set, and stores them into first and last output +!! arrays. +pure subroutine split_first_last(string, set, first, last) + character(*), intent(in) :: string + character(*), intent(in) :: set + integer, allocatable, intent(out) :: first(:) + integer, allocatable, intent(out) :: last(:) + + integer, dimension(len(string) + 1) :: istart, iend + integer :: p, n, slen + + slen = len(string) + + n = 0 + if (slen > 0) then + p = 0 + do while (p < slen) + n = n + 1 + istart(n) = min(p + 1, slen) + call split_pos(string, set, p) + iend(n) = p - 1 + end do + end if + + first = istart(:n) + last = iend(:n) + +end subroutine split_first_last + +!! Author: Milan Curcic +!! If back is absent, computes the leftmost token delimiter in string whose +!! position is > pos. If back is present and true, computes the rightmost +!! token delimiter in string whose position is < pos. The result is stored +!! in pos. +pure subroutine split_pos(string, set, pos, back) + character(*), intent(in) :: string + character(*), intent(in) :: set + integer, intent(in out) :: pos + logical, intent(in), optional :: back + + logical :: backward + integer :: result_pos, bound + + if (len(string) == 0) then + pos = 1 + return + end if + + !TODO use optval when implemented in stdlib + !backward = optval(back, .false.) + backward = .false. + if (present(back)) backward = back + + if (backward) then + bound = min(len(string), max(pos - 1, 0)) + result_pos = scan(string(:bound), set, back=.true.) + else + result_pos = scan(string(min(pos + 1, len(string)):), set) + pos + if (result_pos < pos + 1) result_pos = len(string) + 1 + end if + + pos = result_pos + +end subroutine split_pos + !> Returns string with characters in charset replaced with target_char. pure function replace(string, charset, target_char) result(res) character(*), intent(in) :: string From a16f4b58e83474ba26ec34d55a8cdcef82e14a36 Mon Sep 17 00:00:00 2001 From: zoziha Date: Tue, 5 Sep 2023 16:46:29 +0800 Subject: [PATCH 7/8] fix read_lines --- src/fpm_filesystem.F90 | 49 ++++++++++++++++++------------------------ 1 file changed, 21 insertions(+), 28 deletions(-) diff --git a/src/fpm_filesystem.F90 b/src/fpm_filesystem.F90 index 03823a1911..0c96efcf70 100644 --- a/src/fpm_filesystem.F90 +++ b/src/fpm_filesystem.F90 @@ -7,7 +7,7 @@ module fpm_filesystem OS_UNKNOWN, OS_LINUX, OS_MACOS, OS_WINDOWS, & OS_CYGWIN, OS_SOLARIS, OS_FREEBSD, OS_OPENBSD use fpm_environment, only: separator, get_env, os_is_unix - use fpm_strings, only: f_string, replace, string_t, split, dilate, str_begins_with_str + use fpm_strings, only: f_string, replace, string_t, split, split_first_last, dilate, str_begins_with_str use iso_c_binding, only: c_char, c_ptr, c_int, c_null_char, c_associated, c_f_pointer use fpm_error, only : fpm_stop, error_t, fatal_error implicit none @@ -51,9 +51,6 @@ end function c_is_dir end interface #endif - integer, parameter :: max_line = 100000 !! maximum number of lines in a text file - integer :: idx(max_line) = 1 !! indexes for read_lines - contains !> Extract filename from path with/without suffix @@ -310,27 +307,25 @@ function read_lines_expanded(fh) result(lines) integer, intent(in) :: fh type(string_t), allocatable :: lines(:) - integer :: i - integer :: length, count + integer :: i, length character(len=:), allocatable :: content + integer, allocatable :: first(:), last(:) inquire (fh, size=length) allocate (character(len=length) :: content) + if (length == 0) then + allocate (lines(0)) + return + end if ! read file into a single string read (fh) content - count = 0 - do i = 1, length - if (content(i:i) == c_new_line) then - count = count + 1 - idx(count + 1) = i + 1 - end if - end do + call split_first_last(content, c_new_line, first, last) ! TODO: \r (< macOS X), \n (>=macOS X/Linux/Unix), \r\n (Windows) ! allocate lines from file content string - allocate (lines(count)) - do i = 1, count - allocate(lines(i)%s, source=dilate(content(idx(i):idx(i + 1) - 1))) + allocate (lines(size(first))) + do i = 1, size(first) + allocate(lines(i)%s, source=dilate(content(first(i):last(i)))) end do end function read_lines_expanded @@ -340,27 +335,25 @@ function read_lines(fh) result(lines) integer, intent(in) :: fh type(string_t), allocatable :: lines(:) - integer :: i - integer :: length, count + integer :: i, length character(len=:), allocatable :: content + integer, allocatable :: first(:), last(:) inquire (fh, size=length) allocate (character(len=length) :: content) + if (length == 0) then + allocate (lines(0)) + return + end if ! read file into a single string read (fh) content - count = 0 - do i = 1, length - if (content(i:i) == c_new_line) then - count = count + 1 - idx(count + 1) = i + 1 - end if - end do + call split_first_last(content, c_new_line, first, last) ! TODO: \r (< macOS X), \n (>=macOS X/Linux/Unix), \r\n (Windows) ! allocate lines from file content string - allocate (lines(count)) - do i = 1, count - allocate(lines(i)%s, source=content(idx(i):idx(i + 1) - 1)) + allocate (lines(size(first))) + do i = 1, size(first) + allocate(lines(i)%s, source=content(first(i):last(i))) end do end function read_lines From 067cc3c37b9777fbb5cd40a1a35104cd970e2188 Mon Sep 17 00:00:00 2001 From: zoziha Date: Wed, 20 Dec 2023 01:30:41 +0800 Subject: [PATCH 8/8] add read_text_file --- src/fpm_filesystem.F90 | 53 +++++++++++++++++++++++--------------- src/fpm_source_parsing.f90 | 8 ++---- 2 files changed, 34 insertions(+), 27 deletions(-) diff --git a/src/fpm_filesystem.F90 b/src/fpm_filesystem.F90 index 0c96efcf70..c7b72c965d 100644 --- a/src/fpm_filesystem.F90 +++ b/src/fpm_filesystem.F90 @@ -51,6 +51,8 @@ end function c_is_dir end interface #endif + character(*), parameter :: eol = new_line('a') !! End of line + contains !> Extract filename from path with/without suffix @@ -303,24 +305,21 @@ integer function number_of_rows(s) result(nrows) end function number_of_rows !> read lines into an array of TYPE(STRING_T) variables expanding tabs -function read_lines_expanded(fh) result(lines) - integer, intent(in) :: fh +function read_lines_expanded(filename) result(lines) + character(len=*), intent(in) :: filename type(string_t), allocatable :: lines(:) - integer :: i, length + integer :: i character(len=:), allocatable :: content integer, allocatable :: first(:), last(:) - inquire (fh, size=length) - allocate (character(len=length) :: content) - if (length == 0) then + content = read_text_file(filename) + if (len(content) == 0) then allocate (lines(0)) return end if - ! read file into a single string - read (fh) content - call split_first_last(content, c_new_line, first, last) ! TODO: \r (< macOS X), \n (>=macOS X/Linux/Unix), \r\n (Windows) + call split_first_last(content, eol, first, last) ! TODO: \r (< macOS X), \n (>=macOS X/Linux/Unix), \r\n (Windows) ! allocate lines from file content string allocate (lines(size(first))) @@ -331,24 +330,21 @@ function read_lines_expanded(fh) result(lines) end function read_lines_expanded !> read lines into an array of TYPE(STRING_T) variables -function read_lines(fh) result(lines) - integer, intent(in) :: fh +function read_lines(filename) result(lines) + character(len=*), intent(in) :: filename type(string_t), allocatable :: lines(:) - integer :: i, length + integer :: i character(len=:), allocatable :: content integer, allocatable :: first(:), last(:) - inquire (fh, size=length) - allocate (character(len=length) :: content) - if (length == 0) then + content = read_text_file(filename) + if (len(content) == 0) then allocate (lines(0)) return end if - ! read file into a single string - read (fh) content - call split_first_last(content, c_new_line, first, last) ! TODO: \r (< macOS X), \n (>=macOS X/Linux/Unix), \r\n (Windows) + call split_first_last(content, eol, first, last) ! TODO: \r (< macOS X), \n (>=macOS X/Linux/Unix), \r\n (Windows) ! allocate lines from file content string allocate (lines(size(first))) @@ -358,6 +354,22 @@ function read_lines(fh) result(lines) end function read_lines +!> read text file into a string +function read_text_file(filename) result(string) + character(len=*), intent(in) :: filename + character(len=:), allocatable :: string + integer :: fh, length + + open (newunit=fh, file=filename, status='old', action='read', & + access='stream', form='unformatted') + inquire (fh, size=length) + allocate (character(len=length) :: string) + if (length == 0) return + read (fh) string + close (fh) + +end function read_text_file + !> Create a directory. Create subdirectories as needed subroutine mkdir(dir, echo) character(len=*), intent(in) :: dir @@ -505,9 +517,8 @@ recursive subroutine list_files(dir, files, recurse) call fpm_stop(2,'*list_files*:directory listing failed') end if - open (newunit=fh, file=temp_file, status='old',access='stream',form='unformatted') - files = read_lines(fh) - close(fh,status="delete") + files = read_lines(temp_file) + call delete_file(temp_file) do i=1,size(files) files(i)%s = join_path(dir,files(i)%s) diff --git a/src/fpm_source_parsing.f90 b/src/fpm_source_parsing.f90 index 3f83327386..59f8fd4d33 100644 --- a/src/fpm_source_parsing.f90 +++ b/src/fpm_source_parsing.f90 @@ -82,9 +82,7 @@ function parse_f_source(f_filename,error) result(f_source) f_source%file_name = f_filename - open(newunit=fh,file=f_filename,status='old',access='stream',form='unformatted') - file_lines = read_lines_expanded(fh) - close(fh) + file_lines = read_lines_expanded(f_filename) ! for efficiency in parsing make a lowercase left-adjusted copy of the file ! Need a copy because INCLUDE (and #include) file arguments are case-sensitive @@ -427,9 +425,7 @@ function parse_c_source(c_filename,error) result(c_source) allocate(c_source%modules_provided(0)) allocate(c_source%parent_modules(0)) - open(newunit=fh,file=c_filename,status='old',access='stream',form='unformatted') - file_lines = read_lines(fh) - close(fh) + file_lines = read_lines(c_filename) ! Ignore empty files, returned as FPM_UNIT_UNKNOWN if (len_trim(file_lines) < 1) then