Index: trunk/src/parsing/parsing.nw =================================================================== --- trunk/src/parsing/parsing.nw (revision 8774) +++ trunk/src/parsing/parsing.nw (revision 8775) @@ -1,5404 +1,6123 @@ % -*- ess-noweb-default-code-mode: f90-mode; noweb-default-code-mode: f90-mode; -*- % WHIZARD code as NOWEB source: text handling and parsing %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \chapter{Text handling} \includemodulegraph{parsing} \whizard\ has to handle complex structures in input (and output) data. Doing this in a generic and transparent way requires a generic lexer and parser. The necessary modules are implemented here: \begin{description} \item[ifiles] Implementation of line-oriented internal files in a more flexible way (linked lists of variable-length strings) than the Fortran builtin features. \item[lexers] Read text and transform it into a token stream. \item[syntax\_rules] Define the rules for interpreting tokens, to be used by the WHIZARD parser. \item[parser] Categorize tokens (keyword, string, number etc.) and use a set of syntax rules to transform the input into a parse tree. \item[xml] Read and parse XML text, separate from the WHIZARD parser. \end{description} @ \clearpage %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \section{Internal files} The internal files introduced here ([[ifile]]) are a replacement for the built-in internal files, which are fixed-size arrays of fixed-length character strings. The [[ifile]] type is a doubly-linked list of variable-length character strings with line numbers. <<[[ifiles.f90]]>>= <> module ifiles <> - use io_units - use system_defs, only: EOF <> <> <> <> -contains - -<> + interface +<> + end interface end module ifiles @ %def ifiles @ +<<[[ifiles_sub.f90]]>>= +<> + +submodule (ifiles) ifiles_s + + use system_defs, only: EOF + use io_units + + implicit none + +contains + +<> + +end submodule ifiles_s + +@ %def ifiles_s +@ \subsection{The line type} The line entry type is for internal use, it is the list entry to be collected in an [[ifile]] object. <>= type :: line_entry_t private type(line_entry_t), pointer :: previous => null () type(line_entry_t), pointer :: next => null () type(string_t) :: string integer :: index end type line_entry_t @ %def line_entry_t @ Create a new list entry, given a varying string as input. The line number and pointers are not set, these make sense only within an [[ifile]]. -<>= +<>= subroutine line_entry_create (line, string) type(line_entry_t), pointer :: line type(string_t), intent(in) :: string allocate (line) line%string = string end subroutine line_entry_create @ %def line_entry_create @ Destroy a single list entry: Since the pointer components should not be deallocated explicitly, just deallocate the object itself. -<>= +<>= subroutine line_entry_destroy (line) type(line_entry_t), pointer :: line deallocate (line) end subroutine line_entry_destroy @ %def line_entry_destroy @ \subsection{The ifile type} The internal file is a linked list of line entries. <>= public :: ifile_t <>= type :: ifile_t private type(line_entry_t), pointer :: first => null () type(line_entry_t), pointer :: last => null () integer :: n_lines = 0 contains <> end type ifile_t @ %def ifile_t @ We need no explicit initializer, but a routine which recursively deallocates the contents may be appropriate. After this, existing line pointers may become undefined, so they should be nullified before the file is destroyed. <>= public :: ifile_clear -<>= - subroutine ifile_clear (ifile) +<>= + module subroutine ifile_clear (ifile) + class(ifile_t), intent(inout) :: ifile + end subroutine ifile_clear +<>= + module subroutine ifile_clear (ifile) class(ifile_t), intent(inout) :: ifile type(line_entry_t), pointer :: current do while (associated (ifile%first)) current => ifile%first ifile%first => current%next call line_entry_destroy (current) end do nullify (ifile%last) ifile%n_lines = 0 end subroutine ifile_clear @ %def ifile_clear @ The finalizer is just an alias for the above. <>= public :: ifile_final <>= procedure :: final => ifile_clear <>= interface ifile_final module procedure ifile_clear end interface @ %def ifile_final @ \subsection{I/O on ifiles} Fill an ifile from an ordinary external file, i.e., I/O unit. If the ifile is not empty, the old contents will be destroyed. We can read a fixed-length character string, an ISO varying string, an ordinary internal file (character-string array), or from an external unit. In the latter case, lines are appended until EOF is reached. Finally, there is a variant which reads from another ifile, effectively copying it. <>= public :: ifile_read <>= interface ifile_read module procedure ifile_read_from_string module procedure ifile_read_from_char module procedure ifile_read_from_unit module procedure ifile_read_from_char_array module procedure ifile_read_from_ifile end interface -<>= - subroutine ifile_read_from_string (ifile, string) +<>= + module subroutine ifile_read_from_string (ifile, string) + type(ifile_t), intent(inout) :: ifile + type(string_t), intent(in) :: string + end subroutine ifile_read_from_string + module subroutine ifile_read_from_char (ifile, char) + type(ifile_t), intent(inout) :: ifile + character(*), intent(in) :: char + end subroutine ifile_read_from_char + module subroutine ifile_read_from_char_array (ifile, char) + type(ifile_t), intent(inout) :: ifile + character(*), dimension(:), intent(in) :: char + end subroutine ifile_read_from_char_array + module subroutine ifile_read_from_unit (ifile, unit, iostat) + type(ifile_t), intent(inout) :: ifile + integer, intent(in) :: unit + integer, intent(out), optional :: iostat + end subroutine ifile_read_from_unit + module subroutine ifile_read_from_ifile (ifile, ifile_in) + type(ifile_t), intent(inout) :: ifile + type(ifile_t), intent(in) :: ifile_in + end subroutine ifile_read_from_ifile +<>= + module subroutine ifile_read_from_string (ifile, string) type(ifile_t), intent(inout) :: ifile type(string_t), intent(in) :: string call ifile_clear (ifile) call ifile_append (ifile, string) end subroutine ifile_read_from_string - subroutine ifile_read_from_char (ifile, char) + module subroutine ifile_read_from_char (ifile, char) type(ifile_t), intent(inout) :: ifile character(*), intent(in) :: char call ifile_clear (ifile) call ifile_append (ifile, char) end subroutine ifile_read_from_char - subroutine ifile_read_from_char_array (ifile, char) + module subroutine ifile_read_from_char_array (ifile, char) type(ifile_t), intent(inout) :: ifile character(*), dimension(:), intent(in) :: char call ifile_clear (ifile) call ifile_append (ifile, char) end subroutine ifile_read_from_char_array - subroutine ifile_read_from_unit (ifile, unit, iostat) + module subroutine ifile_read_from_unit (ifile, unit, iostat) type(ifile_t), intent(inout) :: ifile integer, intent(in) :: unit integer, intent(out), optional :: iostat call ifile_clear (ifile) call ifile_append (ifile, unit, iostat) end subroutine ifile_read_from_unit - subroutine ifile_read_from_ifile (ifile, ifile_in) + module subroutine ifile_read_from_ifile (ifile, ifile_in) type(ifile_t), intent(inout) :: ifile type(ifile_t), intent(in) :: ifile_in call ifile_clear (ifile) call ifile_append (ifile, ifile_in) end subroutine ifile_read_from_ifile @ %def ifile_read @ Append to an ifile. The same as reading, but without resetting the ifile. In addition, there is a routine for appending a whole ifile. <>= public :: ifile_append <>= generic :: append => & ifile_append_from_char procedure, private :: ifile_append_from_char <>= interface ifile_append module procedure ifile_append_from_string module procedure ifile_append_from_char module procedure ifile_append_from_unit module procedure ifile_append_from_char_array module procedure ifile_append_from_ifile end interface -<>= - subroutine ifile_append_from_string (ifile, string) +<>= + module subroutine ifile_append_from_string (ifile, string) + class(ifile_t), intent(inout) :: ifile + type(string_t), intent(in) :: string + end subroutine ifile_append_from_string + module subroutine ifile_append_from_char (ifile, char) + class(ifile_t), intent(inout) :: ifile + character(*), intent(in) :: char + end subroutine ifile_append_from_char + module subroutine ifile_append_from_char_array (ifile, char) + class(ifile_t), intent(inout) :: ifile + character(*), dimension(:), intent(in) :: char + end subroutine ifile_append_from_char_array + module subroutine ifile_append_from_ifile (ifile, ifile_in) + class(ifile_t), intent(inout) :: ifile + type(ifile_t), intent(in) :: ifile_in + end subroutine ifile_append_from_ifile + module subroutine ifile_append_from_unit (ifile, unit, iostat) + class(ifile_t), intent(inout) :: ifile + integer, intent(in) :: unit + integer, intent(out), optional :: iostat + end subroutine ifile_append_from_unit +<>= + module subroutine ifile_append_from_string (ifile, string) class(ifile_t), intent(inout) :: ifile type(string_t), intent(in) :: string type(line_entry_t), pointer :: current call line_entry_create (current, string) current%index = ifile%n_lines + 1 if (associated (ifile%last)) then current%previous => ifile%last ifile%last%next => current else ifile%first => current end if ifile%last => current ifile%n_lines = current%index end subroutine ifile_append_from_string - subroutine ifile_append_from_char (ifile, char) + module subroutine ifile_append_from_char (ifile, char) class(ifile_t), intent(inout) :: ifile character(*), intent(in) :: char call ifile_append_from_string (ifile, var_str (trim (char))) end subroutine ifile_append_from_char - subroutine ifile_append_from_char_array (ifile, char) + module subroutine ifile_append_from_char_array (ifile, char) class(ifile_t), intent(inout) :: ifile character(*), dimension(:), intent(in) :: char integer :: i do i = 1, size (char) call ifile_append_from_string (ifile, var_str (trim (char(i)))) end do end subroutine ifile_append_from_char_array - subroutine ifile_append_from_unit (ifile, unit, iostat) + module subroutine ifile_append_from_unit (ifile, unit, iostat) class(ifile_t), intent(inout) :: ifile integer, intent(in) :: unit integer, intent(out), optional :: iostat type(string_t) :: buffer integer :: ios ios = 0 READ_LOOP: do call get (unit, buffer, iostat = ios) if (ios == EOF .or. ios > 0) exit READ_LOOP call ifile_append_from_string (ifile, buffer) end do READ_LOOP if (present (iostat)) then iostat = ios else if (ios > 0) then call get (unit, buffer) ! trigger error again end if end subroutine ifile_append_from_unit - subroutine ifile_append_from_ifile (ifile, ifile_in) + module subroutine ifile_append_from_ifile (ifile, ifile_in) class(ifile_t), intent(inout) :: ifile type(ifile_t), intent(in) :: ifile_in type(line_entry_t), pointer :: current current => ifile_in%first do while (associated (current)) call ifile_append_from_string (ifile, current%string) current => current%next end do end subroutine ifile_append_from_ifile @ %def ifile_append @ Write the ifile contents to an external unit <>= public :: ifile_write -<>= - subroutine ifile_write (ifile, unit, iostat) +<>= + module subroutine ifile_write (ifile, unit, iostat) + type(ifile_t), intent(in) :: ifile + integer, intent(in), optional :: unit + integer, intent(out), optional :: iostat + end subroutine ifile_write +<>= + module subroutine ifile_write (ifile, unit, iostat) type(ifile_t), intent(in) :: ifile integer, intent(in), optional :: unit integer, intent(out), optional :: iostat integer :: u type(line_entry_t), pointer :: current u = given_output_unit (unit); if (u < 0) return current => ifile%first do while (associated (current)) call put_line (u, current%string, iostat) current => current%next end do end subroutine ifile_write @ %def ifile_write @ Convert the ifile to an array of strings, which is allocated by this function: <>= public :: ifile_to_string_array -<>= - subroutine ifile_to_string_array (ifile, string) +<>= + module subroutine ifile_to_string_array (ifile, string) + type(ifile_t), intent(in) :: ifile + type(string_t), dimension(:), intent(inout), allocatable :: string + end subroutine ifile_to_string_array +<>= + module subroutine ifile_to_string_array (ifile, string) type(ifile_t), intent(in) :: ifile type(string_t), dimension(:), intent(inout), allocatable :: string type(line_entry_t), pointer :: current integer :: i allocate (string (ifile_get_length (ifile))) current => ifile%first do i = 1, ifile_get_length (ifile) string(i) = current%string current => current%next end do end subroutine ifile_to_string_array @ %def ifile_to_string_array @ \subsection{Ifile tools} <>= public :: ifile_get_length -<>= - function ifile_get_length (ifile) result (length) +<>= + module function ifile_get_length (ifile) result (length) + integer :: length + type(ifile_t), intent(in) :: ifile + end function ifile_get_length +<>= + module function ifile_get_length (ifile) result (length) integer :: length type(ifile_t), intent(in) :: ifile length = ifile%n_lines end function ifile_get_length @ %def ifile_get_length @ \subsection{Line pointers} Instead of the implicit pointer used in ordinary file access, we define explicit pointers, so there can be more than one at a time. <>= public :: line_p <>= type :: line_p private type(line_entry_t), pointer :: p => null () end type line_p @ %def line @ Assign a file pointer to the first or last line in an ifile: <>= public :: line_init -<>= - subroutine line_init (line, ifile, back) +<>= + module subroutine line_init (line, ifile, back) + type(line_p), intent(inout) :: line + type(ifile_t), intent(in) :: ifile + logical, intent(in), optional :: back + end subroutine line_init +<>= + module subroutine line_init (line, ifile, back) type(line_p), intent(inout) :: line type(ifile_t), intent(in) :: ifile logical, intent(in), optional :: back if (present (back)) then if (back) then line%p => ifile%last else line%p => ifile%first end if else line%p => ifile%first end if end subroutine line_init @ %def line_init @ Remove the pointer association: <>= public :: line_final -<>= - subroutine line_final (line) +<>= + module subroutine line_final (line) + type(line_p), intent(inout) :: line + end subroutine line_final +<>= + module subroutine line_final (line) type(line_p), intent(inout) :: line nullify (line%p) end subroutine line_final @ %def line_final @ Go one step forward <>= public :: line_advance -<>= - subroutine line_advance (line) +<>= + module subroutine line_advance (line) + type(line_p), intent(inout) :: line + end subroutine line_advance +<>= + module subroutine line_advance (line) type(line_p), intent(inout) :: line if (associated (line%p)) line%p => line%p%next end subroutine line_advance @ %def line_advance @ Go one step backward <>= public :: line_backspace -<>= - subroutine line_backspace (line) +<>= + module subroutine line_backspace (line) + type(line_p), intent(inout) :: line + end subroutine line_backspace +<>= + module subroutine line_backspace (line) type(line_p), intent(inout) :: line if (associated (line%p)) line%p => line%p%previous end subroutine line_backspace @ %def line_backspace @ Check whether we are accessing a valid line <>= public :: line_is_associated -<>= - function line_is_associated (line) result (ok) +<>= + module function line_is_associated (line) result (ok) + logical :: ok + type(line_p), intent(in) :: line + end function line_is_associated +<>= + module function line_is_associated (line) result (ok) logical :: ok type(line_p), intent(in) :: line ok = associated (line%p) end function line_is_associated @ %def line_is_associated @ \subsection{Access lines via pointers} We do not need the ifile as an argument to these functions, because the [[line]] type will point to an existing ifile. <>= public :: line_get_string -<>= - function line_get_string (line) result (string) +<>= + module function line_get_string (line) result (string) + type(string_t) :: string + type(line_p), intent(in) :: line + end function line_get_string +<>= + module function line_get_string (line) result (string) type(string_t) :: string type(line_p), intent(in) :: line if (associated (line%p)) then string = line%p%string else string = "" end if end function line_get_string @ %def line_get_string @ Variant where the line pointer is advanced after reading. <>= public :: line_get_string_advance -<>= - function line_get_string_advance (line) result (string) +<>= + module function line_get_string_advance (line) result (string) + type(string_t) :: string + type(line_p), intent(inout) :: line + end function line_get_string_advance +<>= + module function line_get_string_advance (line) result (string) type(string_t) :: string type(line_p), intent(inout) :: line if (associated (line%p)) then string = line%p%string call line_advance (line) else string = "" end if end function line_get_string_advance @ %def line_get_string_advance <>= public :: line_get_index -<>= - function line_get_index (line) result (index) +<>= + module function line_get_index (line) result (index) + integer :: index + type(line_p), intent(in) :: line + end function line_get_index +<>= + module function line_get_index (line) result (index) integer :: index type(line_p), intent(in) :: line if (associated (line%p)) then index = line%p%index else index = 0 end if end function line_get_index @ %def line_get_index <>= public :: line_get_length -<>= - function line_get_length (line) result (length) +<>= + module function line_get_length (line) result (length) + integer :: length + type(line_p), intent(in) :: line + end function line_get_length +<>= + module function line_get_length (line) result (length) integer :: length type(line_p), intent(in) :: line if (associated (line%p)) then length = len (line%p%string) else length = 0 end if end function line_get_length @ %def line_get_length @ \clearpage %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \section{Lexer} The lexer purpose is to read from a line-separated character input stream (usually a file) and properly chop the stream into lexemes (tokens). [The parser will transform lexemes into meaningful tokens, to be stored in a parse tree, therefore we do not use the term 'token' here.] The input is read line-by-line, but interpreted free-form, except for quotes and the comment syntax. (Fortran 2003 would allow us to use a stream type for reading.) In an object-oriented approach, we can dynamically create and destroy lexers, including the lexer setup. The main lexer function is to return a lexeme according to the basic lexer rules (quotes, comments, whitespace, special classes). There is also a routine to write back a lexeme to the input stream (but only once). For the rules, we separate the possible characters into classes. Whitespace usually consists of blank, tab, and line-feed, where any number of consecutive whitespace is equivalent to one. Quoted strings are enclosed by a pair of quote characters, possibly multiline. Comments are similar to quotes, but interpreted as whitespace. Numbers are identified (not distinguishing real and integer) but not interpreted. Other character classes make up identifiers. <<[[lexers.f90]]>>= <> module lexers <> - use io_units - use string_utils - use system_defs, only: EOF, EOR - use system_defs, only: LF - use system_defs, only: WHITESPACE_CHARS, LCLETTERS, UCLETTERS, DIGITS - use diagnostics use ifiles, only: ifile_t - use ifiles, only: line_p, line_is_associated, line_init, line_final - use ifiles, only: line_get_string_advance + use ifiles, only: line_p <> <> <> <> <> + interface +<> + end interface + +end module lexers +@ %def lexers +@ +<<[[lexers_sub.f90]]>>= +<> + +submodule (lexers) lexers_s + + use io_units + use string_utils + use system_defs, only: EOF, EOR + use system_defs, only: LF + use system_defs, only: WHITESPACE_CHARS, LCLETTERS, UCLETTERS, DIGITS + use ifiles, only: line_get_string_advance + use ifiles, only: line_is_associated, line_init, line_final + use diagnostics + + implicit none + contains <> -end module lexers -@ %def lexers +end submodule lexers_s + +@ %def lexers_s @ \subsection{Input streams} For flexible input, we define a generic stream type that refers to either an external file, an external unit which is already open, a string, an [[ifile]] object (internal file, i.e., string list), or a line pointer to an [[ifile]] object. The stream type actually follows the idea of a formatted external file, which is line-oriented. Thus, the stream reader always returns a whole record (input line). Note that only in the string version, the stream contents are stored inside the stream object. In the [[ifile]] version, the stream contains only the line pointer, while in the external-file case, the line pointer is implicitly created by the runtime library. <>= public :: stream_t <>= type :: stream_t type(string_t), pointer :: filename => null () integer, pointer :: unit => null () type(string_t), pointer :: string => null () type(ifile_t), pointer :: ifile => null () type(line_p), pointer :: line => null () integer :: record = 0 logical :: eof = .false. contains <> end type stream_t @ %def stream_t @ The initializers refer to the specific version. The stream should be undefined before calling this. <>= public :: stream_init <>= generic :: init => & stream_init_filename, & stream_init_unit, & stream_init_string, & stream_init_ifile, & stream_init_line procedure, private :: stream_init_filename procedure, private :: stream_init_unit procedure, private :: stream_init_string procedure, private :: stream_init_ifile procedure, private :: stream_init_line <>= interface stream_init module procedure stream_init_filename module procedure stream_init_unit module procedure stream_init_string module procedure stream_init_ifile module procedure stream_init_line end interface +<>= + module subroutine stream_init_filename (stream, filename) + class(stream_t), intent(out) :: stream + character(*), intent(in) :: filename + end subroutine stream_init_filename + module subroutine stream_init_unit (stream, unit) + class(stream_t), intent(out) :: stream + integer, intent(in) :: unit + end subroutine stream_init_unit + module subroutine stream_init_string (stream, string) + class(stream_t), intent(out) :: stream + type(string_t), intent(in) :: string + end subroutine stream_init_string + module subroutine stream_init_ifile (stream, ifile) + class(stream_t), intent(out) :: stream + type(ifile_t), intent(in) :: ifile + end subroutine stream_init_ifile + module subroutine stream_init_line (stream, line) + class(stream_t), intent(out) :: stream + type(line_p), intent(in) :: line + end subroutine stream_init_line <>= - subroutine stream_init_filename (stream, filename) + module subroutine stream_init_filename (stream, filename) class(stream_t), intent(out) :: stream character(*), intent(in) :: filename integer :: unit unit = free_unit () open (unit=unit, file=filename, status="old", action="read") call stream_init_unit (stream, unit) allocate (stream%filename) stream%filename = filename end subroutine stream_init_filename - subroutine stream_init_unit (stream, unit) + module subroutine stream_init_unit (stream, unit) class(stream_t), intent(out) :: stream integer, intent(in) :: unit allocate (stream%unit) stream%unit = unit stream%eof = .false. end subroutine stream_init_unit - subroutine stream_init_string (stream, string) + module subroutine stream_init_string (stream, string) class(stream_t), intent(out) :: stream type(string_t), intent(in) :: string allocate (stream%string) stream%string = string end subroutine stream_init_string - subroutine stream_init_ifile (stream, ifile) + module subroutine stream_init_ifile (stream, ifile) class(stream_t), intent(out) :: stream type(ifile_t), intent(in) :: ifile type(line_p) :: line call line_init (line, ifile) call stream_init_line (stream, line) allocate (stream%ifile) stream%ifile = ifile end subroutine stream_init_ifile - subroutine stream_init_line (stream, line) + module subroutine stream_init_line (stream, line) class(stream_t), intent(out) :: stream type(line_p), intent(in) :: line allocate (stream%line) stream%line = line end subroutine stream_init_line @ %def stream_init @ The finalizer restores the initial state. If an external file was opened, it is closed. <>= public :: stream_final <>= procedure :: final => stream_final +<>= + module subroutine stream_final (stream) + class(stream_t), intent(inout) :: stream + end subroutine stream_final <>= - subroutine stream_final (stream) + module subroutine stream_final (stream) class(stream_t), intent(inout) :: stream if (associated (stream%filename)) then close (stream%unit) deallocate (stream%unit) deallocate (stream%filename) else if (associated (stream%unit)) then deallocate (stream%unit) else if (associated (stream%string)) then deallocate (stream%string) else if (associated (stream%ifile)) then call line_final (stream%line) deallocate (stream%line) deallocate (stream%ifile) else if (associated (stream%line)) then call line_final (stream%line) deallocate (stream%line) end if end subroutine stream_final @ %def stream_final @ This returns the next record from the input stream. Depending on the stream type, the stream pointers are modified: Reading from external unit, the external file is advanced (implicitly). Reading from string, the string is replaced by an empty string. Reading from [[ifile]], the line pointer is advanced. Note that the [[iostat]] argument is mandatory. <>= public :: stream_get_record +<>= + module subroutine stream_get_record (stream, string, iostat) + type(stream_t), intent(inout) :: stream + type(string_t), intent(out) :: string + integer, intent(out) :: iostat + end subroutine stream_get_record <>= - subroutine stream_get_record (stream, string, iostat) + module subroutine stream_get_record (stream, string, iostat) type(stream_t), intent(inout) :: stream type(string_t), intent(out) :: string integer, intent(out) :: iostat if (associated (stream%unit)) then if (stream%eof) then iostat = EOF else call get (stream%unit, string, iostat=iostat) if (iostat == EOR) then iostat = 0 stream%record = stream%record + 1 end if if (iostat == EOF) then iostat = 0 stream%eof = .true. if (len (string) /= 0) stream%record = stream%record + 1 end if end if else if (associated (stream%string)) then if (len (stream%string) /= 0) then string = stream%string stream%string = "" iostat = 0 stream%record = stream%record + 1 else string = "" iostat = EOF end if else if (associated (stream%line)) then if (line_is_associated (stream%line)) then string = line_get_string_advance (stream%line) iostat = 0 stream%record = stream%record + 1 else string = "" iostat = EOF end if else call msg_bug (" Attempt to read from uninitialized input stream") end if end subroutine stream_get_record @ %def stream_get_record @ Return the current stream source as a message string. <>= public :: stream_get_source_info_string +<>= + module function stream_get_source_info_string (stream) result (string) + type(string_t) :: string + type(stream_t), intent(in) :: stream + end function stream_get_source_info_string <>= - function stream_get_source_info_string (stream) result (string) + module function stream_get_source_info_string (stream) result (string) type(string_t) :: string type(stream_t), intent(in) :: stream character(20) :: buffer if (associated (stream%filename)) then string = "File '" // stream%filename // "' (unit = " write (buffer, "(I0)") stream%unit string = string // trim (buffer) // ")" else if (associated (stream%unit)) then write (buffer, "(I0)") stream%unit string = "Unit " // trim (buffer) else if (associated (stream%string)) then string = "Input string" else if (associated (stream%ifile) .or. associated (stream%line)) then string = "Internal file" else string = "" end if end function stream_get_source_info_string @ %def stream_get_source_info_string @ Return the index of the record just read as a message string. <>= public :: stream_get_record_info_string +<>= + module function stream_get_record_info_string (stream) result (string) + type(string_t) :: string + type(stream_t), intent(in) :: stream + end function stream_get_record_info_string <>= - function stream_get_record_info_string (stream) result (string) + module function stream_get_record_info_string (stream) result (string) type(string_t) :: string type(stream_t), intent(in) :: stream character(20) :: buffer string = stream_get_source_info_string (stream) if (string /= "") string = string // ", " write (buffer, "(I0)") stream%record string = string // "line " // trim (buffer) end function stream_get_record_info_string @ %def stream_get_record_info_string @ \subsection{Keyword list} The lexer should be capable of identifying a token as a known keyword. To this end, we store a list of keywords: <>= public :: keyword_list_t <>= type :: keyword_entry_t private type(string_t) :: string type(keyword_entry_t), pointer :: next => null () end type keyword_entry_t type :: keyword_list_t private type(keyword_entry_t), pointer :: first => null () type(keyword_entry_t), pointer :: last => null () end type keyword_list_t @ %def keyword_entry_t keyword_list_t @ Add a new string to the keyword list, unless it is already there: <>= public :: keyword_list_add +<>= + module subroutine keyword_list_add (keylist, string) + type(keyword_list_t), intent(inout) :: keylist + type(string_t), intent(in) :: string + end subroutine keyword_list_add <>= - subroutine keyword_list_add (keylist, string) + module subroutine keyword_list_add (keylist, string) type(keyword_list_t), intent(inout) :: keylist type(string_t), intent(in) :: string type(keyword_entry_t), pointer :: k_entry_new if (.not. keyword_list_contains (keylist, string)) then allocate (k_entry_new) k_entry_new%string = string if (associated (keylist%first)) then keylist%last%next => k_entry_new else keylist%first => k_entry_new end if keylist%last => k_entry_new end if end subroutine keyword_list_add @ %def keyword_list_add @ Return true if a string is a keyword. <>= public :: keyword_list_contains +<>= + module function keyword_list_contains (keylist, string) result (found) + type(keyword_list_t), intent(in) :: keylist + type(string_t), intent(in) :: string + logical :: found + end function keyword_list_contains <>= - function keyword_list_contains (keylist, string) result (found) + module function keyword_list_contains (keylist, string) result (found) type(keyword_list_t), intent(in) :: keylist type(string_t), intent(in) :: string logical :: found found = .false. call check_rec (keylist%first) contains recursive subroutine check_rec (k_entry) type(keyword_entry_t), pointer :: k_entry if (associated (k_entry)) then if (k_entry%string /= string) then call check_rec (k_entry%next) else found = .true. end if end if end subroutine check_rec end function keyword_list_contains @ %def keyword_list_contains @ Write the keyword list <>= public :: keyword_list_write <>= interface keyword_list_write module procedure keyword_list_write_unit end interface +<>= + module subroutine keyword_list_write_unit (keylist, unit) + type(keyword_list_t), intent(in) :: keylist + integer, intent(in) :: unit + end subroutine keyword_list_write_unit <>= - subroutine keyword_list_write_unit (keylist, unit) + module subroutine keyword_list_write_unit (keylist, unit) type(keyword_list_t), intent(in) :: keylist integer, intent(in) :: unit write (unit, "(A)") "Keyword list:" if (associated (keylist%first)) then call keyword_write_rec (keylist%first) write (unit, *) else write (unit, "(1x,A)") "[empty]" end if contains recursive subroutine keyword_write_rec (k_entry) type(keyword_entry_t), intent(in), pointer :: k_entry if (associated (k_entry)) then write (unit, "(1x,A)", advance="no") char (k_entry%string) call keyword_write_rec (k_entry%next) end if end subroutine keyword_write_rec end subroutine keyword_list_write_unit @ %def keyword_list_write @ Clear the keyword list <>= public :: keyword_list_final +<>= + module subroutine keyword_list_final (keylist) + type(keyword_list_t), intent(inout) :: keylist + end subroutine keyword_list_final <>= - subroutine keyword_list_final (keylist) + module subroutine keyword_list_final (keylist) type(keyword_list_t), intent(inout) :: keylist call keyword_destroy_rec (keylist%first) nullify (keylist%last) contains recursive subroutine keyword_destroy_rec (k_entry) type(keyword_entry_t), pointer :: k_entry if (associated (k_entry)) then call keyword_destroy_rec (k_entry%next) deallocate (k_entry) end if end subroutine keyword_destroy_rec end subroutine keyword_list_final @ %def keyword_list_final @ \subsection{Lexeme templates} This type is handled like a rudimentary regular expression. It determines the lexer behavior when matching a string. The actual objects made from this type and the corresponding matching routines are listed below. <>= type :: template_t - private + !!! !!! Compiler bug in ifort 20/21/22: no structure constants for + !!! !!! types with private components in submodules possible + !!! private integer :: type character(256) :: charset1, charset2 integer :: len1, len2 end type template_t @ %def template_t @ These are the types that valid lexemes can have: <>= public :: T_KEYWORD, T_IDENTIFIER, T_QUOTED, T_NUMERIC <>= integer, parameter :: T_KEYWORD = 1 integer, parameter :: T_IDENTIFIER = 2, T_QUOTED = 3, T_NUMERIC = 4 @ %def T_KEYWORD T_IDENTIFIER T_QUOTED T_NUMERIC @ These are special types: <>= integer, parameter :: EMPTY = 0, WHITESPACE = 10 integer, parameter :: NO_MATCH = 11, IO_ERROR = 12, OVERFLOW = 13 integer, parameter :: UNMATCHED_QUOTE = 14 @ %def EMPTY WHITESPACE NO_MATCH IO_ERROR OVERFLOW UNMATCHED_QUOTE @ In addition, we have [[EOF]] which is a negative integer, normally $-1$. @ Printout for debugging: <>= subroutine lexeme_type_write (type, unit) integer, intent(in) :: type integer, intent(in) :: unit select case (type) case (EMPTY); write(unit,"(A)",advance="no") " EMPTY " case (WHITESPACE); write(unit,"(A)",advance="no") " WHITESPACE " case (T_IDENTIFIER);write(unit,"(A)",advance="no") " IDENTIFIER " case (T_QUOTED); write(unit,"(A)",advance="no") " QUOTED " case (T_NUMERIC); write(unit,"(A)",advance="no") " NUMERIC " case (IO_ERROR); write(unit,"(A)",advance="no") " IO_ERROR " case (OVERFLOW); write(unit,"(A)",advance="no") " OVERFLOW " case (UNMATCHED_QUOTE); write(unit,"(A)",advance="no") " UNMATCHEDQ " case (NO_MATCH); write(unit,"(A)",advance="no") " NO_MATCH " case (EOF); write(unit,"(A)",advance="no") " EOF " case default; write(unit,"(A)",advance="no") " [illegal] " end select end subroutine lexeme_type_write subroutine template_write (tt, unit) type(template_t), intent(in) :: tt integer, intent(in) :: unit call lexeme_type_write (tt%type, unit) write (unit, "(A)", advance="no") "'" // tt%charset1(1:tt%len1) // "'" write (unit, "(A)", advance="no") " '" // tt%charset2(1:tt%len2) // "'" end subroutine template_write @ %def template_write @ The matching functions all return the number of matched characters in the provided string. If this number is zero, the match has failed. The [[template]] functions are declared [[pure]] because they appear in [[forall]] loops below. A template for whitespace: <>= pure function template_whitespace (chars) result (tt) character(*), intent(in) :: chars type(template_t) :: tt tt = template_t (WHITESPACE, chars, "", len (chars), 0) end function template_whitespace @ %def template_whitespace @ Just match the string against the character set. <>= subroutine match_whitespace (tt, s, n) type(template_t), intent(in) :: tt character(*), intent(in) :: s integer, intent(out) :: n n = verify (s, tt%charset1(1:tt%len1)) - 1 if (n < 0) n = len (s) end subroutine match_whitespace @ %def match_whitespace @ A template for normal identifiers. To match, a lexeme should have a first character in class [[chars1]] and an arbitrary number of further characters in class [[chars2]]. If the latter is empty, we are looking for a single-character lexeme. <>= pure function template_identifier (chars1, chars2) result (tt) character(*), intent(in) :: chars1, chars2 type(template_t) :: tt tt = template_t (T_IDENTIFIER, chars1, chars2, len(chars1), len(chars2)) end function template_identifier @ %def template_identifier @ Here, the first letter must match, the others may or may not. <>= subroutine match_identifier (tt, s, n) type(template_t), intent(in) :: tt character(*), intent(in) :: s integer, intent(out) :: n if (verify (s(1:1), tt%charset1(1:tt%len1)) == 0) then n = verify (s(2:), tt%charset2(1:tt%len2)) if (n == 0) n = len (s) else n = 0 end if end subroutine match_identifier @ %def match_identifier @ A template for quoted strings. The same template applies for comments. The first character set indicates the left quote (could be a sequence of several characters), the second one the matching right quote. <>= pure function template_quoted (chars1, chars2) result (tt) character(*), intent(in) :: chars1, chars2 type(template_t) :: tt tt = template_t (T_QUOTED, chars1, chars2, len (chars1), len (chars2)) end function template_quoted @ %def template_quoted @ Here, the beginning of the string must exactly match the first character set, then we look for the second one. If found, return. If there is a first quote but no second one, return a negative number, indicating this error condition. <>= subroutine match_quoted (tt, s, n, range) type(template_t), intent(in) :: tt character(*), intent(in) :: s integer, intent(out) :: n integer, dimension(2), intent(out) :: range character(tt%len1) :: ch1 character(tt%len2) :: ch2 integer :: i ch1 = tt%charset1 if (s(1:tt%len1) == ch1) then ch2 = tt%charset2 do i = tt%len1 + 1, len (s) - tt%len2 + 1 if (s(i:i+tt%len2-1) == ch2) then n = i + tt%len2 - 1 range(1) = tt%len1 + 1 range(2) = i - 1 return end if end do n = -1 range = 0 else n = 0 range = 0 end if end subroutine match_quoted @ %def match_quoted @ A template for real numbers. The first character set is the set of allowed exponent letters. In accordance with the other functions we return the lexeme as a string but do not read it. <>= pure function template_numeric (chars) result (tt) character(*), intent(in) :: chars type(template_t) :: tt tt = template_t (T_NUMERIC, chars, "", len (chars), 0) end function template_numeric @ %def template_numeric @ A numeric lexeme may be real or integer. We purposely do not allow for a preceding sign. If the number is followed by an exponent, this is included, otherwise the rest is ignored. There is a possible pitfall with this behavior: while the string [[1e3]] will be interpreted as a single number, the analogous string [[1a3]] will be split into the number [[1]] and an identifier [[a3]]. There is no easy way around such an ambiguity. We should make sure that the syntax does not contain identifiers like [[a3]] or [[e3]]. <>= subroutine match_numeric (tt, s, n) type(template_t), intent(in) :: tt character(*), intent(in) :: s integer, intent(out) :: n integer :: i, n0 character(10), parameter :: digits = "0123456789" character(2), parameter :: signs = "-+" n = verify (s, digits) - 1 if (n < 0) then n = 0 return else if (s(n+1:n+1) == ".") then i = verify (s(n+2:), digits) - 1 if (i < 0) then n = len (s) return else if (i > 0 .or. n > 0) then n = n + 1 + i end if end if n0 = n if (n > 0) then if (verify (s(n+1:n+1), tt%charset1(1:tt%len1)) == 0) then n = n + 1 if (verify (s(n+1:n+1), signs) == 0) n = n + 1 i = verify (s(n+1:), digits) - 1 if (i < 0) then n = len (s) else if (i == 0) then n = n0 else n = n + i end if end if end if end subroutine match_numeric @ %def match_numeric @ The generic matching routine. With Fortran 2003 we would define separate types and use a SELECT TYPE instead. <>= subroutine match_template (tt, s, n, range) type(template_t), intent(in) :: tt character(*), intent(in) :: s integer, intent(out) :: n integer, dimension(2), intent(out) :: range select case (tt%type) case (WHITESPACE) call match_whitespace (tt, s, n) range = 0 case (T_IDENTIFIER) call match_identifier (tt, s, n) range(1) = 1 range(2) = len_trim (s) case (T_QUOTED) call match_quoted (tt, s, n, range) case (T_NUMERIC) call match_numeric (tt, s, n) range(1) = 1 range(2) = len_trim (s) case default call msg_bug ("Invalid lexeme template encountered") end select end subroutine match_template @ %def match_template @ Match against an array of templates. Return the index of the first template that matches together with the number of characters matched and the range of the relevant substring. If all fails, these numbers are zero. <>= subroutine match (tt, s, n, range, ii) type(template_t), dimension(:), intent(in) :: tt character(*), intent(in) :: s integer, intent(out) :: n integer, dimension(2), intent(out) :: range integer, intent(out) :: ii integer :: i do i = 1, size (tt) call match_template (tt(i), s, n, range) if (n /= 0) then ii = i return end if end do n = 0 ii = 0 end subroutine match @ %def match @ \subsection{The lexer setup} This object contains information about character classes. As said above, one class consists of quoting chars (matching left and right), another one of comment chars (similar), a class of whitespace, and several classes of characters that make up identifiers. When creating the lexer setup, the character classes are transformed into lexeme templates which are to be matched in a certain predefined order against the input stream. BLANK should always be taken as whitespace, some things may depend on this. TAB is also fixed by convention, but may in principle be modified. Newline (DOS!) and linefeed are also defined as whitespace. @ The lexer setup, containing the list of lexeme templates. No defaults yet. The type with index zero will be assigned to the [[NO_MATCH]] lexeme. The keyword list is not stored, just a pointer to it. We anticipate that the keyword list is part of the syntax table, and the lexer needs not alter it. Furthermore, the lexer is typically finished before the syntax table is. <>= integer, parameter :: CASE_KEEP = 0, CASE_UP = 1, CASE_DOWN = 2 @ %def CASE_KEEP CASE_UP CASE_DOWN <>= type :: lexer_setup_t private type(template_t), dimension(:), allocatable :: tt integer, dimension(:), allocatable :: type integer :: keyword_case = CASE_KEEP type(keyword_list_t), pointer :: keyword_list => null () end type lexer_setup_t @ %def lexer_setup @ Fill the lexer setup object. Some things are hardcoded here (whitespace, alphanumeric identifiers), some are free: comment chars (but these must be single, and comments must be terminated by line-feed), quote chars and matches (must be single), characters to be read as one-character lexeme, special classes (characters of one class that should be glued together as identifiers). <>= subroutine lexer_setup_init (setup, & comment_chars, quote_chars, quote_match, & single_chars, special_class, & keyword_list, upper_case_keywords) type(lexer_setup_t), intent(inout) :: setup character(*), intent(in) :: comment_chars character(*), intent(in) :: quote_chars, quote_match character(*), intent(in) :: single_chars character(*), dimension(:), intent(in) :: special_class type(keyword_list_t), pointer :: keyword_list logical, intent(in), optional :: upper_case_keywords integer :: n, i if (present (upper_case_keywords)) then if (upper_case_keywords) then setup%keyword_case = CASE_UP else setup%keyword_case = CASE_DOWN end if else setup%keyword_case = CASE_KEEP end if n = 1 + len (comment_chars) + len (quote_chars) + 1 & + len (single_chars) + size (special_class) + 1 allocate (setup%tt(n)) allocate (setup%type(0:n)) n = 0 setup%type(n) = NO_MATCH n = n + 1 setup%tt(n) = template_whitespace (WHITESPACE_CHARS) setup%type(n) = EMPTY forall (i = 1:len(comment_chars)) setup%tt(n+i) = template_quoted (comment_chars(i:i), LF) setup%type(n+i) = EMPTY end forall n = n + len (comment_chars) forall (i = 1:len(quote_chars)) setup%tt(n+i) = template_quoted (quote_chars(i:i), quote_match(i:i)) setup%type(n+i) = T_QUOTED end forall n = n + len (quote_chars) setup%tt(n+1) = template_numeric ("EeDd") setup%type(n+1) = T_NUMERIC n = n + 1 forall (i = 1:len (single_chars)) setup%tt(n+i) = template_identifier (single_chars(i:i), "") setup%type(n+i) = T_IDENTIFIER end forall n = n + len (single_chars) forall (i = 1:size (special_class)) setup%tt(n+i) = template_identifier & (trim (special_class(i)), trim (special_class(i))) setup%type(n+i) = T_IDENTIFIER end forall n = n + size (special_class) setup%tt(n+1) = template_identifier & (LCLETTERS//UCLETTERS, LCLETTERS//DIGITS//"_"//UCLETTERS) setup%type(n+1) = T_IDENTIFIER n = n + 1 if (n /= size (setup%tt)) & call msg_bug ("Size mismatch in lexer setup") setup%keyword_list => keyword_list end subroutine lexer_setup_init @ %def lexer_setup_init @ The destructor is needed only if the object is not itself part of an allocatable array <>= subroutine lexer_setup_final (setup) type(lexer_setup_t), intent(inout) :: setup deallocate (setup%tt, setup%type) setup%keyword_list => null () end subroutine lexer_setup_final @ %def lexer_setup_final @ For debugging: Write the lexer setup <>= subroutine lexer_setup_write (setup, unit) type(lexer_setup_t), intent(in) :: setup integer, intent(in) :: unit integer :: i write (unit, "(A)") "Lexer setup:" if (allocated (setup%tt)) then do i = 1, size (setup%tt) call template_write (setup%tt(i), unit) write (unit, '(A)', advance = "no") " -> " call lexeme_type_write (setup%type(i), unit) write (unit, *) end do else write (unit, *) "[empty]" end if if (associated (setup%keyword_list)) then call keyword_list_write (setup%keyword_list, unit) end if end subroutine lexer_setup_write @ %def lexer_setup_write @ \subsection{The lexeme type} An object of this type is returned by the lexer. Apart from the lexeme string, it gives information about the relevant substring (first and last character index) and the lexeme type. Interpreting the string is up to the parser. <>= public :: lexeme_t <>= type :: lexeme_t private integer :: type = EMPTY type(string_t) :: s integer :: b = 0, e = 0 end type lexeme_t @ %def lexeme_t @ Debugging aid: <>= public :: lexeme_write +<>= + module subroutine lexeme_write (t, unit) + type(lexeme_t), intent(in) :: t + integer, intent(in) :: unit + end subroutine lexeme_write <>= - subroutine lexeme_write (t, unit) + module subroutine lexeme_write (t, unit) type(lexeme_t), intent(in) :: t integer, intent(in) :: unit integer :: u u = given_output_unit (unit); if (u < 0) return select case (t%type) case (T_KEYWORD) write (u, *) "KEYWORD: '" // char (t%s) // "'" case (T_IDENTIFIER) write (u, *) "IDENTIFIER: '" // char (t%s) // "'" case (T_QUOTED) write (u, *) "QUOTED: '" // char (t%s) // "'" case (T_NUMERIC) write (u, *) "NUMERIC: '" // char (t%s) // "'" case (UNMATCHED_QUOTE) write (u, *) "Unmatched quote: "// char (t%s) case (OVERFLOW); write (u, *) "Overflow: "// char (t%s) case (EMPTY); write (u, *) "Empty lexeme" case (NO_MATCH); write (u, *) "No match" case (IO_ERROR); write (u, *) "IO error" case (EOF); write (u, *) "EOF" case default write (u, *) "Error" end select end subroutine lexeme_write @ %def lexeme_write @ Store string and type in a lexeme. The range determines the beginning and end of the relevant part of the string. Check for a keyword. <>= subroutine lexeme_set (t, keyword_list, s, range, type, keyword_case) type(lexeme_t), intent(out) :: t type(keyword_list_t), pointer :: keyword_list type(string_t), intent(in) :: s type(string_t) :: keyword integer, dimension(2), intent(in) :: range integer, intent(in) :: type integer, intent(in), optional :: keyword_case t%type = type if (present (keyword_case)) then select case (keyword_case) case (CASE_KEEP); keyword = s case (CASE_UP); keyword = upper_case (s) case (CASE_DOWN); keyword = lower_case (s) end select else keyword = s end if if (type == T_IDENTIFIER) then if (associated (keyword_list)) then if (keyword_list_contains (keyword_list, keyword)) & t%type = T_KEYWORD end if end if select case (t%type) case (T_KEYWORD); t%s = keyword case default; t%s = s end select t%b = range(1) t%e = range(2) end subroutine lexeme_set subroutine lexeme_clear (t) type(lexeme_t), intent(out) :: t t%type = EMPTY t%s = "" end subroutine lexeme_clear @ %def lexeme_set lexeme_clear @ Retrieve the lexeme string, the relevant part of it, and the type. The last function returns true if there is a break condition reached (error or EOF). <>= public :: lexeme_get_string public :: lexeme_get_contents public :: lexeme_get_delimiters public :: lexeme_get_type +<>= + module function lexeme_get_string (t) result (s) + type(string_t) :: s + type(lexeme_t), intent(in) :: t + end function lexeme_get_string + module function lexeme_get_contents (t) result (s) + type(string_t) :: s + type(lexeme_t), intent(in) :: t + end function lexeme_get_contents + module function lexeme_get_delimiters (t) result (del) + type(string_t), dimension(2) :: del + type(lexeme_t), intent(in) :: t + end function lexeme_get_delimiters + module function lexeme_get_type (t) result (type) + integer :: type + type(lexeme_t), intent(in) :: t + end function lexeme_get_type <>= - function lexeme_get_string (t) result (s) + module function lexeme_get_string (t) result (s) type(string_t) :: s type(lexeme_t), intent(in) :: t s = t%s end function lexeme_get_string - function lexeme_get_contents (t) result (s) + module function lexeme_get_contents (t) result (s) type(string_t) :: s type(lexeme_t), intent(in) :: t s = extract (t%s, t%b, t%e) end function lexeme_get_contents - function lexeme_get_delimiters (t) result (del) + module function lexeme_get_delimiters (t) result (del) type(string_t), dimension(2) :: del type(lexeme_t), intent(in) :: t del(1) = extract (t%s, finish = t%b-1) del(2) = extract (t%s, start = t%e+1) end function lexeme_get_delimiters - function lexeme_get_type (t) result (type) + module function lexeme_get_type (t) result (type) integer :: type type(lexeme_t), intent(in) :: t type = t%type end function lexeme_get_type @ %def lexeme_get_string lexeme_get_contents lexeme_get_type @ Check for a generic break condition (error/eof) and for eof in particular. <>= public :: lexeme_is_break public :: lexeme_is_eof +<>= + module function lexeme_is_break (t) result (break) + logical :: break + type(lexeme_t), intent(in) :: t + end function lexeme_is_break + module function lexeme_is_eof (t) result (ok) + logical :: ok + type(lexeme_t), intent(in) :: t + end function lexeme_is_eof <>= - function lexeme_is_break (t) result (break) + module function lexeme_is_break (t) result (break) logical :: break type(lexeme_t), intent(in) :: t select case (t%type) case (EOF, IO_ERROR, OVERFLOW, NO_MATCH) break = .true. case default break = .false. end select end function lexeme_is_break - function lexeme_is_eof (t) result (ok) + module function lexeme_is_eof (t) result (ok) logical :: ok type(lexeme_t), intent(in) :: t ok = t%type == EOF end function lexeme_is_eof @ %def lexeme_is_break lexeme_is_eof @ \subsection{The lexer object} We store the current lexeme and the current line. The line buffer is set each time a new line is read from file. The working buffer has one character more, to hold any trailing blank. Pointers to line and column are for debugging, they will be used to make up readable error messages for the parser. <>= public :: lexer_t <>= type :: lexer_t private type(lexer_setup_t) :: setup type(stream_t), pointer :: stream => null () type(lexeme_t) :: lexeme type(string_t) :: previous_line2 type(string_t) :: previous_line1 type(string_t) :: current_line integer :: lines_read = 0 integer :: current_column = 0 integer :: previous_column = 0 type(string_t) :: buffer type(lexer_t), pointer :: parent => null () contains <> end type lexer_t @ %def lexer_t @ Create-setup wrapper <>= public :: lexer_init <>= procedure :: init => lexer_init +<>= + module subroutine lexer_init (lexer, & + comment_chars, quote_chars, quote_match, & + single_chars, special_class, & + keyword_list, upper_case_keywords, & + parent) + class(lexer_t), intent(inout) :: lexer + character(*), intent(in) :: comment_chars + character(*), intent(in) :: quote_chars, quote_match + character(*), intent(in) :: single_chars + character(*), dimension(:), intent(in) :: special_class + type(keyword_list_t), pointer :: keyword_list + logical, intent(in), optional :: upper_case_keywords + type(lexer_t), target, intent(in), optional :: parent + end subroutine lexer_init <>= - subroutine lexer_init (lexer, & + module subroutine lexer_init (lexer, & comment_chars, quote_chars, quote_match, & single_chars, special_class, & keyword_list, upper_case_keywords, & parent) class(lexer_t), intent(inout) :: lexer character(*), intent(in) :: comment_chars character(*), intent(in) :: quote_chars, quote_match character(*), intent(in) :: single_chars character(*), dimension(:), intent(in) :: special_class type(keyword_list_t), pointer :: keyword_list logical, intent(in), optional :: upper_case_keywords type(lexer_t), target, intent(in), optional :: parent call lexer_setup_init (lexer%setup, & comment_chars = comment_chars, & quote_chars = quote_chars, & quote_match = quote_match, & single_chars = single_chars, & special_class = special_class, & keyword_list = keyword_list, & upper_case_keywords = upper_case_keywords) if (present (parent)) lexer%parent => parent call lexer_clear (lexer) end subroutine lexer_init @ %def lexer_init @ Clear the lexer state, but not the setup. This should be done when the lexing starts, but it is not known whether the lexer was used before. <>= public :: lexer_clear <>= procedure :: clear => lexer_clear +<>= + module subroutine lexer_clear (lexer) + class(lexer_t), intent(inout) :: lexer + end subroutine lexer_clear <>= - subroutine lexer_clear (lexer) + module subroutine lexer_clear (lexer) class(lexer_t), intent(inout) :: lexer call lexeme_clear (lexer%lexeme) lexer%previous_line2 = "" lexer%previous_line1 = "" lexer%current_line = "" lexer%lines_read = 0 lexer%current_column = 0 lexer%previous_column = 0 lexer%buffer = "" end subroutine lexer_clear @ %def lexer_clear @ Reset lexer state and delete setup <>= public :: lexer_final <>= procedure :: final => lexer_final +<>= + module subroutine lexer_final (lexer) + class(lexer_t), intent(inout) :: lexer + end subroutine lexer_final <>= - subroutine lexer_final (lexer) + module subroutine lexer_final (lexer) class(lexer_t), intent(inout) :: lexer call lexer%clear () call lexer_setup_final (lexer%setup) end subroutine lexer_final @ %def lexer_final @ \subsection{The lexer routine} For lexing we need to associate an input stream to the lexer. <>= public :: lexer_assign_stream <>= procedure :: assign_stream => lexer_assign_stream +<>= + module subroutine lexer_assign_stream (lexer, stream) + class(lexer_t), intent(inout) :: lexer + type(stream_t), intent(in), target :: stream + end subroutine lexer_assign_stream <>= - subroutine lexer_assign_stream (lexer, stream) + module subroutine lexer_assign_stream (lexer, stream) class(lexer_t), intent(inout) :: lexer type(stream_t), intent(in), target :: stream lexer%stream => stream end subroutine lexer_assign_stream @ %def lexer_assign_stream @ The lexer. The lexer function takes the lexer and returns the currently stored lexeme. If there is none, it is read from buffer, matching against the lexeme templates in the lexer setup. Empty lexemes, i.e., comments and whitespace, are discarded and the buffer is read again until we have found a nonempty lexeme (which may also be EOF or an error condition). The initial state of the lexer contains an empty lexeme, so reading from buffer is forced. The empty state is restored after returning the lexeme. A nonempty lexeme is present in the lexer only if [[lex_back]] has been executed before. The workspace is the [[lexer%buffer]], treated as a sort of input stream. We chop off lexemes from the beginning, adjusting the buffer to the left. Whenever the buffer is empty, or we are matching against an open quote which has not terminated, we read a new line and append it to the right. This may result in special conditions, which for simplicity are also returned as lexemes: I/O error, buffer overflow, end of file. If the latter happens during reading a quoted string, we return an unmatched-quote lexeme. Obviously, the special-condition lexemes have to be caught by the parser. Note that reading further lines is only necessary when reading a quoted string. Otherwise, the line-feed that ends each line is interpreted as whitespace which terminates a preceding lexeme, so there are no other valid multiline lexemes. To enable meaningful error messages, we also keep track of the line number of the last line read, and the beginning and the end of the current lexeme with respect to this line. The lexer is implemented as a function that returns the next lexeme (i.e., token). It uses the [[lexer]] setup and modifies the buffers and pointers stored within the lexer, a side effect. The lexer reads from an input stream object, which also is modified by this reading, e.g., a line pointer is advanced. <>= public :: lex +<>= + module subroutine lex (lexeme, lexer) + type(lexeme_t), intent(out) :: lexeme + type(lexer_t), intent(inout) :: lexer + end subroutine lex <>= - subroutine lex (lexeme, lexer) + module subroutine lex (lexeme, lexer) type(lexeme_t), intent(out) :: lexeme type(lexer_t), intent(inout) :: lexer integer :: iostat1, iostat2 integer :: pos integer, dimension(2) :: range integer :: template_index, type if (.not. associated (lexer%stream)) & call msg_bug ("Lexer called without assigned stream") GET_LEXEME: do while (lexeme_get_type (lexer%lexeme) == EMPTY) if (len (lexer%buffer) /= 0) then iostat1 = 0 else call lexer_read_line (lexer, iostat1) end if select case (iostat1) case (0) MATCH_BUFFER: do call match (lexer%setup%tt, char (lexer%buffer), & pos, range, template_index) if (pos >= 0) then type = lexer%setup%type(template_index) exit MATCH_BUFFER else pos = 0 call lexer_read_line (lexer, iostat2) select case (iostat2) case (EOF); type = UNMATCHED_QUOTE; exit MATCH_BUFFER case (1); type = IO_ERROR; exit MATCH_BUFFER case (2); type = OVERFLOW; exit MATCH_BUFFER end select end if end do MATCH_BUFFER case (EOF); type = EOF case (1); type = IO_ERROR case (2); type = OVERFLOW end select call lexeme_set (lexer%lexeme, lexer%setup%keyword_list, & extract (lexer%buffer, finish=pos), range, type, & lexer%setup%keyword_case) lexer%buffer = remove (lexer%buffer, finish=pos) lexer%previous_column = lexer%current_column lexer%current_column = lexer%current_column + pos end do GET_LEXEME lexeme = lexer%lexeme call lexeme_clear (lexer%lexeme) end subroutine lex @ %def lex @ Read a line and append it to the input buffer. If the input buffer overflows, return [[iostat=2]]. Otherwise, [[iostat=1]] indicates an I/O error, and [[iostat=-1]] the EOF. The input stream may either be an external unit or a [[ifile]] object. In the latter case, a line is read and the line pointer is advanced. Note that inserting [[LF]] between input lines is the Unix convention. Since we are doing this explicitly when gluing lines together, we can pattern-match against [[LF]] without having to worry about the system. <>= subroutine lexer_read_line (lexer, iostat) type(lexer_t), intent(inout) :: lexer integer, intent(out) :: iostat type(string_t) :: current_line current_line = lexer%current_line call stream_get_record (lexer%stream, lexer%current_line, iostat) if (iostat == 0) then lexer%lines_read = lexer%lines_read + 1 lexer%previous_line2 = lexer%previous_line1 lexer%previous_line1 = current_line lexer%buffer = lexer%buffer // lexer%current_line // LF lexer%previous_column = 0 lexer%current_column = 0 end if end subroutine lexer_read_line @ %def lexer_read_line @ Once in a while we have read one lexeme to many, which can be pushed back into the input stream. Do not do this more than once. <>= public :: lexer_put_back +<>= + module subroutine lexer_put_back (lexer, lexeme) + type(lexer_t), intent(inout) :: lexer + type(lexeme_t), intent(in) :: lexeme + end subroutine lexer_put_back <>= - subroutine lexer_put_back (lexer, lexeme) + module subroutine lexer_put_back (lexer, lexeme) type(lexer_t), intent(inout) :: lexer type(lexeme_t), intent(in) :: lexeme if (lexeme_get_type (lexer%lexeme) == EMPTY) then lexer%lexeme = lexeme else call msg_bug (" Lexer: lex_back fails; probably called twice") end if end subroutine lexer_put_back @ %def lexer_put__back @ \subsection{Diagnostics} For debugging: print just the setup <>= public :: lexer_write_setup +<>= + module subroutine lexer_write_setup (lexer, unit) + type(lexer_t), intent(in) :: lexer + integer, intent(in), optional :: unit + end subroutine lexer_write_setup <>= - subroutine lexer_write_setup (lexer, unit) + module subroutine lexer_write_setup (lexer, unit) type(lexer_t), intent(in) :: lexer integer, intent(in), optional :: unit integer :: u u = given_output_unit (unit); if (u < 0) return call lexer_setup_write (lexer%setup, u) end subroutine lexer_write_setup @ %def lexer_write_setup @ This is useful for error printing: show the current line with index and a pointer to the current column within the line. <>= public :: lexer_show_location +<>= + module subroutine lexer_show_location (lexer) + type(lexer_t), intent(in) :: lexer + end subroutine lexer_show_location <>= - subroutine lexer_show_location (lexer) + module subroutine lexer_show_location (lexer) type(lexer_t), intent(in) :: lexer type(string_t) :: loc_str if (associated (lexer%parent)) then call lexer_show_source (lexer%parent) call msg_message ("[includes]") else call msg_message () end if if (associated (lexer%stream)) then call msg_message & (char (stream_get_record_info_string (lexer%stream)) // ":") end if if (lexer%lines_read >= 4) call msg_result ("[...]") if (lexer%lines_read >= 3) call msg_result (char (lexer%previous_line2)) if (lexer%lines_read >= 2) call msg_result (char (lexer%previous_line1)) if (lexer%lines_read >= 1) then call msg_result (char (lexer%current_line)) loc_str = repeat (" ", lexer%previous_column) loc_str = loc_str // "^" if (lexer%current_column > lexer%previous_column) then loc_str = loc_str & // repeat ("-", max (lexer%current_column & - lexer%previous_column - 1, 0)) & // "^" end if call msg_result (char (loc_str)) end if end subroutine lexer_show_location @ %def lexer_show_location @ This just prints the current stream source. <>= recursive subroutine lexer_show_source (lexer) type(lexer_t), intent(in) :: lexer if (associated (lexer%parent)) then call lexer_show_source (lexer%parent) call msg_message ("[includes]") else call msg_message () end if if (associated (lexer%stream)) then call msg_message & (char (stream_get_source_info_string (lexer%stream)) // ":") end if end subroutine lexer_show_source @ %def lexer_show_source @ \subsection{Unit tests} Test module, followed by the corresponding implementation module. <<[[lexers_ut.f90]]>>= <> module lexers_ut use unit_tests use lexers_uti <> <> contains <> end module lexers_ut @ %def lexers_ut @ <<[[lexers_uti.f90]]>>= <> module lexers_uti <> use lexers <> <> contains <> end module lexers_uti @ %def lexers_ut @ API: driver for the unit tests below. <>= public :: lexer_test <>= subroutine lexer_test (u, results) integer, intent(in) :: u type(test_results_t), intent(inout) :: results <> end subroutine lexer_test @ %def lexer_test @ Test the lexer by lexing and printing all lexemes from unit [[u]], one per line, using preset conventions. <>= call test (lexer_1, "lexer_1", & "check lexer", u, results) <>= public :: lexer_1 <>= subroutine lexer_1 (u) integer, intent(in) :: u type(lexer_t), target :: lexer type(stream_t), target :: stream type(string_t) :: string type(lexeme_t) :: lexeme string = "abcdefghij" call lexer_init (lexer, & comment_chars = "", & quote_chars = "<'""", & quote_match = ">'""", & single_chars = "?*+|=,()", & special_class = ["."], & keyword_list = null ()) call stream_init (stream, string) call lexer_assign_stream (lexer, stream) do call lex (lexeme, lexer) call lexeme_write (lexeme, u) if (lexeme_is_break (lexeme)) exit end do call stream_final (stream) call lexer_final (lexer) end subroutine lexer_1 @ %def lexer_1 @ \clearpage %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \section{Syntax rules} This module provides tools to handle syntax rules in an abstract way. <<[[syntax_rules.f90]]>>= <> module syntax_rules <> - use io_units - use diagnostics - use system_defs, only: LCLETTERS, UCLETTERS, DIGITS - use ifiles, only: line_p, line_init, line_get_string_advance, line_final - use ifiles, only: ifile_t, ifile_get_length + use ifiles, only: ifile_t use lexers <> <> <> <> <> -contains - -<> + interface +<> + end interface end module syntax_rules @ %def syntax_rules @ +<<[[syntax_rules_sub.f90]]>>= +<> + +submodule (syntax_rules) syntax_rules_s + + use system_defs, only: LCLETTERS, UCLETTERS, DIGITS + use io_units + use diagnostics + use ifiles, only: ifile_get_length + use ifiles, only: line_p, line_init, line_get_string_advance, line_final + + implicit none + +<> + +contains + +<> + +end submodule syntax_rules_s + +@ %def syntax_rules_s \subsection{Syntax rules} Syntax rules are used by the parser. They determine how to translate the stream of lexemes as returned by the lexer into the parse tree node. A rule may be terminal, i.e., replace a matching lexeme into a terminal node. The node will contain the lexeme interpreted as a recognized token: \begin{itemize} \item a keyword: unquoted fixed character string; \item a real number, to be determined at runtime; \item an integer, to be determined at runtime; \item a boolean value, to be determined at runtime; \item a quoted token (e.g., string), to be determined at runtime; \item an identifier (unquoted string that is not a recognized keyword), to be determined at runtime. \end{itemize} It may be nonterminal, i.e., contain a sequence of child rules. These are matched consecutively (and recursively) against the input stream; the resulting node will be a branch node. \begin{itemize} \item the file, i.e., the input stream as a whole; \item a sequence of syntax elements, where the last syntax element may be optional, or optional repetitive; \end{itemize} Sequences carry a flag that tells whether the last child is optional or may be repeated an arbitrary number of times, correponding to the regexp modifiers [[?]], [[*]], and [[+]]. We also need an alternative rule; this will be replaced by the node generated by one of its children that matches; thus, it does not create a node of its own. \begin{itemize} \item an alternative of syntax elements. \end{itemize} We also define special types of sequences as convenience macros: \begin{itemize} \item a list: a sequence where the elements are separated by a separator keyword (e.g., commas), the separators are thrown away when parsing the list; \item a group: a sequence of three tokens, where the first and third ones are left and right delimiters, the delimiters are thrown away; \item an argument list: a delimited list, containing both delimiters and separators. \end{itemize} It would be great to have a polymorphic type for this purpose, but until Fortran 2003 is out we have to emulate this. Here are the syntax element codes: <>= public :: S_UNKNOWN public :: S_LOGICAL, S_INTEGER, S_REAL, S_COMPLEX, S_QUOTED public :: S_IDENTIFIER, S_KEYWORD public :: S_SEQUENCE, S_LIST, S_GROUP, S_ARGS public :: S_ALTERNATIVE public :: S_IGNORE <>= integer, parameter :: & S_UNKNOWN = 0, & S_LOGICAL = 1, S_INTEGER = 2, S_REAL = 3, S_COMPLEX = 4, & S_QUOTED = 5, S_IDENTIFIER = 6, S_KEYWORD = 7, & S_SEQUENCE = 8, S_LIST = 9, S_GROUP = 10, S_ARGS = 11, & S_ALTERNATIVE = 12, & S_IGNORE = 99 @ We need arrays of rule pointers, therefore this construct. <>= type :: rule_p private type(syntax_rule_t), pointer :: p => null () end type rule_p @ %def rule_p @ Return the association status of the rule pointer: -<>= +<>= elemental function rule_is_associated (rp) result (ok) logical :: ok type (rule_p), intent(in) :: rp ok = associated (rp%p) end function rule_is_associated @ %def rule_is_associated @ The rule type is one of the types listed above, represented by an integer code. The keyword, for a non-keyword rule, is an identifier used for the printed syntax table. The array of children is needed for nonterminal rules. In that case, there is a modifier for the last element (blank, "?", "*", or "+"), mirrored in the flags [[opt]] and [[rep]]. Then, we have the character constants used as separators and delimiters for this rule. Finally, the [[used]] flag can be set to indicate that this rule is the child of another rule. <>= public :: syntax_rule_t <>= type :: syntax_rule_t private integer :: type = S_UNKNOWN logical :: used = .false. type(string_t) :: keyword type(string_t) :: separator type(string_t), dimension(2) :: delimiter type(rule_p), dimension(:), allocatable :: child character(1) :: modifier = "" logical :: opt = .false., rep = .false. contains <> end type syntax_rule_t @ %def syntax_rule_t @ Initializer: Set type and key for a rule, but do not (yet) allocate anything. Finalizer: not needed (no pointer components). -<>= +<>= subroutine syntax_rule_init (rule, key, type) type(syntax_rule_t), intent(inout) :: rule type(string_t), intent(in) :: key integer, intent(in) :: type rule%keyword = key rule%type = type select case (rule%type) case (S_GROUP) call syntax_rule_set_delimiter (rule) case (S_LIST) call syntax_rule_set_separator (rule) case (S_ARGS) call syntax_rule_set_delimiter (rule) call syntax_rule_set_separator (rule) end select end subroutine syntax_rule_init @ %def syntax_rule_init @ These characters will not be enclosed in quotes when writing syntax rules: -<>= +<>= character(*), parameter :: & UNQUOTED = "(),|_"//LCLETTERS//UCLETTERS//DIGITS @ +\subsection{Accessing rules} +This is the API for syntax rules: +<>= + public :: syntax_rule_get_type +<>= + module function syntax_rule_get_type (rule) result (type) + integer :: type + type(syntax_rule_t), intent(in) :: rule + end function syntax_rule_get_type +<>= + module function syntax_rule_get_type (rule) result (type) + integer :: type + type(syntax_rule_t), intent(in) :: rule + type = rule%type + end function syntax_rule_get_type + +@ %def syntax_rule_get_type +<>= + public :: syntax_rule_get_key +<>= + module function syntax_rule_get_key (rule) result (key) + class(syntax_rule_t), intent(in) :: rule + type(string_t) :: key + end function syntax_rule_get_key +<>= + procedure :: get_key => syntax_rule_get_key +<>= + module function syntax_rule_get_key (rule) result (key) + class(syntax_rule_t), intent(in) :: rule + type(string_t) :: key + key = rule%keyword + end function syntax_rule_get_key + +@ %def syntax_rule_get_key +<>= + public :: syntax_rule_get_separator + public :: syntax_rule_get_delimiter +<>= + module function syntax_rule_get_separator (rule) result (separator) + type(string_t) :: separator + type(syntax_rule_t), intent(in) :: rule + end function syntax_rule_get_separator + module function syntax_rule_get_delimiter (rule) result (delimiter) + type(string_t), dimension(2) :: delimiter + type(syntax_rule_t), intent(in) :: rule + end function syntax_rule_get_delimiter +<>= + module function syntax_rule_get_separator (rule) result (separator) + type(string_t) :: separator + type(syntax_rule_t), intent(in) :: rule + separator = rule%separator + end function syntax_rule_get_separator + + module function syntax_rule_get_delimiter (rule) result (delimiter) + type(string_t), dimension(2) :: delimiter + type(syntax_rule_t), intent(in) :: rule + delimiter = rule%delimiter + end function syntax_rule_get_delimiter + +@ %def syntax_rule_get_separator syntax_rule_get_delimiter +@ Accessing child rules. If we use [[syntax_rule_get_n_sub]] for determining +loop bounds, we do not need a check in the second routine. +<>= + public :: syntax_rule_get_n_sub + public :: syntax_rule_get_sub_ptr +<>= + module function syntax_rule_get_sub_ptr (rule, i) result (sub) + type(syntax_rule_t), pointer :: sub + type(syntax_rule_t), intent(in), target :: rule + integer, intent(in) :: i + end function syntax_rule_get_sub_ptr + module function syntax_rule_get_n_sub (rule) result (n) + integer :: n + type(syntax_rule_t), intent(in) :: rule + end function syntax_rule_get_n_sub +<>= + module function syntax_rule_get_n_sub (rule) result (n) + integer :: n + type(syntax_rule_t), intent(in) :: rule + if (allocated (rule%child)) then + n = size (rule%child) + else + n = 0 + end if + end function syntax_rule_get_n_sub + + module function syntax_rule_get_sub_ptr (rule, i) result (sub) + type(syntax_rule_t), pointer :: sub + type(syntax_rule_t), intent(in), target :: rule + integer, intent(in) :: i + sub => rule%child(i)%p + end function syntax_rule_get_sub_ptr + + subroutine syntax_rule_set_sub (rule, i, sub) + type(syntax_rule_t), intent(inout) :: rule + integer, intent(in) :: i + type(syntax_rule_t), intent(in), target :: sub + rule%child(i)%p => sub + end subroutine syntax_rule_set_sub + +@ %def syntax_rule_get_n_sub syntax_rule_get_sub_ptr syntax_rule_set_sub +@ Return the modifier flags: +<>= + public :: syntax_rule_last_optional + public :: syntax_rule_last_repetitive +<>= + module function syntax_rule_last_optional (rule) result (opt) + logical :: opt + type(syntax_rule_t), intent(in) :: rule + end function syntax_rule_last_optional + module function syntax_rule_last_repetitive (rule) result (rep) + logical :: rep + type(syntax_rule_t), intent(in) :: rule + end function syntax_rule_last_repetitive +<>= + module function syntax_rule_last_optional (rule) result (opt) + logical :: opt + type(syntax_rule_t), intent(in) :: rule + opt = rule%opt + end function syntax_rule_last_optional + module function syntax_rule_last_repetitive (rule) result (rep) + logical :: rep + type(syntax_rule_t), intent(in) :: rule + rep = rule%rep + end function syntax_rule_last_repetitive + +@ %def syntax_rule_last_optional syntax_rule_last_repetitive +@ +Return true if the rule is atomic, i.e., logical, real, keyword etc. +<>= + public :: syntax_rule_is_atomic +<>= + module function syntax_rule_is_atomic (rule) result (atomic) + logical :: atomic + type(syntax_rule_t), intent(in) :: rule + end function syntax_rule_is_atomic +<>= + module function syntax_rule_is_atomic (rule) result (atomic) + logical :: atomic + type(syntax_rule_t), intent(in) :: rule + select case (rule%type) + case (S_LOGICAL, S_INTEGER, S_REAL, S_COMPLEX, S_IDENTIFIER, & + S_KEYWORD, S_QUOTED) + atomic = .true. + case default + atomic = .false. + end select + end function syntax_rule_is_atomic + +@ %def syntax_rule_is_atomic +@ \subsection{I/O} Write an account of the rule. Setting [[short]] true will suppress the node type. Setting [[key_only]] true will suppress the definition. Setting [[advance]] false will suppress the trailing newline. <>= public :: syntax_rule_write <>= procedure :: write => syntax_rule_write -<>= - subroutine syntax_rule_write (rule, unit, short, key_only, advance) +<>= + module subroutine syntax_rule_write (rule, unit, short, key_only, advance) + class(syntax_rule_t), intent(in) :: rule + integer, intent(in), optional :: unit + logical, intent(in), optional :: short, key_only, advance + end subroutine syntax_rule_write +<>= + module subroutine syntax_rule_write (rule, unit, short, key_only, advance) class(syntax_rule_t), intent(in) :: rule integer, intent(in), optional :: unit logical, intent(in), optional :: short, key_only, advance logical :: typ, def, adv integer :: u u = given_output_unit (unit); if (u < 0) return typ = .true.; if (present (short)) typ = .not. short def = .true.; if (present (key_only)) def = .not. key_only adv = .true.; if (present (advance)) adv = advance select case (rule%type) case (S_UNKNOWN); call write_atom ("???", typ) case (S_IGNORE); call write_atom ("IGNORE", typ) case (S_LOGICAL); call write_atom ("LOGICAL", typ) case (S_INTEGER); call write_atom ("INTEGER", typ) case (S_REAL); call write_atom ("REAL", typ) case (S_COMPLEX); call write_atom ("COMPLEX", typ) case (S_IDENTIFIER); call write_atom ("IDENTIFIER", typ) case (S_KEYWORD); call write_atom ("KEYWORD", typ) case (S_QUOTED) call write_quotes (typ, def, & del = rule%delimiter) case (S_SEQUENCE) call write_sequence ("SEQUENCE", typ, def, size (rule%child)) case (S_GROUP) call write_sequence ("GROUP", typ, def, size (rule%child), & del = rule%delimiter) case (S_LIST) call write_sequence ("LIST", typ, def, size (rule%child), & sep = rule%separator) case (S_ARGS) call write_sequence ("ARGUMENTS", typ, def, size (rule%child), & del = rule%delimiter, & sep = rule%separator) case (S_ALTERNATIVE) call write_sequence ("ALTERNATIVE", typ, def, size (rule%child), & sep = var_str ("|")) end select if (adv) write (u, *) contains subroutine write_type (type) character(*), intent(in) :: type character(11) :: str str = type write (u, "(1x,A)", advance="no") str end subroutine write_type subroutine write_key write (u, "(1x,A)", advance="no") char (wkey (rule)) end subroutine write_key subroutine write_atom (type, typ) character(*), intent(in) :: type logical, intent(in) :: typ if (typ) call write_type (type) call write_key end subroutine write_atom subroutine write_maybe_quoted (string) character(*), intent(in) :: string character, parameter :: q = "'" character, parameter :: qq = '"' if (verify (string, UNQUOTED) == 0) then write (u, "(1x,A)", advance = "no") trim (string) else if (verify (string, q) == 0) then write (u, "(1x,A)", advance = "no") qq // trim (string) // qq else write (u, "(1x,A)", advance = "no") q // trim (string) // q end if end subroutine write_maybe_quoted subroutine write_quotes (typ, def, del) logical, intent(in) :: typ, def type(string_t), dimension(2), intent(in) :: del if (typ) call write_type ("QUOTED") call write_key if (def) then write (u, "(1x,'=')", advance="no") call write_maybe_quoted (char (del(1))) write (u, "(1x,A)", advance="no") "..." call write_maybe_quoted (char (del(2))) end if end subroutine write_quotes subroutine write_sequence (type, typ, def, n, del, sep) character(*), intent(in) :: type logical, intent(in) :: typ, def integer, intent(in) :: n type(string_t), dimension(2), intent(in), optional :: del type(string_t), intent(in), optional :: sep integer :: i if (typ) call write_type (type) call write_key if (def) then write (u, "(1x,'=')", advance="no") if (present (del)) call write_maybe_quoted (char (del(1))) do i = 1, n if (i > 1 .and. present (sep)) & call write_maybe_quoted (char (sep)) write (u, "(1x,A)", advance="no") & char (wkey (syntax_rule_get_sub_ptr(rule, i))) if (i == n) write (u, "(A)", advance="no") trim (rule%modifier) end do if (present (del)) call write_maybe_quoted (char (del(2))) end if end subroutine write_sequence end subroutine syntax_rule_write @ %def syntax_rule_write @ In the printed representation, the keyword strings are enclosed as [[<...>]], unless they are bare keywords. Bare keywords are enclosed as [['..']] if they contain a character which is not a letter, digit, or underscore. If they contain a single-quote character, they are enclosed as [[".."]]. (A keyword must not contain both single- and double-quotes.) -<>= +<>= function wkey (rule) result (string) type(string_t) :: string type(syntax_rule_t), intent(in) :: rule select case (rule%type) case (S_KEYWORD) if (verify (rule%keyword, UNQUOTED) == 0) then string = rule%keyword else if (scan (rule%keyword, "'") == 0) then string = "'" // rule%keyword // "'" else string = '"' // rule%keyword // '"' end if case default string = "<" // rule%keyword // ">" end select end function wkey @ %def wkey @ \subsection{Completing syntax rules} Set the separator and delimiter entries, using defaults: -<>= +<>= subroutine syntax_rule_set_separator (rule, separator) type(syntax_rule_t), intent(inout) :: rule type(string_t), intent(in), optional :: separator if (present (separator)) then rule%separator = separator else rule%separator = "," end if end subroutine syntax_rule_set_separator subroutine syntax_rule_set_delimiter (rule, delimiter) type(syntax_rule_t), intent(inout) :: rule type(string_t), dimension(2), intent(in), optional :: delimiter if (present (delimiter)) then rule%delimiter = delimiter else rule%delimiter(1) = "(" rule%delimiter(2) = ")" end if end subroutine syntax_rule_set_delimiter @ %def syntax_rule_set_separator syntax_rule_set_delimiter @ Set the modifier entry and corresponding flags: -<>= +<>= function is_modifier (string) result (ok) logical :: ok type(string_t), intent(in) :: string select case (char (string)) case (" ", "?", "*", "+"); ok = .true. case default; ok = .false. end select end function is_modifier subroutine syntax_rule_set_modifier (rule, modifier) type(syntax_rule_t), intent(inout) :: rule type(string_t), intent(in) :: modifier rule%modifier = char (modifier) select case (rule%modifier) case (" ") case ("?"); rule%opt = .true. case ("*"); rule%opt = .true.; rule%rep = .true. case ("+"); rule%rep = .true. case default call msg_bug (" Syntax: sequence modifier '" // rule%modifier & // "' is not one of '+' '*' '?'") end select end subroutine syntax_rule_set_modifier @ %def is_modifier syntax_rule_set_modifier @ Check a finalized rule for completeness -<>= +<>= subroutine syntax_rule_check (rule) type(syntax_rule_t), intent(in) :: rule if (rule%keyword == "") call msg_bug ("Rule key not set") select case (rule%type) case (S_UNKNOWN); call bug (" Undefined rule") case (S_IGNORE, S_LOGICAL, S_INTEGER, S_REAL, S_COMPLEX, & S_IDENTIFIER, S_KEYWORD) case (S_QUOTED) if (rule%delimiter(1) == "" .or. rule%delimiter(2) == "") & call bug (" Missing quote character(s)") case (S_SEQUENCE) case (S_GROUP) if (rule%delimiter(1) == "" .or. rule%delimiter(2) == "") & call bug (" Missing delimiter(s)") case (S_LIST) if (rule%separator == "") call bug (" Missing separator") case (S_ARGS) if (rule%delimiter(1) == "" .or. rule%delimiter(2) == "") & call bug (" Missing delimiter(s)") if (rule%separator == "") call bug (" Missing separator") case (S_ALTERNATIVE) case default call bug (" Undefined syntax code") end select select case (rule%type) case (S_SEQUENCE, S_GROUP, S_LIST, S_ARGS, S_ALTERNATIVE) if (allocated (rule%child)) then if (.not.all (rule_is_associated (rule%child))) & call bug (" Child rules not all associated") else call bug (" Parent rule without children") end if case default if (allocated (rule%child)) call bug (" Non-parent rule with children") end select contains subroutine bug (string) character(*), intent(in) :: string call msg_bug (" Syntax table: Rule " // char (rule%keyword) // ": " & // string) end subroutine bug end subroutine syntax_rule_check @ %def syntax_rule_check @ -\subsection{Accessing rules} -This is the API for syntax rules: -<>= - public :: syntax_rule_get_type -<>= - function syntax_rule_get_type (rule) result (type) - integer :: type - type(syntax_rule_t), intent(in) :: rule - type = rule%type - end function syntax_rule_get_type - -@ %def syntax_rule_get_type -<>= - public :: syntax_rule_get_key -<>= - procedure :: get_key => syntax_rule_get_key -<>= - function syntax_rule_get_key (rule) result (key) - class(syntax_rule_t), intent(in) :: rule - type(string_t) :: key - key = rule%keyword - end function syntax_rule_get_key - -@ %def syntax_rule_get_key -<>= - public :: syntax_rule_get_separator - public :: syntax_rule_get_delimiter -<>= - function syntax_rule_get_separator (rule) result (separator) - type(string_t) :: separator - type(syntax_rule_t), intent(in) :: rule - separator = rule%separator - end function syntax_rule_get_separator - - function syntax_rule_get_delimiter (rule) result (delimiter) - type(string_t), dimension(2) :: delimiter - type(syntax_rule_t), intent(in) :: rule - delimiter = rule%delimiter - end function syntax_rule_get_delimiter - -@ %def syntax_rule_get_separator syntax_rule_get_delimiter -@ Accessing child rules. If we use [[syntax_rule_get_n_sub]] for determining -loop bounds, we do not need a check in the second routine. -<>= - public :: syntax_rule_get_n_sub - public :: syntax_rule_get_sub_ptr -<>= - function syntax_rule_get_n_sub (rule) result (n) - integer :: n - type(syntax_rule_t), intent(in) :: rule - if (allocated (rule%child)) then - n = size (rule%child) - else - n = 0 - end if - end function syntax_rule_get_n_sub - - function syntax_rule_get_sub_ptr (rule, i) result (sub) - type(syntax_rule_t), pointer :: sub - type(syntax_rule_t), intent(in), target :: rule - integer, intent(in) :: i - sub => rule%child(i)%p - end function syntax_rule_get_sub_ptr - - subroutine syntax_rule_set_sub (rule, i, sub) - type(syntax_rule_t), intent(inout) :: rule - integer, intent(in) :: i - type(syntax_rule_t), intent(in), target :: sub - rule%child(i)%p => sub - end subroutine syntax_rule_set_sub - -@ %def syntax_rule_get_n_sub syntax_rule_get_sub_ptr syntax_rule_set_sub -@ Return the modifier flags: -<>= - public :: syntax_rule_last_optional - public :: syntax_rule_last_repetitive -<>= - function syntax_rule_last_optional (rule) result (opt) - logical :: opt - type(syntax_rule_t), intent(in) :: rule - opt = rule%opt - end function syntax_rule_last_optional - function syntax_rule_last_repetitive (rule) result (rep) - logical :: rep - type(syntax_rule_t), intent(in) :: rule - rep = rule%rep - end function syntax_rule_last_repetitive - -@ %def syntax_rule_last_optional syntax_rule_last_repetitive -@ -Return true if the rule is atomic, i.e., logical, real, keyword etc. -<>= - public :: syntax_rule_is_atomic -<>= - function syntax_rule_is_atomic (rule) result (atomic) - logical :: atomic - type(syntax_rule_t), intent(in) :: rule - select case (rule%type) - case (S_LOGICAL, S_INTEGER, S_REAL, S_COMPLEX, S_IDENTIFIER, & - S_KEYWORD, S_QUOTED) - atomic = .true. - case default - atomic = .false. - end select - end function syntax_rule_is_atomic - -@ %def syntax_rule_is_atomic -@ \subsection{Syntax tables} A syntax table contains the tree of syntax rules and, for direct parser access, the list of valid keywords. \subsubsection{Types} The syntax contains an array of rules and a list of keywords. The array is actually used as a tree, where the top rule is the first array element, and the other rules are recursively pointed to by this first rule. (No rule should be used twice or be unused.) The keyword list is derived from the rule tree. Objects of this type need the target attribute if they are associated with a lexer. The keyword list will be pointed to by this lexer. <>= public :: syntax_t <>= type :: syntax_t private type(syntax_rule_t), dimension(:), allocatable :: rule type(keyword_list_t) :: keyword_list end type syntax_t @ %def syntax_t @ \subsubsection{Constructor/destructor} Initialize and finalize syntax tables <>= public :: syntax_init public :: syntax_final @ There are two ways to create a syntax: hard-coded from rules or dynamically from file. <>= interface syntax_init module procedure syntax_init_from_ifile end interface @ %def syntax_init @ The syntax definition is read from an [[ifile]] object which contains the syntax definitions in textual form, one rule per line. This interface allows for determining the number of rules beforehand. To parse the rule definitions, we make up a temporary lexer. Obviously, we cannot use a generic parser yet, so we have to hardcode the parsing process. -<>= - subroutine syntax_init_from_ifile (syntax, ifile) +<>= + module subroutine syntax_init_from_ifile (syntax, ifile) + type(syntax_t), intent(out), target :: syntax + type(ifile_t), intent(in) :: ifile + end subroutine syntax_init_from_ifile +<>= + module subroutine syntax_init_from_ifile (syntax, ifile) type(syntax_t), intent(out), target :: syntax type(ifile_t), intent(in) :: ifile type(lexer_t) :: lexer type(line_p) :: line type(string_t) :: string integer :: n_token integer :: i call lexer_init (lexer, & comment_chars = "", & quote_chars = "<'""", & quote_match = ">'""", & single_chars = "?*+|=,()", & special_class = ["."], & keyword_list = null ()) allocate (syntax%rule (ifile_get_length (ifile))) call line_init (line, ifile) do i = 1, size (syntax%rule) string = line_get_string_advance (line) call set_rule_type_and_key (syntax%rule(i), string, lexer) end do call line_init (line, ifile) do i = 1, size (syntax%rule) string = line_get_string_advance (line) select case (syntax%rule(i)%type) case (S_QUOTED, S_SEQUENCE, S_GROUP, S_LIST, S_ARGS, S_ALTERNATIVE) n_token = get_n_token (string, lexer) call set_rule_contents & (syntax%rule(i), syntax, n_token, string, lexer) end select end do call line_final (line) call lexer_final (lexer) call syntax_make_keyword_list (syntax) if (.not. all (syntax%rule%used)) then do i = 1, size (syntax%rule) if (.not. syntax%rule(i)%used) then call syntax_rule_write (syntax%rule(i), 6) end if end do call msg_bug (" Syntax table: unused rules") end if end subroutine syntax_init_from_ifile @ %def syntax_init_from_ifile @ For a given rule defined in the input, the first task is to determine its type and key. With these, we can initialize the rule in the table, postponing the association of children. -<>= +<>= subroutine set_rule_type_and_key (rule, string, lexer) type(syntax_rule_t), intent(inout) :: rule type(string_t), intent(in) :: string type(lexer_t), intent(inout) :: lexer type(stream_t), target :: stream type(lexeme_t) :: lexeme type(string_t) :: key character(2) :: type call lexer_clear (lexer) call stream_init (stream, string) call lexer_assign_stream (lexer, stream) call lex (lexeme, lexer) type = lexeme_get_string (lexeme) call lex (lexeme, lexer) key = lexeme_get_contents (lexeme) call stream_final (stream) if (trim (key) /= "") then select case (type) case ("IG"); call syntax_rule_init (rule, key, S_IGNORE) case ("LO"); call syntax_rule_init (rule, key, S_LOGICAL) case ("IN"); call syntax_rule_init (rule, key, S_INTEGER) case ("RE"); call syntax_rule_init (rule, key, S_REAL) case ("CO"); call syntax_rule_init (rule, key, S_COMPLEX) case ("ID"); call syntax_rule_init (rule, key, S_IDENTIFIER) case ("KE"); call syntax_rule_init (rule, key, S_KEYWORD) case ("QU"); call syntax_rule_init (rule, key, S_QUOTED) case ("SE"); call syntax_rule_init (rule, key, S_SEQUENCE) case ("GR"); call syntax_rule_init (rule, key, S_GROUP) case ("LI"); call syntax_rule_init (rule, key, S_LIST) case ("AR"); call syntax_rule_init (rule, key, S_ARGS) case ("AL"); call syntax_rule_init (rule, key, S_ALTERNATIVE) case default call lexer_show_location (lexer) call msg_bug (" Syntax definition: unknown type '" // type // "'") end select else print *, char (string) call msg_bug (" Syntax definition: empty rule key") end if end subroutine set_rule_type_and_key @ %def set_rule_type_and_key @ This function returns the number of tokens in an input line. -<>= +<>= function get_n_token (string, lexer) result (n) integer :: n type(string_t), intent(in) :: string type(lexer_t), intent(inout) :: lexer type(stream_t), target :: stream type(lexeme_t) :: lexeme integer :: i call lexer_clear (lexer) call stream_init (stream, string) call lexer_assign_stream (lexer, stream) i = 0 do call lex (lexeme, lexer) if (lexeme_is_break (lexeme)) exit i = i + 1 end do n = i call stream_final (stream) -end function get_n_token + end function get_n_token @ %def get_n_token +@ Assign the pointer to the rule associated with a given key (assumes +that the rule array is allocated) +<>= + public :: syntax_get_rule_ptr +<>= + module function syntax_get_rule_ptr (syntax, key) result (rule) + type(syntax_rule_t), pointer :: rule + type(syntax_t), intent(in), target :: syntax + type(string_t), intent(in) :: key + end function syntax_get_rule_ptr +<>= + module function syntax_get_rule_ptr (syntax, key) result (rule) + type(syntax_rule_t), pointer :: rule + type(syntax_t), intent(in), target :: syntax + type(string_t), intent(in) :: key + integer :: i + do i = 1, size (syntax%rule) + if (syntax%rule(i)%keyword == key) then + rule => syntax%rule(i) + return + end if + end do + call msg_bug (" Syntax table: Rule " // char (key) // " not found") + end function syntax_get_rule_ptr + +@ %def syntax_get_rule_ptr @ This subroutine extracts the rule contents for an input line. There are three tasks: (1) determine the number of children, depending on the rule type; (2) find and set the separator and delimiter strings, if required; (3) scan the child rules, find them in the syntax table and associate the parent rule with them. -<>= +<>= subroutine set_rule_contents (rule, syntax, n_token, string, lexer) type(syntax_rule_t), intent(inout) :: rule type(syntax_t), intent(in), target :: syntax integer, intent(in) :: n_token type(string_t), intent(in) :: string type(lexer_t), intent(inout) :: lexer type(stream_t), target :: stream type(lexeme_t), dimension(n_token) :: lexeme integer :: i, n_children call lexer_clear (lexer) call stream_init (stream, string) call lexer_assign_stream (lexer, stream) do i = 1, n_token call lex (lexeme(i), lexer) end do call stream_final (stream) n_children = get_n_children () call set_delimiters if (n_children > 1) call set_separator if (n_children > 0) call set_children contains function get_n_children () result (n_children) integer :: n_children select case (rule%type) case (S_QUOTED) if (n_token /= 6) call broken_rule (rule) n_children = 0 case (S_GROUP) if (n_token /= 6) call broken_rule (rule) n_children = 1 case (S_SEQUENCE) if (is_modifier (lexeme_get_string (lexeme(n_token)))) then if (n_token <= 4) call broken_rule (rule) call syntax_rule_set_modifier & (rule, lexeme_get_string (lexeme(n_token))) n_children = n_token - 4 else if (n_token <= 3) call broken_rule (rule) n_children = n_token - 3 end if case (S_LIST) if (is_modifier (lexeme_get_string (lexeme(n_token)))) then if (n_token <= 4 .or. mod (n_token, 2) /= 1) & call broken_rule (rule) call syntax_rule_set_modifier & (rule, lexeme_get_string (lexeme(n_token))) else if (n_token <= 3 .or. mod (n_token, 2) /= 0) then call broken_rule (rule) end if n_children = (n_token - 2) / 2 case (S_ARGS) if (is_modifier (lexeme_get_string (lexeme(n_token-1)))) then if (n_token <= 6 .or. mod (n_token, 2) /= 1) & call broken_rule (rule) call syntax_rule_set_modifier & (rule, lexeme_get_string (lexeme(n_token-1))) else if (n_token <= 5 .or. mod (n_token, 2) /= 0) then call broken_rule (rule) end if n_children = (n_token - 4) / 2 case (S_ALTERNATIVE) if (n_token <= 3 .or. mod (n_token, 2) /= 0) call broken_rule (rule) n_children = (n_token - 2) / 2 end select end function get_n_children subroutine set_delimiters type(string_t), dimension(2) :: delimiter select case (rule%type) case (S_QUOTED, S_GROUP, S_ARGS) delimiter(1) = lexeme_get_contents (lexeme(4)) delimiter(2) = lexeme_get_contents (lexeme(n_token)) call syntax_rule_set_delimiter (rule, delimiter) end select end subroutine set_delimiters subroutine set_separator type(string_t) :: separator select case (rule%type) case (S_LIST) separator = lexeme_get_contents (lexeme(5)) call syntax_rule_set_separator (rule, separator) case (S_ARGS) separator = lexeme_get_contents (lexeme(6)) call syntax_rule_set_separator (rule, separator) end select end subroutine set_separator subroutine set_children allocate (rule%child(n_children)) select case (rule%type) case (S_GROUP) call syntax_rule_set_sub (rule, 1, syntax_get_rule_ptr (syntax, & lexeme_get_contents (lexeme(5)))) case (S_SEQUENCE) do i = 1, n_children call syntax_rule_set_sub (rule, i, syntax_get_rule_ptr (syntax, & lexeme_get_contents (lexeme(i+3)))) end do case (S_LIST, S_ALTERNATIVE) do i = 1, n_children call syntax_rule_set_sub (rule, i, syntax_get_rule_ptr (syntax, & lexeme_get_contents (lexeme(2*i+2)))) end do case (S_ARGS) do i = 1, n_children call syntax_rule_set_sub (rule, i, syntax_get_rule_ptr (syntax, & lexeme_get_contents (lexeme(2*i+3)))) end do end select end subroutine set_children subroutine broken_rule (rule) type(syntax_rule_t), intent(in) :: rule call lexer_show_location (lexer) call msg_bug (" Syntax definition: broken rule '" & // char (wkey (rule)) // "'") end subroutine broken_rule end subroutine set_rule_contents @ %def set_rule_contents @ This routine completes the syntax table object. We assume that the rule array is set up. We associate the top rule with the first entry in the rule array and build up the keyword list. The keyword list includes delimiters and separators. Filling it can only be done after all rules are set. We scan the rule tree. For each keyword that we find, we try to add it to the keyword list; the pointer to the last element is carried along with the recursive scanning. Before appending a keyword, we check whether it is already in the list. -<>= +<>= subroutine syntax_make_keyword_list (syntax) type(syntax_t), intent(inout), target :: syntax type(syntax_rule_t), pointer :: rule rule => syntax%rule(1) call rule_scan_rec (rule, syntax%keyword_list) contains recursive subroutine rule_scan_rec (rule, keyword_list) type(syntax_rule_t), pointer :: rule type(keyword_list_t), intent(inout) :: keyword_list integer :: i if (rule%used) return rule%used = .true. select case (rule%type) case (S_UNKNOWN) call msg_bug (" Syntax: rule tree contains undefined rule") case (S_KEYWORD) call keyword_list_add (keyword_list, rule%keyword) end select select case (rule%type) case (S_LIST, S_ARGS) call keyword_list_add (keyword_list, rule%separator) end select select case (rule%type) case (S_GROUP, S_ARGS) call keyword_list_add (keyword_list, rule%delimiter(1)) call keyword_list_add (keyword_list, rule%delimiter(2)) end select select case (rule%type) case (S_SEQUENCE, S_GROUP, S_LIST, S_ARGS, S_ALTERNATIVE) if (.not. allocated (rule%child)) & call msg_bug (" Syntax: Non-terminal rule without children") case default if (allocated (rule%child)) & call msg_bug (" Syntax: Terminal rule with children") end select if (allocated (rule%child)) then do i = 1, size (rule%child) call rule_scan_rec (rule%child(i)%p, keyword_list) end do end if end subroutine rule_scan_rec end subroutine syntax_make_keyword_list @ %def syntax_make_keyword_list @ The finalizer deallocates the rule pointer array and deletes the keyword list. -<>= - subroutine syntax_final (syntax) +<>= + module subroutine syntax_final (syntax) + type(syntax_t), intent(inout) :: syntax + end subroutine syntax_final +<>= + module subroutine syntax_final (syntax) type(syntax_t), intent(inout) :: syntax if (allocated (syntax%rule)) deallocate (syntax%rule) call keyword_list_final (syntax%keyword_list) end subroutine syntax_final @ %def syntax_final @ \subsection{Accessing the syntax table} Return a pointer to the top rule <>= public :: syntax_get_top_rule_ptr -<>= - function syntax_get_top_rule_ptr (syntax) result (rule) +<>= + module function syntax_get_top_rule_ptr (syntax) result (rule) + type(syntax_rule_t), pointer :: rule + type(syntax_t), intent(in), target :: syntax + end function syntax_get_top_rule_ptr +<>= + module function syntax_get_top_rule_ptr (syntax) result (rule) type(syntax_rule_t), pointer :: rule type(syntax_t), intent(in), target :: syntax if (allocated (syntax%rule)) then rule => syntax%rule(1) else rule => null () end if end function syntax_get_top_rule_ptr @ %def syntax_get_top_rule_ptr -@ Assign the pointer to the rule associated with a given key (assumes -that the rule array is allocated) -<>= - public :: syntax_get_rule_ptr -<>= - function syntax_get_rule_ptr (syntax, key) result (rule) - type(syntax_rule_t), pointer :: rule - type(syntax_t), intent(in), target :: syntax - type(string_t), intent(in) :: key - integer :: i - do i = 1, size (syntax%rule) - if (syntax%rule(i)%keyword == key) then - rule => syntax%rule(i) - return - end if - end do - call msg_bug (" Syntax table: Rule " // char (key) // " not found") - end function syntax_get_rule_ptr - -@ %def syntax_get_rule_ptr @ Return a pointer to the keyword list <>= public :: syntax_get_keyword_list_ptr -<>= - function syntax_get_keyword_list_ptr (syntax) result (keyword_list) +<>= + module function syntax_get_keyword_list_ptr (syntax) result (keyword_list) + type(keyword_list_t), pointer :: keyword_list + type(syntax_t), intent(in), target :: syntax + end function syntax_get_keyword_list_ptr +<>= + module function syntax_get_keyword_list_ptr (syntax) result (keyword_list) type(keyword_list_t), pointer :: keyword_list type(syntax_t), intent(in), target :: syntax keyword_list => syntax%keyword_list end function syntax_get_keyword_list_ptr @ %def syntax_get_keyword_list_ptr @ \subsection{I/O} Write a readable representation of the syntax table <>= public :: syntax_write -<>= - subroutine syntax_write (syntax, unit) +<>= + module subroutine syntax_write (syntax, unit) + type(syntax_t), intent(in) :: syntax + integer, intent(in), optional :: unit + end subroutine syntax_write +<>= + module subroutine syntax_write (syntax, unit) type(syntax_t), intent(in) :: syntax integer, intent(in), optional :: unit integer :: u integer :: i u = given_output_unit (unit); if (u < 0) return write (u, "(A)") "Syntax table:" if (allocated (syntax%rule)) then do i = 1, size (syntax%rule) call syntax_rule_write (syntax%rule(i), u) end do else write (u, "(1x,A)") "[not allocated]" end if call keyword_list_write (syntax%keyword_list, u) end subroutine syntax_write @ %def syntax_write @ \clearpage %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \section{The parser} On a small scale, the parser interprets the string tokens returned by the lexer; they are interpreted as numbers, keywords and such and stored as a typed object. On a large scale, a text is read, parsed, and a syntax rule set is applied such that the tokens are stored as a parse tree. Syntax errors are spotted in this process, so the resulting parse tree is syntactically correct by definition. <<[[parser.f90]]>>= <> module parser <> <> - use io_units - use format_defs, only: FMT_19 - use system_defs, only: DIGITS - use diagnostics - use md5 use lexers use syntax_rules <> <> <> <> -contains - -<> + interface +<> + end interface end module parser @ %def parser @ +<<[[parser_sub.f90]]>>= +<> + +submodule (parser) parser_s + + use io_units + use diagnostics + use md5 + use system_defs, only: DIGITS + use format_defs, only: FMT_19 + + implicit none + +contains + +<> + +end submodule parser_s + +@ %def parser_s \subsection{The token type} Tokens are elements of the parsed input that carry a value: logical, integer, real, quoted string, (unquoted) identifier, or known keyword. Note that non-keyword tokens also have an abstract key attached to them. This is an obvious candidate for polymorphism. <>= type :: token_t private integer :: type = S_UNKNOWN logical, pointer :: lval => null () integer, pointer :: ival => null () real(default), pointer :: rval => null () complex(default), pointer :: cval => null () type(string_t), pointer :: sval => null () type(string_t), pointer :: kval => null () type(string_t), dimension(:), pointer :: quote => null () end type token_t @ %def token_t @ Create a token from the lexeme returned by the lexer: Allocate storage and try to interpret the lexeme according to the type that is requested by the parser. For a keyword token, match the lexeme against the requested key. If successful, set the token type, value, and key. Otherwise, set the type to [[S_UNKNOWN]]. <>= subroutine token_init (token, lexeme, requested_type, key) type(token_t), intent(out) :: token type(lexeme_t), intent(in) :: lexeme integer, intent(in) :: requested_type type(string_t), intent(in) :: key integer :: type type = lexeme_get_type (lexeme) token%type = S_UNKNOWN select case (requested_type) case (S_LOGICAL) if (type == T_IDENTIFIER) call read_logical & (char (lexeme_get_string (lexeme))) case (S_INTEGER) if (type == T_NUMERIC) call read_integer & (char (lexeme_get_string (lexeme))) case (S_REAL) if (type == T_NUMERIC) call read_real & (char (lexeme_get_string (lexeme))) case (S_COMPLEX) if (type == T_NUMERIC) call read_complex & (char (lexeme_get_string (lexeme))) case (S_IDENTIFIER) if (type == T_IDENTIFIER) call read_identifier & (lexeme_get_string (lexeme)) case (S_KEYWORD) if (type == T_KEYWORD) call check_keyword & (lexeme_get_string (lexeme), key) case (S_QUOTED) if (type == T_QUOTED) call read_quoted & (lexeme_get_contents (lexeme), lexeme_get_delimiters (lexeme)) case default print *, requested_type call msg_bug (" Invalid token type code requested by the parser") end select if (token%type /= S_UNKNOWN) then allocate (token%kval) token%kval = key end if contains subroutine read_logical (s) character(*), intent(in) :: s select case (s) case ("t", "T", "true", "TRUE", "y", "Y", "yes", "YES") allocate (token%lval) token%lval = .true. token%type = S_LOGICAL case ("f", "F", "false", "FALSE", "n", "N", "no", "NO") allocate (token%lval) token%lval = .false. token%type = S_LOGICAL end select end subroutine read_logical subroutine read_integer (s) character(*), intent(in) :: s integer :: tmp, iostat if (verify (s, DIGITS) == 0) then read (s, *, iostat=iostat) tmp if (iostat == 0) then allocate (token%ival) token%ival = tmp token%type = S_INTEGER end if end if end subroutine read_integer subroutine read_real (s) character(*), intent(in) :: s real(default) :: tmp integer :: iostat read (s, *, iostat=iostat) tmp if (iostat == 0) then allocate (token%rval) token%rval = tmp token%type = S_REAL end if end subroutine read_real subroutine read_complex (s) character(*), intent(in) :: s complex(default) :: tmp integer :: iostat read (s, *, iostat=iostat) tmp if (iostat == 0) then allocate (token%cval) token%cval = tmp token%type = S_COMPLEX end if end subroutine read_complex subroutine read_identifier (s) type(string_t), intent(in) :: s allocate (token%sval) token%sval = s token%type = S_IDENTIFIER end subroutine read_identifier subroutine check_keyword (s, key) type(string_t), intent(in) :: s type(string_t), intent(in) :: key if (key == s) token%type = S_KEYWORD end subroutine check_keyword subroutine read_quoted (s, del) type(string_t), intent(in) :: s type(string_t), dimension(2), intent(in) :: del allocate (token%sval, token%quote(2)) token%sval = s token%quote(1) = del(1) token%quote(2) = del(2) token%type = S_QUOTED end subroutine read_quoted end subroutine token_init @ %def token_init @ Manually set a token to a keyword. <>= subroutine token_init_key (token, key) type(token_t), intent(out) :: token type(string_t), intent(in) :: key token%type = S_KEYWORD allocate (token%kval) token%kval = key end subroutine token_init_key @ %def token_init_key @ Reset a token to an empty state, freeing allocated memory, and deallocate the token itself. <>= subroutine token_final (token) type(token_t), intent(inout) :: token token%type = S_UNKNOWN if (associated (token%lval)) deallocate (token%lval) if (associated (token%ival)) deallocate (token%ival) if (associated (token%rval)) deallocate (token%rval) if (associated (token%sval)) deallocate (token%sval) if (associated (token%kval)) deallocate (token%kval) if (associated (token%quote)) deallocate (token%quote) end subroutine token_final @ %def token_final @ Check for empty=valid token: <>= function token_is_valid (token) result (valid) logical :: valid type(token_t), intent(in) :: token valid = token%type /= S_UNKNOWN end function token_is_valid @ %def token_is_valid @ Write the contents of a token. <>= subroutine token_write (token, unit) type(token_t), intent(in) :: token integer, intent(in), optional :: unit integer :: u u = given_output_unit (unit); if (u < 0) return select case (token%type) case (S_LOGICAL) write (u, "(L1)") token%lval case (S_INTEGER) write (u, "(I0)") token%ival case (S_REAL) write (u, "(" // FMT_19 // ")") token%rval case (S_COMPLEX) write (u, "('('," // FMT_19 // ",','," // FMT_19 // ",')')") token%cval case (S_IDENTIFIER) write (u, "(A)") char (token%sval) case (S_KEYWORD) write (u, "(A,A)") '[keyword] ' // char (token%kval) case (S_QUOTED) write (u, "(A)") & char (token%quote(1)) // char (token%sval) // char (token%quote(2)) case default write (u, "(A)") '[empty]' end select end subroutine token_write @ %def token_write @ Token assignment via deep copy. This is useful to avoid confusion when the token is transferred to some parse-tree node. <>= interface assignment(=) module procedure token_assign module procedure token_assign_integer module procedure token_assign_real module procedure token_assign_complex module procedure token_assign_logical module procedure token_assign_string end interface @ %def = @ We need to copy only the contents that are actually assigned, the other pointers remain disassociated. +<>= + module subroutine token_assign (token, token_in) + type(token_t), intent(out) :: token + type(token_t), intent(in) :: token_in + end subroutine token_assign <>= - subroutine token_assign (token, token_in) + module subroutine token_assign (token, token_in) type(token_t), intent(out) :: token type(token_t), intent(in) :: token_in token%type = token_in%type select case (token%type) case (S_LOGICAL); allocate (token%lval); token%lval = token_in%lval case (S_INTEGER); allocate (token%ival); token%ival = token_in%ival case (S_REAL); allocate (token%rval); token%rval = token_in%rval case (S_COMPLEX); allocate (token%cval); token%cval = token_in%cval case (S_IDENTIFIER); allocate (token%sval); token%sval = token_in%sval case (S_QUOTED); allocate (token%sval); token%sval = token_in%sval allocate (token%quote(2)); token%quote = token_in%quote end select if (token%type /= S_UNKNOWN) then allocate (token%kval); token%kval = token_in%kval end if end subroutine token_assign @ %def token_assign @ We need to copy only the contents that are actually assigned, the other pointers remain disassociated. +<>= + module subroutine token_assign_integer (token, ival) + type(token_t), intent(out) :: token + integer, intent(in) :: ival + end subroutine token_assign_integer + module subroutine token_assign_real (token, rval) + type(token_t), intent(out) :: token + real(default), intent(in) :: rval + end subroutine token_assign_real + module subroutine token_assign_complex (token, cval) + type(token_t), intent(out) :: token + complex(default), intent(in) :: cval + end subroutine token_assign_complex + module subroutine token_assign_logical (token, lval) + type(token_t), intent(out) :: token + logical, intent(in) :: lval + end subroutine token_assign_logical + module subroutine token_assign_string (token, sval) + type(token_t), intent(out) :: token + type(string_t), intent(in) :: sval + end subroutine token_assign_string <>= - subroutine token_assign_integer (token, ival) + module subroutine token_assign_integer (token, ival) type(token_t), intent(out) :: token integer, intent(in) :: ival token%type = S_INTEGER allocate (token%ival) token%ival = ival end subroutine token_assign_integer - subroutine token_assign_real (token, rval) + module subroutine token_assign_real (token, rval) type(token_t), intent(out) :: token real(default), intent(in) :: rval token%type = S_REAL allocate (token%rval) token%rval = rval end subroutine token_assign_real - subroutine token_assign_complex (token, cval) + module subroutine token_assign_complex (token, cval) type(token_t), intent(out) :: token complex(default), intent(in) :: cval token%type = S_COMPLEX allocate (token%cval) token%cval = cval end subroutine token_assign_complex - subroutine token_assign_logical (token, lval) + module subroutine token_assign_logical (token, lval) type(token_t), intent(out) :: token logical, intent(in) :: lval token%type = S_LOGICAL allocate (token%lval) token%lval = lval end subroutine token_assign_logical - subroutine token_assign_string (token, sval) + module subroutine token_assign_string (token, sval) type(token_t), intent(out) :: token type(string_t), intent(in) :: sval token%type = S_QUOTED allocate (token%sval) token%sval = sval allocate (token%quote(2)); token%quote = '"' end subroutine token_assign_string @ %def token_assign @ \subsection{Retrieve token contents} These functions all do a trivial sanity check that should avoid crashes. <>= function token_get_logical (token) result (lval) logical :: lval type(token_t), intent(in) :: token if (associated (token%lval)) then lval = token%lval else call token_mismatch (token, "logical") end if end function token_get_logical function token_get_integer (token) result (ival) integer :: ival type(token_t), intent(in) :: token if (associated (token%ival)) then ival = token%ival else call token_mismatch (token, "integer") end if end function token_get_integer function token_get_real (token) result (rval) real(default) :: rval type(token_t), intent(in) :: token if (associated (token%rval)) then rval = token%rval else call token_mismatch (token, "real") end if end function token_get_real function token_get_cmplx (token) result (cval) complex(default) :: cval type(token_t), intent(in) :: token if (associated (token%cval)) then cval = token%cval else call token_mismatch (token, "complex") end if end function token_get_cmplx function token_get_string (token) result (sval) type(string_t) :: sval type(token_t), intent(in) :: token if (associated (token%sval)) then sval = token%sval else call token_mismatch (token, "string") end if end function token_get_string function token_get_key (token) result (kval) type(string_t) :: kval type(token_t), intent(in) :: token if (associated (token%kval)) then kval = token%kval else call token_mismatch (token, "keyword") end if end function token_get_key function token_get_quote (token) result (quote) type(string_t), dimension(2) :: quote type(token_t), intent(in) :: token if (associated (token%quote)) then quote = token%quote else call token_mismatch (token, "quote") end if end function token_get_quote @ %def token_get_logical token_get_integer token_get_real @ %def token_get_string token_get_key token_get_quote <>= subroutine token_mismatch (token, type) type(token_t), intent(in) :: token character(*), intent(in) :: type write (6, "(A)", advance="no") "Token: " call token_write (token) call msg_bug (" Token type mismatch; value required as " // type) end subroutine token_mismatch @ %def token_mismatch @ \subsection{The parse tree: nodes} The parser will generate a parse tree from the input stream. Each node in this parse tree points to the syntax rule that was applied. (Since syntax rules are stored in a pointer-type array within the syntax table, they qualify as targets.) A leaf node contains a token. A branch node has subnodes. The subnodes are stored as a list, so each node also has a [[next]] pointer. <>= public :: parse_node_t <>= type :: parse_node_t private type(syntax_rule_t), pointer :: rule => null () type(token_t) :: token integer :: n_sub = 0 type(parse_node_t), pointer :: sub_first => null () type(parse_node_t), pointer :: sub_last => null () type(parse_node_t), pointer :: next => null () contains <> end type parse_node_t @ %def parse_node_t @ Container for parse node pointers, useful for creating pointer arrays: <>= public :: parse_node_p <>= type :: parse_node_p type(parse_node_t), pointer :: ptr => null () end type parse_node_p @ %def parse_node_p @ Output. The first version writes a node together with its sub-node tree, organized by indentation. <>= procedure :: write => parse_node_write_rec <>= public :: parse_node_write_rec +<>= + recursive module subroutine parse_node_write_rec (node, unit, short, depth) + class(parse_node_t), intent(in), target :: node + integer, intent(in), optional :: unit + logical, intent(in), optional :: short + integer, intent(in), optional :: depth + end subroutine parse_node_write_rec <>= - recursive subroutine parse_node_write_rec (node, unit, short, depth) + recursive module subroutine parse_node_write_rec (node, unit, short, depth) class(parse_node_t), intent(in), target :: node integer, intent(in), optional :: unit logical, intent(in), optional :: short integer, intent(in), optional :: depth integer :: u, d type(parse_node_t), pointer :: current u = given_output_unit (unit); if (u < 0) return d = 0; if (present (depth)) d = depth call parse_node_write (node, u, short=short) current => node%sub_first do while (associated (current)) write (u, "(A)", advance = "no") repeat ("| ", d) call parse_node_write_rec (current, unit, short, d+1) current => current%next end do end subroutine parse_node_write_rec @ %def parse_node_write_rec @ This does the actual output for a single node, without recursion. <>= public :: parse_node_write +<>= + module subroutine parse_node_write (node, unit, short) + class(parse_node_t), intent(in) :: node + integer, intent(in), optional :: unit + logical, intent(in), optional :: short + end subroutine parse_node_write <>= - subroutine parse_node_write (node, unit, short) + module subroutine parse_node_write (node, unit, short) class(parse_node_t), intent(in) :: node integer, intent(in), optional :: unit logical, intent(in), optional :: short integer :: u type(parse_node_t), pointer :: current u = given_output_unit (unit); if (u < 0) return write (u, "('+ ')", advance = "no") if (associated (node%rule)) then call syntax_rule_write (node%rule, u, & short=short, key_only=.true., advance=.false.) if (token_is_valid (node%token)) then write (u, "(' = ')", advance="no") call token_write (node%token, u) else if (associated (node%sub_first)) then write (u, "(' = ')", advance="no") current => node%sub_first do while (associated (current)) call syntax_rule_write (current%rule, u, & short=.true., key_only=.true., advance=.false.) current => current%next end do write (u, *) else write (u, *) end if else write (u, *) "[empty]" end if end subroutine parse_node_write @ %def parse_node_write @ Finalize the token and recursively finalize and deallocate all sub-nodes. <>= public :: parse_node_final +<>= + recursive module subroutine parse_node_final (node, recursive) + type(parse_node_t), intent(inout) :: node + logical, intent(in), optional :: recursive + end subroutine parse_node_final <>= - recursive subroutine parse_node_final (node, recursive) + recursive module subroutine parse_node_final (node, recursive) type(parse_node_t), intent(inout) :: node type(parse_node_t), pointer :: current logical, intent(in), optional :: recursive logical :: rec rec = .true.; if (present (recursive)) rec = recursive call token_final (node%token) if (rec) then do while (associated (node%sub_first)) current => node%sub_first node%sub_first => node%sub_first%next call parse_node_final (current) deallocate (current) end do end if end subroutine parse_node_final @ %def parse_node_final @ \subsection{Filling nodes} The constructors allocate and initialize the node. There are two possible initializers (in a later version, should correspond to different type extensions). First, leaf (terminal) nodes. The token constructor does the actual work, looking at the requested type and key for the given rule and matching against the lexeme contents. If it fails, the token will keep the type [[S_UNKNOWN]] and remain empty. Otherwise, we have a valid node which contains the new token. If the lexeme argument is absent, the token is left empty. <>= subroutine parse_node_create_leaf (node, rule, lexeme) type(parse_node_t), pointer :: node type(syntax_rule_t), intent(in), target :: rule type(lexeme_t), intent(in) :: lexeme allocate (node) node%rule => rule call token_init (node%token, lexeme, & syntax_rule_get_type (rule), syntax_rule_get_key (rule)) if (.not. token_is_valid (node%token)) deallocate (node) end subroutine parse_node_create_leaf @ %def parse_node_create_leaf @ This version allows us to manually create a leaf node that holds a keyword. <>= public :: parse_node_create_key +<>= + module subroutine parse_node_create_key (node, rule) + type(parse_node_t), intent(out) :: node + type(syntax_rule_t), intent(in), target :: rule + end subroutine parse_node_create_key <>= - subroutine parse_node_create_key (node, rule) + module subroutine parse_node_create_key (node, rule) type(parse_node_t), intent(out) :: node type(syntax_rule_t), intent(in), target :: rule node%rule => rule call token_init_key (node%token, syntax_rule_get_key (rule)) end subroutine parse_node_create_key @ %def parse_node_create_key @ This version allows us to manually create a leaf node that holds a fixed value. Only one of the optional values should be provided. <>= public :: parse_node_create_value +<>= + module subroutine parse_node_create_value (node, rule, ival, rval, cval, sval, lval) + type(parse_node_t), intent(out) :: node + type(syntax_rule_t), intent(in), target :: rule + integer, intent(in), optional :: ival + real(default), intent(in), optional :: rval + complex(default), intent(in), optional :: cval + type(string_t), intent(in), optional :: sval + logical, intent(in), optional :: lval + end subroutine parse_node_create_value <>= - subroutine parse_node_create_value (node, rule, ival, rval, cval, sval, lval) + module subroutine parse_node_create_value (node, rule, ival, rval, cval, sval, lval) type(parse_node_t), intent(out) :: node type(syntax_rule_t), intent(in), target :: rule integer, intent(in), optional :: ival real(default), intent(in), optional :: rval complex(default), intent(in), optional :: cval type(string_t), intent(in), optional :: sval logical, intent(in), optional :: lval node%rule => rule call parse_node_set_value (node, ival, rval, cval, sval, lval) end subroutine parse_node_create_value @ %def parse_node_create_value @ Directly set the value without changing anything else. <>= public :: parse_node_set_value +<>= + module subroutine parse_node_set_value (node, ival, rval, cval, sval, lval) + type(parse_node_t), intent(inout) :: node + integer, intent(in), optional :: ival + real(default), intent(in), optional :: rval + complex(default), intent(in), optional :: cval + type(string_t), intent(in), optional :: sval + logical, intent(in), optional :: lval + end subroutine parse_node_set_value <>= - subroutine parse_node_set_value (node, ival, rval, cval, sval, lval) + module subroutine parse_node_set_value (node, ival, rval, cval, sval, lval) type(parse_node_t), intent(inout) :: node integer, intent(in), optional :: ival real(default), intent(in), optional :: rval complex(default), intent(in), optional :: cval type(string_t), intent(in), optional :: sval logical, intent(in), optional :: lval if (present (ival)) then node%token = ival else if (present (rval)) then node%token = rval else if (present (cval)) then node%token = cval else if (present (lval)) then node%token = lval else if (present (sval)) then node%token = sval end if end subroutine parse_node_set_value @ %def parse_node_set_value @ Second, branch nodes. We first assign the rule: <>= public :: parse_node_create_branch +<>= + module subroutine parse_node_create_branch (node, rule) + type(parse_node_t), pointer :: node + type(syntax_rule_t), intent(in), target :: rule + end subroutine parse_node_create_branch <>= - subroutine parse_node_create_branch (node, rule) + module subroutine parse_node_create_branch (node, rule) type(parse_node_t), pointer :: node type(syntax_rule_t), intent(in), target :: rule allocate (node) node%rule => rule end subroutine parse_node_create_branch @ %def parse_node_create_branch @ Copy a node. This is a shallow copy. Note that we have to nullify the [[next]] pointer if we don't want to inherit the context of the original node. <>= procedure :: copy => parse_node_copy +<>= + module subroutine parse_node_copy (node, copy) + class(parse_node_t), intent(in) :: node + type(parse_node_t), pointer, intent(out) :: copy + end subroutine parse_node_copy <>= - subroutine parse_node_copy (node, copy) + module subroutine parse_node_copy (node, copy) class(parse_node_t), intent(in) :: node type(parse_node_t), pointer, intent(out) :: copy allocate (copy) select type (node) type is (parse_node_t) copy = node end select copy%next => null () end subroutine parse_node_copy @ %def parse_node_copy @ Append a sub-node. The sub-node must exist and be a valid target, otherwise nothing is done. <>= public :: parse_node_append_sub <>= procedure :: append_sub => parse_node_append_sub +<>= + module subroutine parse_node_append_sub (node, sub) + class(parse_node_t), intent(inout) :: node + type(parse_node_t), pointer :: sub + end subroutine parse_node_append_sub <>= - subroutine parse_node_append_sub (node, sub) + module subroutine parse_node_append_sub (node, sub) class(parse_node_t), intent(inout) :: node type(parse_node_t), pointer :: sub if (associated (sub)) then if (associated (node%sub_last)) then node%sub_last%next => sub else node%sub_first => sub end if node%sub_last => sub end if end subroutine parse_node_append_sub @ %def parse_node_append_sub @ For easy access, once the list is complete we count the number of sub-nodes. If there are no subnodes, the whole node is deleted. <>= public :: parse_node_freeze_branch +<>= + module subroutine parse_node_freeze_branch (node) + type(parse_node_t), pointer :: node + end subroutine parse_node_freeze_branch <>= - subroutine parse_node_freeze_branch (node) + module subroutine parse_node_freeze_branch (node) type(parse_node_t), pointer :: node type(parse_node_t), pointer :: current node%n_sub = 0 current => node%sub_first do while (associated (current)) node%n_sub = node%n_sub + 1 current => current%next end do if (node%n_sub == 0) deallocate (node) end subroutine parse_node_freeze_branch @ %def parse_node_freeze_branch @ Replace the syntax rule. This makes sense only if the parse node adheres to the syntax of the new rule. <>= public :: parse_node_replace_rule +<>= + module subroutine parse_node_replace_rule (node, rule) + type(parse_node_t), pointer :: node + type(syntax_rule_t), intent(in), target :: rule + end subroutine parse_node_replace_rule <>= - subroutine parse_node_replace_rule (node, rule) + module subroutine parse_node_replace_rule (node, rule) type(parse_node_t), pointer :: node type(syntax_rule_t), intent(in), target :: rule node%rule => rule end subroutine parse_node_replace_rule @ %def parse_node_replace_rule @ Replace the last subnode by the target node. Since the subnodes are stored as a linked list, we can do this only if we copy the subnodes. Furthermore, the target node must also be copied, and the [[next]] pointer of the copy is nullified. This ensures that we cannot modify the originals at the subnode level. All copies are shallow copies. This implies that further modifications at the sub-subnode level will affect the original nodes and must therefore be forbidden. Use with care, this invites to memory mismanagement. The copy nodes can be deallocated, but not finalized, since its subnodes are the same objects as the subnodes of the target node. <>= public :: parse_node_replace_last_sub +<>= + module subroutine parse_node_replace_last_sub (node, pn_target) + type(parse_node_t), intent(inout), target :: node + type(parse_node_t), intent(in), target :: pn_target + end subroutine parse_node_replace_last_sub <>= - subroutine parse_node_replace_last_sub (node, pn_target) + module subroutine parse_node_replace_last_sub (node, pn_target) type(parse_node_t), intent(inout), target :: node type(parse_node_t), intent(in), target :: pn_target type(parse_node_t), pointer :: current, current_copy, previous integer :: i select case (node%n_sub) case (1) allocate (current_copy) current_copy = pn_target node%sub_first => current_copy case (2:) current => node%sub_first allocate (current_copy) current_copy = current node%sub_first => current_copy previous => current_copy do i = 1, node%n_sub - 2 current => current%next allocate (current_copy) current_copy = current previous%next => current_copy previous => current_copy end do allocate (current_copy) current_copy = pn_target previous%next => current_copy case default call parse_node_write (node) call msg_bug ("'replace_last_sub' called for non-branch parse node") end select current_copy%next => null () node%sub_last => current_copy end subroutine parse_node_replace_last_sub @ %def parse_node_replace_last_sub @ \subsection{Accessing nodes} Return the node contents. Check if pointers are associated. No check when accessing a sub-node; assume that [[parse_node_n_sub]] is always used for the upper bound. The token extractor returns a pointer. <>= public :: parse_node_get_rule_ptr public :: parse_node_get_n_sub public :: parse_node_get_sub_ptr public :: parse_node_get_next_ptr public :: parse_node_get_last_sub_ptr <>= procedure :: get_rule_ptr => parse_node_get_rule_ptr procedure :: get_n_sub => parse_node_get_n_sub procedure :: get_sub_ptr => parse_node_get_sub_ptr procedure :: get_next_ptr => parse_node_get_next_ptr +<>= + module function parse_node_get_rule_ptr (node) result (rule) + class(parse_node_t), intent(in) :: node + type(syntax_rule_t), pointer :: rule + end function parse_node_get_rule_ptr + module function parse_node_get_last_sub_ptr (node, tag, required) result (sub) + type(parse_node_t), pointer :: sub + type(parse_node_t), intent(in), target :: node + character(*), intent(in), optional :: tag + logical, intent(in), optional :: required + end function parse_node_get_last_sub_ptr + module function parse_node_get_n_sub (node) result (n) + class(parse_node_t), intent(in) :: node + integer :: n + end function parse_node_get_n_sub + module function parse_node_get_sub_ptr (node, n, tag, required) result (sub) + class(parse_node_t), intent(in), target :: node + type(parse_node_t), pointer :: sub + integer, intent(in), optional :: n + character(*), intent(in), optional :: tag + logical, intent(in), optional :: required + end function parse_node_get_sub_ptr + module function parse_node_get_next_ptr (sub, n, tag, required) result (next) + class(parse_node_t), intent(in), target :: sub + type(parse_node_t), pointer :: next + integer, intent(in), optional :: n + character(*), intent(in), optional :: tag + logical, intent(in), optional :: required + end function parse_node_get_next_ptr <>= - function parse_node_get_rule_ptr (node) result (rule) + module function parse_node_get_rule_ptr (node) result (rule) class(parse_node_t), intent(in) :: node type(syntax_rule_t), pointer :: rule if (associated (node%rule)) then rule => node%rule else rule => null () call parse_node_undefined (node, "rule") end if end function parse_node_get_rule_ptr - function parse_node_get_n_sub (node) result (n) + module function parse_node_get_n_sub (node) result (n) class(parse_node_t), intent(in) :: node integer :: n n = node%n_sub end function parse_node_get_n_sub - function parse_node_get_sub_ptr (node, n, tag, required) result (sub) + module function parse_node_get_sub_ptr (node, n, tag, required) result (sub) class(parse_node_t), intent(in), target :: node type(parse_node_t), pointer :: sub integer, intent(in), optional :: n character(*), intent(in), optional :: tag logical, intent(in), optional :: required integer :: i sub => node%sub_first if (present (n)) then do i = 2, n if (associated (sub)) then sub => sub%next else return end if end do end if call parse_node_check (sub, tag, required) end function parse_node_get_sub_ptr - function parse_node_get_next_ptr (sub, n, tag, required) result (next) + module function parse_node_get_next_ptr (sub, n, tag, required) result (next) class(parse_node_t), intent(in), target :: sub type(parse_node_t), pointer :: next integer, intent(in), optional :: n character(*), intent(in), optional :: tag logical, intent(in), optional :: required integer :: i next => sub%next if (present (n)) then do i = 2, n if (associated (next)) then next => next%next else exit end if end do end if call parse_node_check (next, tag, required) end function parse_node_get_next_ptr - function parse_node_get_last_sub_ptr (node, tag, required) result (sub) + module function parse_node_get_last_sub_ptr (node, tag, required) result (sub) type(parse_node_t), pointer :: sub type(parse_node_t), intent(in), target :: node character(*), intent(in), optional :: tag logical, intent(in), optional :: required sub => node%sub_last call parse_node_check (sub, tag, required) end function parse_node_get_last_sub_ptr @ %def parse_node_get_rule_ptr @ %def parse_node_get_n_sub parse_node_get_sub_ptr @ %def parse_node_get_next_ptr @ %def parse_node_get_last_sub_ptr <>= subroutine parse_node_undefined (node, obj) type(parse_node_t), intent(in) :: node character(*), intent(in) :: obj call parse_node_write (node, 6) call msg_bug (" Parse-tree node: " // obj // " requested, but undefined") end subroutine parse_node_undefined @ %def parse_node_undefined @ Check if a parse node has a particular tag, and if it is associated: <>= public :: parse_node_check +<>= + module subroutine parse_node_check (node, tag, required) + type(parse_node_t), pointer :: node + character(*), intent(in), optional :: tag + logical, intent(in), optional :: required + end subroutine parse_node_check <>= - subroutine parse_node_check (node, tag, required) + module subroutine parse_node_check (node, tag, required) type(parse_node_t), pointer :: node character(*), intent(in), optional :: tag logical, intent(in), optional :: required if (associated (node)) then if (present (tag)) then if (parse_node_get_rule_key (node) /= tag) & call parse_node_mismatch (tag, node) end if else if (present (required)) then if (required) & call msg_bug (" Missing node, expected <" // tag // ">") end if end if end subroutine parse_node_check @ %def parse_node_check @ This is called by a parse-tree scanner if the expected and the actual nodes do not match <>= public :: parse_node_mismatch +<>= + module subroutine parse_node_mismatch (string, parse_node) + character(*), intent(in) :: string + type(parse_node_t), intent(in) :: parse_node + end subroutine parse_node_mismatch <>= - subroutine parse_node_mismatch (string, parse_node) + module subroutine parse_node_mismatch (string, parse_node) character(*), intent(in) :: string type(parse_node_t), intent(in) :: parse_node call parse_node_write (parse_node) call msg_bug (" Syntax mismatch, expected <" // string // ">.") end subroutine parse_node_mismatch @ %def parse_node_mismatch @ The following functions are wrappers for extracting the token contents. <>= public :: parse_node_get_logical public :: parse_node_get_integer public :: parse_node_get_real public :: parse_node_get_cmplx public :: parse_node_get_string public :: parse_node_get_key public :: parse_node_get_rule_key <>= procedure :: get_logical => parse_node_get_logical procedure :: get_integer => parse_node_get_integer procedure :: get_real => parse_node_get_real procedure :: get_cmplx => parse_node_get_cmplx procedure :: get_string => parse_node_get_string procedure :: get_key => parse_node_get_key procedure :: get_rule_key => parse_node_get_rule_key +<>= + module function parse_node_get_logical (node) result (lval) + class(parse_node_t), intent(in), target :: node + logical :: lval + end function parse_node_get_logical + module function parse_node_get_integer (node) result (ival) + class(parse_node_t), intent(in), target :: node + integer :: ival + end function parse_node_get_integer + module function parse_node_get_real (node) result (rval) + class(parse_node_t), intent(in), target :: node + real(default) :: rval + end function parse_node_get_real + module function parse_node_get_cmplx (node) result (cval) + class(parse_node_t), intent(in), target :: node + complex(default) :: cval + end function parse_node_get_cmplx + module function parse_node_get_string (node) result (sval) + class(parse_node_t), intent(in), target :: node + type(string_t) :: sval + end function parse_node_get_string + module function parse_node_get_key (node) result (kval) + class(parse_node_t), intent(in), target :: node + type(string_t) :: kval + end function parse_node_get_key + module function parse_node_get_rule_key (node) result (kval) + class(parse_node_t), intent(in), target :: node + type(string_t) :: kval + end function parse_node_get_rule_key + module function parse_node_get_token_ptr (node) result (token) + type(token_t), pointer :: token + type(parse_node_t), intent(in), target :: node + end function parse_node_get_token_ptr <>= - function parse_node_get_logical (node) result (lval) + module function parse_node_get_logical (node) result (lval) class(parse_node_t), intent(in), target :: node logical :: lval lval = token_get_logical (parse_node_get_token_ptr (node)) end function parse_node_get_logical - function parse_node_get_integer (node) result (ival) + module function parse_node_get_integer (node) result (ival) class(parse_node_t), intent(in), target :: node integer :: ival ival = token_get_integer (parse_node_get_token_ptr (node)) end function parse_node_get_integer - function parse_node_get_real (node) result (rval) + module function parse_node_get_real (node) result (rval) class(parse_node_t), intent(in), target :: node real(default) :: rval rval = token_get_real (parse_node_get_token_ptr (node)) end function parse_node_get_real - function parse_node_get_cmplx (node) result (cval) + module function parse_node_get_cmplx (node) result (cval) class(parse_node_t), intent(in), target :: node complex(default) :: cval cval = token_get_cmplx (parse_node_get_token_ptr (node)) end function parse_node_get_cmplx - function parse_node_get_string (node) result (sval) + module function parse_node_get_string (node) result (sval) class(parse_node_t), intent(in), target :: node type(string_t) :: sval sval = token_get_string (parse_node_get_token_ptr (node)) end function parse_node_get_string - function parse_node_get_key (node) result (kval) + module function parse_node_get_key (node) result (kval) class(parse_node_t), intent(in), target :: node type(string_t) :: kval kval = token_get_key (parse_node_get_token_ptr (node)) end function parse_node_get_key - function parse_node_get_rule_key (node) result (kval) + module function parse_node_get_rule_key (node) result (kval) class(parse_node_t), intent(in), target :: node type(string_t) :: kval kval = syntax_rule_get_key (parse_node_get_rule_ptr (node)) end function parse_node_get_rule_key - function parse_node_get_token_ptr (node) result (token) + module function parse_node_get_token_ptr (node) result (token) type(token_t), pointer :: token type(parse_node_t), intent(in), target :: node if (token_is_valid (node%token)) then token => node%token else call parse_node_undefined (node, "token") end if end function parse_node_get_token_ptr @ %def parse_node_get_logical parse_node_get_integer parse_node_get_real @ %def parse_node_get_string parse_node_get_key parse_node_get_rule_key @ %def parse_node_get_token_ptr @ Return a MD5 sum for a parse node. The method is to write the node to a scratch file and to evaluate the MD5 sum of that file. <>= public :: parse_node_get_md5sum +<>= + module function parse_node_get_md5sum (pn) result (md5sum_pn) + character(32) :: md5sum_pn + type(parse_node_t), intent(in) :: pn + end function parse_node_get_md5sum <>= - function parse_node_get_md5sum (pn) result (md5sum_pn) + module function parse_node_get_md5sum (pn) result (md5sum_pn) character(32) :: md5sum_pn type(parse_node_t), intent(in) :: pn integer :: u u = free_unit () open (unit = u, status = "scratch", action = "readwrite") call parse_node_write_rec (pn, unit=u) rewind (u) md5sum_pn = md5sum (u) close (u) end function parse_node_get_md5sum @ %def parse_node_get_md5sum @ \subsection{The parse tree} The parse tree is a tree of nodes, where leaf nodes hold a valid token, while branch nodes point to a list of sub-nodes. <>= public :: parse_tree_t <>= type :: parse_tree_t private type(parse_node_t), pointer :: root_node => null () contains <> end type parse_tree_t @ %def parse_tree_t @ The parser. Its arguments are the parse tree (which should be empty initially), the lexer (which should be already set up), the syntax table (which should be valid), and the input stream. The input stream is completely parsed, using the lexer setup and the syntax rules as given, and the parse tree is built accordingly. If [[check_eof]] is absent or true, the parser will complain about trailing garbage. Otherwise, it will ignore it. By default, the input stream is matched against the top rule in the specified syntax. If [[key]] is given, it is matched against the rule with the specified key instead. Failure at the top level means that no rule could match at all; in this case the error message will show the top rule. <>= public :: parse_tree_init <>= procedure :: parse => parse_tree_init +<>= + module subroutine parse_tree_init & + (parse_tree, syntax, lexer, key, check_eof) + class(parse_tree_t), intent(inout) :: parse_tree + type(lexer_t), intent(inout) :: lexer + type(syntax_t), intent(in), target :: syntax + type(string_t), intent(in), optional :: key + logical, intent(in), optional :: check_eof + end subroutine parse_tree_init <>= - subroutine parse_tree_init & + module subroutine parse_tree_init & (parse_tree, syntax, lexer, key, check_eof) class(parse_tree_t), intent(inout) :: parse_tree type(lexer_t), intent(inout) :: lexer type(syntax_t), intent(in), target :: syntax type(string_t), intent(in), optional :: key logical, intent(in), optional :: check_eof type(syntax_rule_t), pointer :: rule type(lexeme_t) :: lexeme type(parse_node_t), pointer :: node logical :: ok, check check = .true.; if (present (check_eof)) check = check_eof call lexer_clear (lexer) if (present (key)) then rule => syntax_get_rule_ptr (syntax, key) else rule => syntax_get_top_rule_ptr (syntax) end if if (associated (rule)) then call parse_node_match_rule (node, rule, ok) if (ok) then parse_tree%root_node => node else call parse_error (rule, lexeme) end if if (check) then call lex (lexeme, lexer) if (.not. lexeme_is_eof (lexeme)) then call lexer_show_location (lexer) call msg_fatal (" Syntax error " & // "(at or before the location indicated above)") end if end if else call msg_bug (" Parser failed because syntax is empty") end if contains <> end subroutine parse_tree_init @ %def parse @ The parser works recursively, following the rule tree, building the tree of nodes on the fly. If the given rule matches, the node is filled on return. If not, the node remains empty. <>= recursive subroutine parse_node_match_rule (node, rule, ok) type(parse_node_t), pointer :: node type(syntax_rule_t), intent(in), target :: rule logical, intent(out) :: ok logical, parameter :: debug = .false. integer :: type if (debug) write (6, "(A)", advance="no") "Parsing rule: " if (debug) call syntax_rule_write (rule, 6) node => null () type = syntax_rule_get_type (rule) if (syntax_rule_is_atomic (rule)) then call lex (lexeme, lexer) if (debug) write (6, "(A)", advance="no") "Token: " if (debug) call lexeme_write (lexeme, 6) call parse_node_create_leaf (node, rule, lexeme) ok = associated (node) if (.not. ok) call lexer_put_back (lexer, lexeme) else select case (type) case (S_ALTERNATIVE); call parse_alternative (node, rule, ok) case (S_GROUP); call parse_group (node, rule, ok) case (S_SEQUENCE); call parse_sequence (node, rule, .false., ok) case (S_LIST); call parse_sequence (node, rule, .true., ok) case (S_ARGS); call parse_args (node, rule, ok) case (S_IGNORE); call parse_ignore (node, ok) end select end if if (debug) then if (ok) then write (6, "(A)", advance="no") "Matched rule: " else write (6, "(A)", advance="no") "Failed rule: " end if call syntax_rule_write (rule) if (associated (node)) call parse_node_write (node) end if end subroutine parse_node_match_rule @ %def parse_node_match_rule @ Parse an alternative: try each case. If the match succeeds, the node has been filled, so return. If nothing works, return failure. <>= recursive subroutine parse_alternative (node, rule, ok) type(parse_node_t), pointer :: node type(syntax_rule_t), intent(in), target :: rule logical, intent(out) :: ok integer :: i do i = 1, syntax_rule_get_n_sub (rule) call parse_node_match_rule (node, syntax_rule_get_sub_ptr (rule, i), ok) if (ok) return end do ok = .false. end subroutine parse_alternative @ %def parse_alternative @ Parse a group: the first and third lexemes have to be the delimiters, the second one is parsed as the actual node, using now the child rule. If the first match fails, return with failure. If the other matches fail, issue an error, since we cannot lex back more than one item. <>= recursive subroutine parse_group (node, rule, ok) type(parse_node_t), pointer :: node type(syntax_rule_t), intent(in), target :: rule logical, intent(out) :: ok type(string_t), dimension(2) :: delimiter delimiter = syntax_rule_get_delimiter (rule) call lex (lexeme, lexer) if (lexeme_get_string (lexeme) == delimiter(1)) then call parse_node_match_rule (node, syntax_rule_get_sub_ptr (rule, 1), ok) if (ok) then call lex (lexeme, lexer) if (lexeme_get_string (lexeme) == delimiter(2)) then ok = .true. else call parse_error (rule, lexeme) end if else call parse_error (rule, lexeme) end if else call lexer_put_back (lexer, lexeme) ok = .false. end if end subroutine parse_group @ %def parse_group @ Parsing a sequence. The last rule element may be special: optional and/or repetitive. Each sub-node that matches is appended to the sub-node list of the parent node. If [[sep]] is true, we look for a separator after each element. <>= recursive subroutine parse_sequence (node, rule, sep, ok) type(parse_node_t), pointer :: node type(syntax_rule_t), intent(in), target :: rule logical, intent(in) :: sep logical, intent(out) :: ok type(parse_node_t), pointer :: current integer :: i, n logical :: opt, rep, cont type(string_t) :: separator call parse_node_create_branch (node, rule) if (sep) separator = syntax_rule_get_separator (rule) n = syntax_rule_get_n_sub (rule) opt = syntax_rule_last_optional (rule) rep = syntax_rule_last_repetitive (rule) ok = .true. cont = .true. SCAN_RULE: do i = 1, n call parse_node_match_rule & (current, syntax_rule_get_sub_ptr (rule, i), cont) if (cont) then call parse_node_append_sub (node, current) if (sep .and. (i>= recursive subroutine parse_args (node, rule, ok) type(parse_node_t), pointer :: node type(syntax_rule_t), intent(in), target :: rule logical, intent(out) :: ok type(string_t), dimension(2) :: delimiter delimiter = syntax_rule_get_delimiter (rule) call lex (lexeme, lexer) if (lexeme_get_string (lexeme) == delimiter(1)) then call parse_sequence (node, rule, .true., ok) if (ok) then call lex (lexeme, lexer) if (lexeme_get_string (lexeme) == delimiter(2)) then ok = .true. else call parse_error (rule, lexeme) end if else call parse_error (rule, lexeme) end if else call lexer_put_back (lexer, lexeme) ok = .false. end if end subroutine parse_args @ %def parse_args @ The IGNORE syntax reads one lexeme and discards it if it is numeric, logical or string/identifier (but not a keyword). This is a successful match. Otherwise, the match fails. The node pointer is returned disassociated in any case. <>= subroutine parse_ignore (node, ok) type(parse_node_t), pointer :: node logical, intent(out) :: ok call lex (lexeme, lexer) select case (lexeme_get_type (lexeme)) case (T_NUMERIC, T_IDENTIFIER, T_QUOTED) ok = .true. case default ok = .false. end select node => null () end subroutine parse_ignore @ %def parse_ignore @ If the match fails and we cannot step back: <>= subroutine parse_error (rule, lexeme) type(syntax_rule_t), intent(in) :: rule type(lexeme_t), intent(in) :: lexeme character(80) :: buffer integer :: u, iostat call lexer_show_location (lexer) u = free_unit () open (u, status = "scratch") write (u, "(A)", advance="no") "Expected syntax:" call syntax_rule_write (rule, u) write (u, "(A)", advance="no") "Found token:" call lexeme_write (lexeme, u) rewind (u) do read (u, "(A)", iostat=iostat) buffer if (iostat /= 0) exit call msg_message (trim (buffer)) end do call msg_fatal (" Syntax error " & // "(at or before the location indicated above)") end subroutine parse_error @ %def parse_error @ The finalizer recursively deallocates all nodes and their contents. For each node, [[parse_node_final]] is called on the sub-nodes, which in turn deallocates the token or sub-node array contained within each of them. At the end, only the top node remains to be deallocated. <>= public :: parse_tree_final <>= procedure :: final => parse_tree_final +<>= + module subroutine parse_tree_final (parse_tree) + class(parse_tree_t), intent(inout) :: parse_tree + end subroutine parse_tree_final <>= - subroutine parse_tree_final (parse_tree) + module subroutine parse_tree_final (parse_tree) class(parse_tree_t), intent(inout) :: parse_tree if (associated (parse_tree%root_node)) then call parse_node_final (parse_tree%root_node) deallocate (parse_tree%root_node) end if end subroutine parse_tree_final @ %def parse_tree_final @ Print the parse tree. Print one token per line, indented according to the depth of the node. The [[verbose]] version includes type identifiers for the nodes. <>= public :: parse_tree_write <>= procedure :: write => parse_tree_write +<>= + module subroutine parse_tree_write (parse_tree, unit, verbose) + class(parse_tree_t), intent(in) :: parse_tree + integer, intent(in), optional :: unit + logical, intent(in), optional :: verbose + end subroutine parse_tree_write <>= - subroutine parse_tree_write (parse_tree, unit, verbose) + module subroutine parse_tree_write (parse_tree, unit, verbose) class(parse_tree_t), intent(in) :: parse_tree integer, intent(in), optional :: unit logical, intent(in), optional :: verbose integer :: u logical :: short u = given_output_unit (unit); if (u < 0) return short = .true.; if (present (verbose)) short = .not. verbose write (u, "(A)") "Parse tree:" if (associated (parse_tree%root_node)) then call parse_node_write_rec (parse_tree%root_node, unit, short, 1) else write (u, *) "[empty]" end if end subroutine parse_tree_write @ %def parse_tree_write @ This is a generic error that can be issued if the parse tree does not meet the expectaions of the parser. This most certainly indicates a bug. <>= public :: parse_tree_bug +<>= + module subroutine parse_tree_bug (node, keys) + type(parse_node_t), intent(in) :: node + character(*), intent(in) :: keys + end subroutine parse_tree_bug <>= - subroutine parse_tree_bug (node, keys) + module subroutine parse_tree_bug (node, keys) type(parse_node_t), intent(in) :: node character(*), intent(in) :: keys call parse_node_write (node) call msg_bug (" Inconsistency in parse tree: expected " // keys) end subroutine parse_tree_bug @ %def parse_tree_bug @ \subsection{Access the parser} For scanning the parse tree we give access to the top node, as a node pointer. Of course, there should be no write access. <>= procedure :: get_root_ptr => parse_tree_get_root_ptr +<>= + module function parse_tree_get_root_ptr (parse_tree) result (node) + class(parse_tree_t), intent(in) :: parse_tree + type(parse_node_t), pointer :: node + end function parse_tree_get_root_ptr <>= - function parse_tree_get_root_ptr (parse_tree) result (node) + module function parse_tree_get_root_ptr (parse_tree) result (node) class(parse_tree_t), intent(in) :: parse_tree type(parse_node_t), pointer :: node node => parse_tree%root_node end function parse_tree_get_root_ptr @ %def parse_tree_get_root_ptr @ \subsection{Tools} This operation traverses the parse tree and simplifies any occurences of a set of syntax rules. If such a parse node has only one sub-node, it is replaced by that subnode. (This makes sense only of the rules to eliminate have no meaningful token.) <>= public :: parse_tree_reduce +<>= + module subroutine parse_tree_reduce (parse_tree, rule_key) + type(parse_tree_t), intent(inout) :: parse_tree + type(string_t), dimension(:), intent(in) :: rule_key + end subroutine parse_tree_reduce <>= - subroutine parse_tree_reduce (parse_tree, rule_key) + module subroutine parse_tree_reduce (parse_tree, rule_key) type(parse_tree_t), intent(inout) :: parse_tree type(string_t), dimension(:), intent(in) :: rule_key type(parse_node_t), pointer :: pn pn => parse_tree%root_node if (associated (pn)) then call parse_node_reduce (pn, null(), null()) end if contains recursive subroutine parse_node_reduce (pn, pn_prev, pn_parent) type(parse_node_t), intent(inout), pointer :: pn type(parse_node_t), intent(in), pointer :: pn_prev, pn_parent type(parse_node_t), pointer :: pn_sub, pn_sub_prev, pn_tmp pn_sub_prev => null () pn_sub => pn%sub_first do while (associated (pn_sub)) call parse_node_reduce (pn_sub, pn_sub_prev, pn) pn_sub_prev => pn_sub pn_sub => pn_sub%next end do if (parse_node_get_n_sub (pn) == 1) then if (matches (parse_node_get_rule_key (pn), rule_key)) then pn_tmp => pn pn => pn%sub_first if (associated (pn_prev)) then pn_prev%next => pn else if (associated (pn_parent)) then pn_parent%sub_first => pn else parse_tree%root_node => pn end if if (associated (pn_tmp%next)) then pn%next => pn_tmp%next else if (associated (pn_parent)) then pn_parent%sub_last => pn end if call parse_node_final (pn_tmp, recursive=.false.) deallocate (pn_tmp) end if end if end subroutine parse_node_reduce function matches (key, key_list) result (flag) logical :: flag type(string_t), intent(in) :: key type(string_t), dimension(:), intent(in) :: key_list integer :: i flag = .true. do i = 1, size (key_list) if (key == key_list(i)) return end do flag = .false. end function matches end subroutine parse_tree_reduce @ %def parse_tree_reduce @ \subsection{Applications} For a file of the form \begin{verbatim} process foo, bar process xyz \end{verbatim} get the \verb|| entry node for the first matching process tag. If no matching entry is found, the node pointer remains unassociated. <>= public :: parse_tree_get_process_ptr +<>= + module function parse_tree_get_process_ptr (parse_tree, process) result (node) + type(parse_node_t), pointer :: node + type(parse_tree_t), intent(in), target :: parse_tree + type(string_t), intent(in) :: process + end function parse_tree_get_process_ptr <>= - function parse_tree_get_process_ptr (parse_tree, process) result (node) + module function parse_tree_get_process_ptr (parse_tree, process) result (node) type(parse_node_t), pointer :: node type(parse_tree_t), intent(in), target :: parse_tree type(string_t), intent(in) :: process type(parse_node_t), pointer :: node_root, node_process_def type(parse_node_t), pointer :: node_process_phs, node_process_list integer :: j node_root => parse_tree%get_root_ptr () if (associated (node_root)) then node_process_phs => parse_node_get_sub_ptr (node_root) SCAN_FILE: do while (associated (node_process_phs)) node_process_def => parse_node_get_sub_ptr (node_process_phs) node_process_list => parse_node_get_sub_ptr (node_process_def, 2) do j = 1, parse_node_get_n_sub (node_process_list) if (parse_node_get_string & (parse_node_get_sub_ptr (node_process_list, j)) & == process) then node => parse_node_get_next_ptr (node_process_def) return end if end do node_process_phs => parse_node_get_next_ptr (node_process_phs) end do SCAN_FILE node => null () else node => null () end if end function parse_tree_get_process_ptr @ %def parse_tree_get_process_ptr @ \subsection{Unit tests} Test module, followed by the corresponding implementation module. <<[[parser_ut.f90]]>>= <> module parser_ut use unit_tests use parser_uti <> <> contains <> end module parser_ut @ %def parser_ut @ <<[[parser_uti.f90]]>>= <> module parser_uti use syntax_rules use parser <> <> contains <> end module parser_uti @ %def parser_ut @ API: driver for the unit tests below. <>= public :: parse_test <>= subroutine parse_test (u, results) integer, intent(in) :: u type(test_results_t), intent(inout) :: results <> end subroutine parse_test @ %def parse_test @ This checks the parser. <>= call test (parse_1, "parse_1", & "check the parser", & u, results) <>= public :: parse_1 <>= subroutine parse_1 (u) use ifiles use lexers integer, intent(in) :: u type(ifile_t) :: ifile type(syntax_t), target :: syntax type(lexer_t) :: lexer type(stream_t), target :: stream type(parse_tree_t), target :: parse_tree write (u, "(A)") "* Test output: Parsing" write (u, "(A)") "* Purpose: test parse routines" write (u, "(A)") call ifile_append (ifile, "SEQ expr = term addition*") call ifile_append (ifile, "SEQ addition = plus_or_minus term") call ifile_append (ifile, "SEQ term = factor multiplication*") call ifile_append (ifile, "SEQ multiplication = times_or_over factor") call ifile_append (ifile, "SEQ factor = atom exponentiation*") call ifile_append (ifile, "SEQ exponentiation = '^' atom") call ifile_append (ifile, "ALT atom = real | delimited_expr") call ifile_append (ifile, "GRO delimited_expr = ( expr )") call ifile_append (ifile, "ALT plus_or_minus = '+' | '-'") call ifile_append (ifile, "ALT times_or_over = '*' | '/'") call ifile_append (ifile, "KEY '+'") call ifile_append (ifile, "KEY '-'") call ifile_append (ifile, "KEY '*'") call ifile_append (ifile, "KEY '/'") call ifile_append (ifile, "KEY '^'") call ifile_append (ifile, "REA real") write (u, "(A)") "* File contents (syntax definition):" call ifile_write (ifile, u) write (u, "(A)") "EOF" write (u, "(A)") call syntax_init (syntax, ifile) call ifile_final (ifile) call syntax_write (syntax, u) write (u, "(A)") call lexer_init (lexer, & comment_chars = "", & quote_chars = "'", & quote_match = "'", & single_chars = "+-*/^()", & special_class = [""] , & keyword_list = syntax_get_keyword_list_ptr (syntax)) call lexer_write_setup (lexer, u) write (u, "(A)") call ifile_append (ifile, "(27+8^3-2/3)*(4+7)^2*99") write (u, "(A)") "* File contents (input file):" call ifile_write (ifile, u) write (u, "(A)") "EOF" print * call stream_init (stream, ifile) call lexer_assign_stream (lexer, stream) call parse_tree_init (parse_tree, syntax, lexer) call stream_final (stream) call parse_tree_write (parse_tree, u, .true.) print * write (u, "(A)") "* Cleanup, everything should now be empty:" write (u, "(A)") call parse_tree_final (parse_tree) call parse_tree_write (parse_tree, u, .true.) write (u, "(A)") call lexer_final (lexer) call lexer_write_setup (lexer, u) write (u, "(A)") call ifile_final (ifile) write (u, "(A)") "* File contents:" call ifile_write (ifile, u) write (u, "(A)") "EOF" write (u, "(A)") call syntax_final (syntax) call syntax_write (syntax, u) write (u, "(A)") write (u, "(A)") "* Test output end: parser_1" end subroutine parse_1 @ %def parse_1 @ \clearpage %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \section{XML Parser} The XML parser is actually independent from the previous modules of lexer and parser. However, for a generic I/O interface we make use of the [[stream_t]] facility. We need the XML parser for reading and writing LHEF data files. Only a subset of XML is actually relevant. The parser is of the ``pull'' type, i.e., the program steers the reading of XML data in a context-sensitive manner. <<[[xml.f90]]>>= <> module xml <> - use io_units - use system_defs, only: BLANK, TAB - use diagnostics - use ifiles use lexers <> <> <> -contains - -<> + interface +<> + end interface end module xml @ %def xml @ +<<[[xml_sub.f90]]>>= +<> + +submodule (xml) xml_s + + use io_units + use system_defs, only: BLANK, TAB + use diagnostics + use ifiles + + implicit none + +contains + +<> + +end submodule xml_s + +@ %def xml_s \subsection{Cached Stream} The stream type as defined in the [[lexer]] module is versatile regarding the choice of input channel, but it does not allow reading a section more than once. Here, we define an extension where we can return a string to the stream, which is stored in a cache variable, and presented to the caller for the next read. <>= public :: cstream_t <>= type, extends (stream_t) :: cstream_t logical :: cache_is_empty = .true. type(string_t) :: cache contains <> end type cstream_t @ %def cached_stream @ The initializers are simply inherited. Finalizer: also inherited, in essence: <>= procedure :: final => cstream_final +<>= + module subroutine cstream_final (stream) + class(cstream_t), intent(inout) :: stream + end subroutine cstream_final <>= - subroutine cstream_final (stream) + module subroutine cstream_final (stream) class(cstream_t), intent(inout) :: stream stream%cache_is_empty = .true. call stream%stream_t%final () end subroutine cstream_final @ %def cstream_final @ Get record: now, if there is a cache string, return this instead of the record from the stream. <>= procedure :: get_record => cstream_get_record +<>= + module subroutine cstream_get_record (cstream, string, iostat) + class(cstream_t), intent(inout) :: cstream + type(string_t), intent(out) :: string + integer, intent(out) :: iostat + end subroutine cstream_get_record <>= - subroutine cstream_get_record (cstream, string, iostat) + module subroutine cstream_get_record (cstream, string, iostat) class(cstream_t), intent(inout) :: cstream type(string_t), intent(out) :: string integer, intent(out) :: iostat if (cstream%cache_is_empty) then call stream_get_record (cstream%stream_t, string, iostat) else string = cstream%cache cstream%cache_is_empty = .true. iostat = 0 end if end subroutine cstream_get_record @ %def cstream_get_record @ Revert: return the (partially read) record to the stream, putting it in the cache. <>= procedure :: revert_record => cstream_revert_record +<>= + module subroutine cstream_revert_record (cstream, string) + class(cstream_t), intent(inout) :: cstream + type(string_t), intent(in) :: string + end subroutine cstream_revert_record <>= - subroutine cstream_revert_record (cstream, string) + module subroutine cstream_revert_record (cstream, string) class(cstream_t), intent(inout) :: cstream type(string_t), intent(in) :: string if (cstream%cache_is_empty) then cstream%cache = string cstream%cache_is_empty = .false. else call msg_bug ("CStream: attempt to revert twice") end if end subroutine cstream_revert_record @ %def cstream_revert_record @ \subsection{Attributes} A tag attribute has a name and a value; both are strings. When the attribute is defined, the [[known]] flag indicates this. <>= type :: attribute_t type(string_t) :: name type(string_t) :: value logical :: known = .false. contains <> end type attribute_t @ %def attribute_t @ Output in standard format, non-advancing. (If the value is unknown, we indicate this by a question mark, which is non-standard.) <>= procedure :: write => attribute_write +<>= + module subroutine attribute_write (object, unit) + class(attribute_t), intent(in) :: object + integer, intent(in), optional :: unit + end subroutine attribute_write <>= - subroutine attribute_write (object, unit) + module subroutine attribute_write (object, unit) class(attribute_t), intent(in) :: object integer, intent(in), optional :: unit integer :: u u = given_output_unit (unit) write (u, "(A,'=')", advance = "no") char (object%name) if (object%known) then write (u, "(A,A,A)", advance = "no") '"', char (object%value), '"' else write (u, "('?')", advance = "no") end if end subroutine attribute_write @ %def attribute_write @ This is a genuine constructor. The value is optional. <>= public :: xml_attribute +<>= + module function xml_attribute (name, value) result (attribute) + type(string_t), intent(in) :: name + type(string_t), intent(in), optional :: value + type(attribute_t) :: attribute + end function xml_attribute <>= - function xml_attribute (name, value) result (attribute) + module function xml_attribute (name, value) result (attribute) type(string_t), intent(in) :: name type(string_t), intent(in), optional :: value type(attribute_t) :: attribute attribute%name = name if (present (value)) then attribute%value = value attribute%known = .true. else attribute%known = .false. end if end function xml_attribute @ %def xml_attribute @ Set a value explicitly. <>= procedure :: set_value => attribute_set_value +<>= + module subroutine attribute_set_value (attribute, value) + class(attribute_t), intent(inout) :: attribute + type(string_t), intent(in) :: value + end subroutine attribute_set_value <>= - subroutine attribute_set_value (attribute, value) + module subroutine attribute_set_value (attribute, value) class(attribute_t), intent(inout) :: attribute type(string_t), intent(in) :: value attribute%value = value attribute%known = .true. end subroutine attribute_set_value @ %def attribute_set_value @ Extract a value. If unset, return [["?"]] <>= procedure :: get_value => attribute_get_value +<>= + module function attribute_get_value (attribute) result (value) + class(attribute_t), intent(in) :: attribute + type(string_t) :: value + end function attribute_get_value <>= - function attribute_get_value (attribute) result (value) + module function attribute_get_value (attribute) result (value) class(attribute_t), intent(in) :: attribute type(string_t) :: value if (attribute%known) then value = attribute%value else value = "?" end if end function attribute_get_value @ %def attribute_get_value @ \subsection{The Tag Type} The basic entity is the internal representation of an XML tag. The tag has a name, a well-defined set of attributes which may be mandatory or optional, and a flag that determines whether there is content or not. The content itself is not stored in the data structure. <>= public :: xml_tag_t <>= type :: xml_tag_t type(string_t) :: name type(attribute_t), dimension(:), allocatable :: attribute logical :: has_content = .false. contains <> end type xml_tag_t @ %def xml_tag_t @ Initialization. There are different versions, depending on content. <>= generic :: init => init_no_attributes procedure :: init_no_attributes => xml_tag_init_no_attributes +<>= + module subroutine xml_tag_init_no_attributes (tag, name, has_content) + class(xml_tag_t), intent(out) :: tag + type(string_t), intent(in) :: name + logical, intent(in), optional :: has_content + end subroutine xml_tag_init_no_attributes <>= - subroutine xml_tag_init_no_attributes (tag, name, has_content) + module subroutine xml_tag_init_no_attributes (tag, name, has_content) class(xml_tag_t), intent(out) :: tag type(string_t), intent(in) :: name logical, intent(in), optional :: has_content tag%name = name allocate (tag%attribute (0)) if (present (has_content)) tag%has_content = has_content end subroutine xml_tag_init_no_attributes @ %def xml_tag_init_no_attributes @ This version sets attributes. <>= generic :: init => init_with_attributes procedure :: init_with_attributes => xml_tag_init_with_attributes +<>= + module subroutine xml_tag_init_with_attributes (tag, name, attribute, has_content) + class(xml_tag_t), intent(out) :: tag + type(string_t), intent(in) :: name + type(attribute_t), dimension(:), intent(in) :: attribute + logical, intent(in), optional :: has_content + end subroutine xml_tag_init_with_attributes <>= - subroutine xml_tag_init_with_attributes (tag, name, attribute, has_content) + module subroutine xml_tag_init_with_attributes (tag, name, attribute, has_content) class(xml_tag_t), intent(out) :: tag type(string_t), intent(in) :: name type(attribute_t), dimension(:), intent(in) :: attribute logical, intent(in), optional :: has_content tag%name = name allocate (tag%attribute (size (attribute))) tag%attribute = attribute if (present (has_content)) tag%has_content = has_content end subroutine xml_tag_init_with_attributes @ %def xml_tag_init_with_attributes @ Set an attribute value explicitly. <>= procedure :: set_attribute => xml_tag_set_attribute +<>= + module subroutine xml_tag_set_attribute (tag, i, value) + class(xml_tag_t), intent(inout) :: tag + integer, intent(in) :: i + type(string_t), intent(in) :: value + end subroutine xml_tag_set_attribute <>= - subroutine xml_tag_set_attribute (tag, i, value) + module subroutine xml_tag_set_attribute (tag, i, value) class(xml_tag_t), intent(inout) :: tag integer, intent(in) :: i type(string_t), intent(in) :: value call tag%attribute(i)%set_value (value) end subroutine xml_tag_set_attribute @ %def xml_tag_set_attribute @ Get an attribute value. <>= procedure :: get_attribute => xml_tag_get_attribute +<>= + module function xml_tag_get_attribute (tag, i) result (value) + class(xml_tag_t), intent(in) :: tag + integer, intent(in) :: i + type(string_t) :: value + end function xml_tag_get_attribute <>= - function xml_tag_get_attribute (tag, i) result (value) + module function xml_tag_get_attribute (tag, i) result (value) class(xml_tag_t), intent(in) :: tag integer, intent(in) :: i type(string_t) :: value value = tag%attribute(i)%get_value () end function xml_tag_get_attribute @ %def xml_tag_set_attribute @ Output to an I/O unit, default STDOUT. We use non-advancing output. <>= generic :: write => write_without_content procedure :: write_without_content => xml_tag_write +<>= + module subroutine xml_tag_write (tag, unit) + class(xml_tag_t), intent(in) :: tag + integer, intent(in), optional :: unit + end subroutine xml_tag_write <>= - subroutine xml_tag_write (tag, unit) + module subroutine xml_tag_write (tag, unit) class(xml_tag_t), intent(in) :: tag integer, intent(in), optional :: unit integer :: u, i u = given_output_unit (unit) write (u, "('<',A)", advance = "no") char (tag%name) do i = 1, size (tag%attribute) write (u, "(1x)", advance = "no") call tag%attribute(i)%write (u) end do if (tag%has_content) then write (u, "('>')", advance = "no") else write (u, "(' />')", advance = "no") end if end subroutine xml_tag_write @ %def xml_tag_write @ If there is content, we should write the context next (arbitrary format), the write the corresponding closing tag. Again, non-advancing. <>= procedure :: close => xml_tag_close +<>= + module subroutine xml_tag_close (tag, unit) + class(xml_tag_t), intent(in) :: tag + integer, intent(in), optional :: unit + end subroutine xml_tag_close <>= - subroutine xml_tag_close (tag, unit) + module subroutine xml_tag_close (tag, unit) class(xml_tag_t), intent(in) :: tag integer, intent(in), optional :: unit integer :: u u = given_output_unit (unit) write (u, "('')", advance = "no") char (tag%name) end subroutine xml_tag_close @ %def xml_tag_close @ Given content as a single string, we can write tag, content, and closing at once <>= generic :: write => write_with_content procedure :: write_with_content => xml_tag_write_with_content +<>= + module subroutine xml_tag_write_with_content (tag, content, unit) + class(xml_tag_t), intent(in) :: tag + type(string_t), intent(in) :: content + integer, intent(in), optional :: unit + end subroutine xml_tag_write_with_content <>= - subroutine xml_tag_write_with_content (tag, content, unit) + module subroutine xml_tag_write_with_content (tag, content, unit) class(xml_tag_t), intent(in) :: tag type(string_t), intent(in) :: content integer, intent(in), optional :: unit integer :: u u = given_output_unit (unit) call tag%write (u) write (u, "(A)", advance = "no") char (content) call tag%close (u) end subroutine xml_tag_write_with_content @ %def xml_tag_write_with_content @ Input from stream. We know what we are looking for, so we check if the name matches, then fill attributes. We report an error if (a) an I/O error occurs, (b) we reach EOF before encountering the tag, (c) if the tag is incomplete. Trailing text after reading a tag is put back to the input stream. We assume that the tag is not broken across records, and that there is only one tag within the record. This is more restricted than standard XML. <>= procedure :: read => xml_tag_read +<>= + module subroutine xml_tag_read (tag, cstream, success) + class(xml_tag_t), intent(inout) :: tag + type(cstream_t), intent(inout) :: cstream + logical, intent(out) :: success + end subroutine xml_tag_read <>= - subroutine xml_tag_read (tag, cstream, success) + module subroutine xml_tag_read (tag, cstream, success) class(xml_tag_t), intent(inout) :: tag type(cstream_t), intent(inout) :: cstream logical, intent(out) :: success type(string_t) :: string integer :: iostat, p1, p2 character(2), parameter :: WS = BLANK // TAB logical :: done ! Skip comments and blank lines FIND_NON_COMMENT: do FIND_NONEMPTY_RECORD: do call cstream%get_record (string, iostat) if (iostat /= 0) call err_io () p1 = verify (string, WS) if (p1 > 0) exit FIND_NONEMPTY_RECORD end do FIND_NONEMPTY_RECORD ! Look for comment beginning p2 = p1 + 3 if (extract (string, p1, p2) /= "") then ! Return trailing text to the stream string = extract (string, p2 + 1) if (string /= "") call cstream%revert_record (string) exit FIND_COMMENT_END end if end do call cstream%get_record (string, iostat) if (iostat /= 0) call err_io () end do FIND_COMMENT_END end do FIND_NON_COMMENT ! Look for opening < p2 = p1 if (extract (string, p1, p2) /= "<") then call cstream%revert_record (string) success = .false.; return else ! Look for tag name string = extract (string, p2 + 1) p1 = verify (string, WS); if (p1 == 0) call err_incomplete () p2 = p1 + len (tag%name) - 1 if (extract (string, p1, p2) /= tag%name) then call cstream%revert_record ("<" // string) success = .false.; return else ! Look for attributes string = extract (string, p2 + 1) READ_ATTRIBUTES: do call tag%read_attribute (string, done) if (done) exit READ_ATTRIBUTES end do READ_ATTRIBUTES ! Look for closing > p1 = verify (string, WS); if (p1 == 0) call err_incomplete () p2 = p1 if (extract (string, p1, p1) == ">") then tag%has_content = .true. else ! Look for closing /> p2 = p1 + 1 if (extract (string, p1, p2) /= "/>") call err_incomplete () end if ! Return trailing text to the stream string = extract (string, p2 + 1) if (string /= "") call cstream%revert_record (string) success = .true. end if end if contains subroutine err_io () select case (iostat) case (:-1) call msg_fatal ("XML: Error reading tag '" // char (tag%name) & // "': end of file") case (1:) call msg_fatal ("XML: Error reading tag '" // char (tag%name) & // "': I/O error") end select success = .false. end subroutine err_io subroutine err_incomplete () call msg_fatal ("XML: Error reading tag '" // char (tag%name) & // "': tag incomplete") success = .false. end subroutine err_incomplete end subroutine xml_tag_read @ %def xml_tag_read @ Read a single attribute. If the attribute is valid, assign the value. Setting a value twice should be an error, but is not detected. If the attribute is unknown, ignore it. If we reach the closing sign, report this. <>= procedure :: read_attribute => xml_tag_read_attribute +<>= + module subroutine xml_tag_read_attribute (tag, string, done) + class(xml_tag_t), intent(inout) :: tag + type(string_t), intent(inout) :: string + logical, intent(out) :: done + end subroutine xml_tag_read_attribute <>= - subroutine xml_tag_read_attribute (tag, string, done) + module subroutine xml_tag_read_attribute (tag, string, done) class(xml_tag_t), intent(inout) :: tag type(string_t), intent(inout) :: string logical, intent(out) :: done character(2), parameter :: WS = BLANK // TAB type(string_t) :: name, value integer :: p1, p2, i p1 = verify (string, WS); if (p1 == 0) call err () p2 = p1 ! Look for first terminating '>' or '/>' if (extract (string, p1, p2) == ">") then done = .true. else p2 = p1 + 1 if (extract (string, p1, p2) == "/>") then done = .true. else ! Look for '=' p2 = scan (string, '=') if (p2 == 0) call err () name = trim (extract (string, p1, p2 - 1)) ! Look for '"' string = extract (string, p2 + 1) p1 = verify (string, WS); if (p1 == 0) call err () p2 = p1 if (extract (string, p1, p2) /= '"') call err () ! Look for matching '"' and get value string = extract (string, p2 + 1) p1 = 1 p2 = scan (string, '"') if (p2 == 0) call err () value = extract (string, p1, p2 - 1) SCAN_KNOWN_ATTRIBUTES: do i = 1, size (tag%attribute) if (name == tag%attribute(i)%name) then call tag%attribute(i)%set_value (value) exit SCAN_KNOWN_ATTRIBUTES end if end do SCAN_KNOWN_ATTRIBUTES string = extract (string, p2 + 1) done = .false. end if end if contains subroutine err () call msg_fatal ("XML: Error reading attributes of '" // char (tag%name) & // "': syntax error") end subroutine err end subroutine xml_tag_read_attribute @ %def xml_tag_read_attribute @ Read the content string of a tag. We check for the appropriate closing tag and report it. If a closing tag does not match in name, ignore it. Note: this assumes that no tag with the same name is embedded in the current content. Also, we do not check for XML validity inside the content. <>= procedure :: read_content => xml_tag_read_content +<>= + module subroutine xml_tag_read_content (tag, cstream, content, closing) + class(xml_tag_t), intent(in) :: tag + type(cstream_t), intent(inout) :: cstream + type(string_t), intent(out) :: content + logical, intent(out) :: closing + end subroutine xml_tag_read_content <>= - subroutine xml_tag_read_content (tag, cstream, content, closing) + module subroutine xml_tag_read_content (tag, cstream, content, closing) class(xml_tag_t), intent(in) :: tag type(cstream_t), intent(inout) :: cstream type(string_t), intent(out) :: content type(string_t) :: string logical, intent(out) :: closing integer :: iostat integer :: p0, p1, p2 character(2), parameter :: WS = BLANK // TAB call cstream%get_record (content, iostat) if (iostat /= 0) call err_io () closing = .false. FIND_CLOSING: do p0 = 1, len (content) - 1 ! Look for terminating string = extract (string, p2 + 1) p1 = verify (string, WS); if (p1 == 0) call err_incomplete () p2 = p1 if (extract (string, p1, p2) /= ">") call err_incomplete () ! Return trailing text to the stream string = extract (string, p2 + 1) if (string /= "") call cstream%revert_record (string) content = extract (content, 1, p0 -1) closing = .true. exit FIND_CLOSING end if end if end do FIND_CLOSING contains subroutine err_io () select case (iostat) case (:-1) call msg_fatal ("XML: Error reading content of '" // char (tag%name) & // "': end of file") case (1:) call msg_fatal ("XML: Error reading content of '" // char (tag%name) & // "': I/O error") end select closing = .false. end subroutine err_io subroutine err_incomplete () call msg_fatal ("XML: Error reading content '" // char (tag%name) & // "': closing tag incomplete") closing = .false. end subroutine err_incomplete end subroutine xml_tag_read_content @ %def xml_tag_read_content @ \subsection{Unit tests} Test module, followed by the corresponding implementation module. <<[[xml_ut.f90]]>>= <> module xml_ut use unit_tests use xml_uti <> <> contains <> end module xml_ut @ %def xml_ut @ <<[[xml_uti.f90]]>>= <> module xml_uti <> use io_units use xml <> <> contains <> <> end module xml_uti @ %def xml_ut @ API: driver for the unit tests below. <>= public :: xml_test <>= subroutine xml_test (u, results) integer, intent(in) :: u type(test_results_t), intent(inout) :: results <> end subroutine xml_test @ %def xml_test @ \subsection{Auxiliary Routines} Show the contents of a temporary file, i.e., open unit. <>= subroutine show (u_tmp, u) integer, intent(in) :: u_tmp, u character (80) :: buffer integer :: iostat write (u, "(A)") "File content:" rewind (u_tmp) do read (u_tmp, "(A)", iostat = iostat) buffer if (iostat /= 0) exit write (u, "(A)") trim (buffer) end do rewind (u_tmp) end subroutine show @ %def show @ \subsection{Basic Tag I/O} Write a tag and read it again, using a temporary file. <>= call test (xml_1, "xml_1", & "basic I/O", & u, results) <>= public :: xml_1 <>= subroutine xml_1 (u) integer, intent(in) :: u type(xml_tag_t), allocatable :: tag integer :: u_tmp type(cstream_t) :: cstream logical :: success write (u, "(A)") "* Test output: xml_1" write (u, "(A)") "* Purpose: write and read tag" write (u, "(A)") write (u, "(A)") "* Empty tag" write (u, *) u_tmp = free_unit () open (u_tmp, status = "scratch", action = "readwrite") allocate (tag) call tag%init (var_str ("tagname")) call tag%write (u_tmp) write (u_tmp, *) deallocate (tag) call show (u_tmp, u) write (u, *) write (u, "(A)") "Result from read:" call cstream%init (u_tmp) allocate (tag) call tag%init (var_str ("tagname")) call tag%read (cstream, success) call tag%write (u) write (u, *) write (u, "(A,L1)") "success = ", success deallocate (tag) close (u_tmp) call cstream%final () write (u, *) write (u, "(A)") "* Tag with preceding blank lines" write (u, *) u_tmp = free_unit () open (u_tmp, status = "scratch", action = "readwrite") allocate (tag) call tag%init (var_str ("tagname")) write (u_tmp, *) write (u_tmp, "(A)") " " write (u_tmp, "(2x)", advance = "no") call tag%write (u_tmp) write (u_tmp, *) deallocate (tag) call show (u_tmp, u) write (u, *) write (u, "(A)") "Result from read:" call cstream%init (u_tmp) allocate (tag) call tag%init (var_str ("tagname")) call tag%read (cstream, success) call tag%write (u) write (u, *) write (u, "(A,L1)") "success = ", success deallocate (tag) close (u_tmp) call cstream%final () write (u, *) write (u, "(A)") "* Tag with preceding comments" write (u, *) u_tmp = free_unit () open (u_tmp, status = "scratch", action = "readwrite") allocate (tag) call tag%init (var_str ("tagname")) write (u_tmp, "(A)") "" write (u_tmp, *) write (u_tmp, "(A)") "" call tag%write (u_tmp) write (u_tmp, *) deallocate (tag) call show (u_tmp, u) write (u, *) write (u, "(A)") "Result from read:" call cstream%init (u_tmp) allocate (tag) call tag%init (var_str ("tagname")) call tag%read (cstream, success) call tag%write (u) write (u, *) write (u, "(A,L1)") "success = ", success close (u_tmp) deallocate (tag) call cstream%final () write (u, *) write (u, "(A)") "* Tag with name mismatch" write (u, *) u_tmp = free_unit () open (u_tmp, status = "scratch", action = "readwrite") allocate (tag) call tag%init (var_str ("wrongname")) call tag%write (u_tmp) write (u_tmp, *) deallocate (tag) call show (u_tmp, u) write (u, *) write (u, "(A)") "Result from read:" call cstream%init (u_tmp) allocate (tag) call tag%init (var_str ("tagname")) call tag%read (cstream, success) call tag%write (u) write (u, *) write (u, "(A,L1)") "success = ", success deallocate (tag) close (u_tmp) call cstream%final () write (u, "(A)") write (u, "(A)") "* Test output end: xml_1" end subroutine xml_1 @ %def xml_1 @ \subsection{Optional Tag} Write and read two tags, one of them optional. <>= call test (xml_2, "xml_2", & "optional tag", & u, results) <>= public :: xml_2 <>= subroutine xml_2 (u) integer, intent(in) :: u type(xml_tag_t), allocatable :: tag1, tag2 integer :: u_tmp type(cstream_t) :: cstream logical :: success write (u, "(A)") "* Test output: xml_2" write (u, "(A)") "* Purpose: handle optional tag" write (u, "(A)") write (u, "(A)") "* Optional tag present" write (u, *) u_tmp = free_unit () open (u_tmp, status = "scratch", action = "readwrite") allocate (tag1) call tag1%init (var_str ("option")) call tag1%write (u_tmp) write (u_tmp, *) allocate (tag2) call tag2%init (var_str ("tagname")) call tag2%write (u_tmp) write (u_tmp, *) deallocate (tag1, tag2) call show (u_tmp, u) write (u, *) write (u, "(A)") "Result from read:" call cstream%init (u_tmp) allocate (tag1) call tag1%init (var_str ("option")) call tag1%read (cstream, success) call tag1%write (u) write (u, *) write (u, "(A,L1)") "success = ", success write (u, *) allocate (tag2) call tag2%init (var_str ("tagname")) call tag2%read (cstream, success) call tag2%write (u) write (u, *) write (u, "(A,L1)") "success = ", success deallocate (tag1, tag2) close (u_tmp) call cstream%final () write (u, *) write (u, "(A)") "* Optional tag absent" write (u, *) u_tmp = free_unit () open (u_tmp, status = "scratch", action = "readwrite") allocate (tag2) call tag2%init (var_str ("tagname")) call tag2%write (u_tmp) write (u_tmp, *) deallocate (tag2) call show (u_tmp, u) write (u, *) write (u, "(A)") "Result from read:" call cstream%init (u_tmp) allocate (tag1) call tag1%init (var_str ("option")) call tag1%read (cstream, success) call tag1%write (u) write (u, *) write (u, "(A,L1)") "success = ", success write (u, *) allocate (tag2) call tag2%init (var_str ("tagname")) call tag2%read (cstream, success) call tag2%write (u) write (u, *) write (u, "(A,L1)") "success = ", success deallocate (tag1, tag2) close (u_tmp) call cstream%final () write (u, "(A)") write (u, "(A)") "* Test output end: xml_2" end subroutine xml_2 @ %def xml_2 @ \subsection{Optional Tag} Write and read a tag with single-line content. <>= call test (xml_3, "xml_3", & "content", & u, results) <>= public :: xml_3 <>= subroutine xml_3 (u) integer, intent(in) :: u type(xml_tag_t), allocatable :: tag integer :: u_tmp type(cstream_t) :: cstream logical :: success, closing type(string_t) :: content write (u, "(A)") "* Test output: xml_3" write (u, "(A)") "* Purpose: handle tag with content" write (u, "(A)") write (u, "(A)") "* Tag without content" write (u, *) u_tmp = free_unit () open (u_tmp, status = "scratch", action = "readwrite") allocate (tag) call tag%init (var_str ("tagname")) call tag%write (u_tmp) write (u_tmp, *) deallocate (tag) call show (u_tmp, u) write (u, *) write (u, "(A)") "Result from read:" call cstream%init (u_tmp) allocate (tag) call tag%init (var_str ("tagname")) call tag%read (cstream, success) call tag%write (u) write (u, *) write (u, "(A,L1)") "success = ", success write (u, "(A,L1)") "content = ", tag%has_content write (u, *) deallocate (tag) close (u_tmp) call cstream%final () write (u, "(A)") "* Tag with content" write (u, *) u_tmp = free_unit () open (u_tmp, status = "scratch", action = "readwrite") allocate (tag) call tag%init (var_str ("tagname"), has_content = .true.) call tag%write (var_str ("Content text"), u_tmp) write (u_tmp, *) deallocate (tag) call show (u_tmp, u) write (u, *) write (u, "(A)") "Result from read:" call cstream%init (u_tmp) allocate (tag) call tag%init (var_str ("tagname")) call tag%read (cstream, success) call tag%read_content (cstream, content, closing) call tag%write (u) write (u, "(A)", advance = "no") char (content) call tag%close (u) write (u, *) write (u, "(A,L1)") "success = ", success write (u, "(A,L1)") "content = ", tag%has_content write (u, "(A,L1)") "closing = ", closing deallocate (tag) close (u_tmp) call cstream%final () write (u, *) write (u, "(A)") "* Tag with multiline content" write (u, *) u_tmp = free_unit () open (u_tmp, status = "scratch", action = "readwrite") allocate (tag) call tag%init (var_str ("tagname"), has_content = .true.) call tag%write (u_tmp) write (u_tmp, *) write (u_tmp, "(A)") "Line 1" write (u_tmp, "(A)") "Line 2" call tag%close (u_tmp) write (u_tmp, *) deallocate (tag) call show (u_tmp, u) write (u, *) write (u, "(A)") "Result from read:" call cstream%init (u_tmp) allocate (tag) call tag%init (var_str ("tagname")) call tag%read (cstream, success) call tag%write (u) write (u, *) do call tag%read_content (cstream, content, closing) if (closing) exit write (u, "(A)") char (content) end do call tag%close (u) write (u, *) write (u, "(A,L1)") "success = ", success write (u, "(A,L1)") "content = ", tag%has_content deallocate (tag) close (u_tmp) call cstream%final () write (u, "(A)") write (u, "(A)") "* Test output end: xml_3" end subroutine xml_3 @ %def xml_3 @ \subsection{Basic Tag I/O} Write a tag and read it again, using a temporary file. <>= call test (xml_4, "xml_4", & "attributes", & u, results) <>= public :: xml_4 <>= subroutine xml_4 (u) integer, intent(in) :: u type(xml_tag_t), allocatable :: tag integer :: u_tmp type(cstream_t) :: cstream logical :: success write (u, "(A)") "* Test output: xml_4" write (u, "(A)") "* Purpose: handle tag with attributes" write (u, "(A)") write (u, "(A)") "* Tag with one mandatory and one optional attribute," write (u, "(A)") "* unknown attribute ignored" write (u, *) u_tmp = free_unit () open (u_tmp, status = "scratch", action = "readwrite") allocate (tag) call tag%init (var_str ("tagname"), & [xml_attribute (var_str ("a1"), var_str ("foo")), & xml_attribute (var_str ("a3"), var_str ("gee"))]) call tag%write (u_tmp) deallocate (tag) call show (u_tmp, u) write (u, *) write (u, "(A)") "Result from read:" call cstream%init (u_tmp) allocate (tag) call tag%init (var_str ("tagname"), & [xml_attribute (var_str ("a1")), & xml_attribute (var_str ("a2"), var_str ("bar"))]) call tag%read (cstream, success) call tag%write (u) write (u, *) deallocate (tag) close (u_tmp) call cstream%final () write (u, "(A)") write (u, "(A)") "* Test output end: xml_4" end subroutine xml_4 @ %def xml_4 Index: trunk/src/parsing/Makefile.am =================================================================== --- trunk/src/parsing/Makefile.am (revision 8774) +++ trunk/src/parsing/Makefile.am (revision 8775) @@ -1,198 +1,217 @@ ## Makefile.am -- Makefile for WHIZARD ## ## Process this file with automake to produce Makefile.in # # Copyright (C) 1999-2021 by # Wolfgang Kilian # Thorsten Ohl # Juergen Reuter # with contributions from # cf. main AUTHORS file # # WHIZARD is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2, or (at your option) # any later version. # # WHIZARD is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. # ######################################################################## ## The files in this directory implement text-handling and parsing utilities. ## We create a library which is still to be combined with auxiliary libs. noinst_LTLIBRARIES = libparsing.la check_LTLIBRARIES = libparsing_ut.la libparsing_la_SOURCES = \ + $(PARSING_MODULES) \ + $(PARSING_SUBMODULES) + +PARSING_MODULES = \ ifiles.f90 \ lexers.f90 \ syntax_rules.f90 \ parser.f90 \ xml.f90 +PARSING_SUBMODULES = \ + ifiles_sub.f90 \ + lexers_sub.f90 \ + syntax_rules_sub.f90 \ + parser_sub.f90 \ + xml_sub.f90 + libparsing_ut_la_SOURCES = \ lexers_uti.f90 lexers_ut.f90 \ parser_uti.f90 parser_ut.f90 \ xml_uti.f90 xml_ut.f90 ## Omitting this would exclude it from the distribution dist_noinst_DATA = parsing.nw # Modules and installation # Dump module names into file Modules execmoddir = $(fmoddir)/whizard nodist_execmod_HEADERS = \ - ${libparsing_la_SOURCES:.f90=.$(FCMOD)} + ${PARSING_MODULES:.f90=.$(FCMOD)} +# Submodules must not be included here libparsing_Modules = \ - ${libparsing_la_SOURCES:.f90=} \ + ${PARSING_MODULES:.f90=} \ ${libparsing_ut_la_SOURCES:.f90=} Modules: Makefile @for module in $(libparsing_Modules); do \ echo $$module >> $@.new; \ done @if diff $@ $@.new -q >/dev/null; then \ rm $@.new; \ else \ mv $@.new $@; echo "Modules updated"; \ fi BUILT_SOURCES = Modules ## Fortran module dependencies # Get module lists from other directories module_lists = \ ../basics/Modules \ ../utilities/Modules \ ../testing/Modules \ ../system/Modules \ ../combinatorics/Modules $(module_lists): $(MAKE) -C `dirname $@` Modules Module_dependencies.sed: $(libparsing_la_SOURCES) $(libparsing_ut_la_SOURCES) Module_dependencies.sed: $(module_lists) @rm -f $@ echo 's/, *only:.*//' >> $@ echo 's/, *&//' >> $@ echo 's/, *.*=>.*//' >> $@ echo 's/$$/.lo/' >> $@ for list in $(module_lists); do \ dir="`dirname $$list`"; \ for mod in `cat $$list`; do \ echo 's!: '$$mod'.lo$$!': $$dir/$$mod'.lo!' >> $@; \ done \ done DISTCLEANFILES = Module_dependencies.sed # The following line just says # include Makefile.depend # but in a portable fashion (depending on automake's AM_MAKE_INCLUDE @am__include@ @am__quote@Makefile.depend@am__quote@ Makefile.depend: Module_dependencies.sed Makefile.depend: $(libparsing_la_SOURCES) $(libparsing_ut_la_SOURCES) @rm -f $@ for src in $^; do \ module="`basename $$src | sed 's/\.f[90][0358]//'`"; \ grep '^ *use ' $$src \ | grep -v '!NODEP!' \ | sed -e 's/^ *use */'$$module'.lo: /' \ -f Module_dependencies.sed; \ done > $@ DISTCLEANFILES += Makefile.depend # Fortran90 module files are generated at the same time as object files .lo.$(FCMOD): @: # touch $@ AM_FCFLAGS = -I../basics -I../utilities -I../testing -I../system -I../combinatorics +######################################################################## +# For the moment, the submodule dependencies will be hard-coded +ifiles_sub.lo: ifiles.lo +lexers_sub.lo: lexers.lo +syntax_rules_sub.lo: syntax_rules.lo +parser_sub.lo: parser.lo +xml_sub.lo: xml.lo ######################################################################## ## Default Fortran compiler options ## Profiling if FC_USE_PROFILING AM_FCFLAGS += $(FCFLAGS_PROFILING) endif ## OpenMP if FC_USE_OPENMP AM_FCFLAGS += $(FCFLAGS_OPENMP) endif # MPI if FC_USE_MPI AM_FCFLAGS += $(FCFLAGS_MPI) endif ######################################################################## ## Non-standard targets and dependencies ## (Re)create F90 sources from NOWEB source. if NOWEB_AVAILABLE PRELUDE = $(top_srcdir)/src/noweb-frame/whizard-prelude.nw POSTLUDE = $(top_srcdir)/src/noweb-frame/whizard-postlude.nw parsing.stamp: $(PRELUDE) $(srcdir)/parsing.nw $(POSTLUDE) @rm -f parsing.tmp @touch parsing.tmp for src in $(libparsing_la_SOURCES) $(libparsing_ut_la_SOURCES); do \ $(NOTANGLE) -R[[$$src]] $^ | $(CPIF) $$src; \ done @mv -f parsing.tmp parsing.stamp $(libparsing_la_SOURCES) $(libparsing_ut_la_SOURCES): parsing.stamp ## Recover from the removal of $@ @if test -f $@; then :; else \ rm -f parsing.stamp; \ $(MAKE) $(AM_MAKEFLAGS) parsing.stamp; \ fi endif ######################################################################## ## Non-standard cleanup tasks ## Remove sources that can be recreated using NOWEB if NOWEB_AVAILABLE maintainer-clean-noweb: -rm -f *.f90 *.c endif .PHONY: maintainer-clean-noweb ## Remove those sources also if builddir and srcdir are different if NOWEB_AVAILABLE clean-noweb: test "$(srcdir)" != "." && rm -f *.f90 *.c || true endif .PHONY: clean-noweb ## Remove F90 module files clean-local: clean-noweb -rm -f parsing.stamp parsing.tmp -rm -f *.$(FCMOD) if FC_SUBMODULES - -rm -f *.smod + -rm -f *.smod *.sub endif ## Remove backup files maintainer-clean-backup: -rm -f *~ .PHONY: maintainer-clean-backup ## Register additional clean targets maintainer-clean-local: maintainer-clean-noweb maintainer-clean-backup