% \iffalse
% vim: set expandtab:
% vim: set shiftwidth=2:
% vim: set tabstop=2:
% \fi
% \iffalse meta-comment
%
% Copyright (C) 2026 by Lukas Heindl <oss.heindl+latex@protonmail.com>
% ---------------------------------------------------------------------------
% This work may be distributed and/or modified under the
% conditions of the LaTeX Project Public License, either version 1.3c
% of this license or (at your option) any later version.
% The latest version of this license is in
%   http://www.latex-project.org/lppl.txt
% and version 1.3c or later is part of all distributions of LaTeX
% version 2008/05/04 or later.
%
% This work has the LPPL maintenance status `maintained'.
%
% The Current Maintainer of this work is Lukas Heindl.
%
% This work consists of all files listed in manifest.txt.
%
% \fi
%
% \iffalse
%<*driver>
\ProvidesFile{hexdumptikz-parser-hd.dtx}
%</driver>
%<package>\NeedsTeXFormat{LaTeX2e}[2022-06-01]
%
%<*driver>
\begin{document}
  \DocInput{\jobname.dtx}
  \PrintChanges
  \PrintIndex
\end{document}
%
% \changes{v0.0.0}{2026-05-14}{First draft}
%
%</driver>
% \fi
%
% \iffalse
%<*package>
%<@@=hexdumptikz_parser_hd>
% \fi
%
% \maketitle
%
% \begin{abstract}
% Parse files with classical hexdump format such as the one produced by \texttt{od}, \texttt{hd} or \emph{Wireshark}.
% \end{abstract}
%
% Identify the package and give the overall version information.
%    \begin{macrocode}
\ProvidesExplPackage {hexdumptikz-parser-hd} {2026-06-16} {1.0.0}
  {Printing and annotating hexdumps with TikZ}
%    \end{macrocode}
%
% Load dependencies
%    \begin{macrocode}
\RequirePackage { hexdumptikz-common }
%    \end{macrocode}
%
% \subsection{Public}
% \begin{fn}{\hexdumptikz_parser_hd:nN}
% Public entrance function for parsing a hexdump
% \begin{sideeffects}
%   \sclobber & \sdir & \texttt{l\_hexdumptikz\_parser\_ior} \\
%   \sclobber & \sdir & \texttt{l\_hexdumptikz\_parser\_line\_tl} \\
%   \sclobber & \sdir & \texttt{l\_hexdumptikz\_parser\_line\_int} \\
%   \sclobber & \sdir & \texttt{l\_hexdumptikz\_parser\_offset\_tl} \\
%   \sclobber & \sdir & \texttt{l\_hexdumptikz\_parser\_bytes\_seq} \\
%   \sclobber & \sdir & \texttt{l\_hexdumptikz\_parser\_finished\_bool} \\
% \end{sideeffects}
% \begin{args}
%   1 & \ain & filename/-path to parse \\
%   2 & \ain & callback function \\
% \end{args}
%    \begin{macrocode}
\cs_new_protected:Npn \hexdumptikz_parser_hd:nN #1 #2
{
%    \end{macrocode}
% Initialization.
%
% The \enquote{finished} flag has to start out as \emph{false}: it is handed to the callback as its fourth argument and is tested right after every callback invocation.
% Starting with \emph{true} would end the loop after the first non-empty line unless the callback explicitly cleared the flag.
%
% The \enquote{last line seen} flag, which the byte conversion helper sets once a row is shorter than a full row, is cleared as well so that the state left behind by a previous run cannot influence this one.
%    \begin{macrocode}
  \bool_set_false:N \l_hexdumptikz_parser_finished_bool
  \bool_set_false:N \l_hexdumptikz_parser_last_line_seen_bool
  \int_zero:N       \l_hexdumptikz_parser_line_int
%    \end{macrocode}
%
% Open the file for reading
%    \begin{macrocode}
  \ior_open:Nn \l_hexdumptikz_parser_ior { #1 }
%    \end{macrocode}
%
% Iterate over the lines of the input file and strip surrounding spaces from each line
%    \begin{macrocode}
  \ior_map_variable:NNn
  \l_hexdumptikz_parser_ior
  \l_hexdumptikz_parser_line_tl
  {
    \tl_set:Ne \l_hexdumptikz_parser_line_tl
      { \tl_trim_spaces:e { \l_hexdumptikz_parser_line_tl } }
%    \end{macrocode}
%
% Silently ignore empty lines.
% The test has to inspect the \emph{value} of the variable (\texttt{V}-type argument); testing the variable name itself is never blank.
% A blank input line is delivered by the reading function as a single \texttt{\textbackslash par} token, hence that case is skipped as well.
%    \begin{macrocode}
    \tl_if_blank:VF \l_hexdumptikz_parser_line_tl
    {
      \tl_if_eq:NnF \l_hexdumptikz_parser_line_tl { \par }
      {
%    \end{macrocode}
%
% Do the actual parsing
%    \begin{macrocode}
        \@@_normalize_line:NNN
          \l_hexdumptikz_parser_line_tl
          \l_hexdumptikz_parser_offset_tl
          \l_hexdumptikz_parser_bytes_seq
%    \end{macrocode}
%
% Pass the parsed data to the callback: line index, offset, bytes and the flag the callback may set to request a stop
%    \begin{macrocode}
        #2
          \l_hexdumptikz_parser_line_int
          \l_hexdumptikz_parser_offset_tl
          \l_hexdumptikz_parser_bytes_seq
          \l_hexdumptikz_parser_finished_bool
%    \end{macrocode}
%
% Enable the callback to stop the parsing.
% The line counter is not incremented any more for the line on which the mapping is left.
%    \begin{macrocode}
        \bool_if:NT \l_hexdumptikz_parser_finished_bool
        { \ior_map_break: }
      }
    }
%    \end{macrocode}
%
% Count all lines including the empty ones.
%    \begin{macrocode}
    \int_incr:N \l_hexdumptikz_parser_line_int
  }
%    \end{macrocode}
%
% Close the input file again
%    \begin{macrocode}
  \ior_close:N \l_hexdumptikz_parser_ior
}
%    \end{macrocode}
% \end{fn}
%
% \subsection{Helpers}
% \begin{fn}{\@@_normalize_line:NNN}
% Normalize a single parsed line (tl) from the slightly different input formats supported by this parser to a unified sequence.
% \begin{sideeffects}
%   \sclobber & \sdir & \texttt{l\_tmpa\_seq} \\
% \end{sideeffects}
% \begin{args}
%   1 & \ain/\aout & parsed line (tl); the leading offset is removed from it in place \\
%   2 & \aout & offset (tl) \\
%   3 & \aout & bytes (seq) \\
% \end{args}
%    \begin{macrocode}
\cs_new_protected:Npn \@@_normalize_line:NNN #1 #2 #3
  {
%    \end{macrocode}
% Match the address regex against the content of the line and store the extraction result in \texttt{l\_tmpa\_seq}.
% A line without a match raises the critical error \texttt{no-valid-offset} with the offending line as argument.
%    \begin{macrocode}
    \regex_extract_once:NVNF \c_hexdumptikz_parser_addr_regex #1 \l_tmpa_seq
      { \msg_critical:nnV { hexdumptikz-parser } { no-valid-offset } #1 }
%    \end{macrocode}
% The offset is taken from the second item of the extraction result (the first item is the complete match).
% That item is expected to hold the digits without the optional \texttt{0x} prefix, so the prefix is prepended here to obtain a normalized form.
%    \begin{macrocode}
    \tl_set:Ne #2 { 0x \seq_item:Nn \l_tmpa_seq { 2 } }
%    \end{macrocode}
%
% Strip the offset / address from the line variable itself (the first argument is modified in place), leaving the hex digits of the \enquote{values} at the start.
%    \begin{macrocode}
    \regex_replace_once:NnN \c_hexdumptikz_parser_addr_regex { } #1
%    \end{macrocode}
%
% Hand the remaining digits over for conversion into the byte sequence; the offset is passed along for use in error messages.
%    \begin{macrocode}
    \@@_hexcompact_to_seq:NNN #1 #3 #2
  }
%    \end{macrocode}
% \end{fn}
%
% \begin{fn}{\@@_hexcompact_to_seq:NNN}
% Convert/Sanitize a series of hex digits to a sequence of bytes
% \begin{sideeffects}
%   \sclobber & \sdir & \texttt{l\_tmpa\_str} \\
% \end{sideeffects}
% \begin{args}
%   1 & \ain  & series of hex-digits (tl) \\
%   2 & \aout & sequence of bytes (seq) \\
%   3 & \ain & offset (only used to generate nicer error messages which indicate the location of the error in the file) \\
%   - & \ain & \texttt{l\_hexdumptikz\_parser\_strict\_byte\_num\_bool} \\
%   - & \ain & \texttt{l\_hexdumptikz\_common\_bytes\_per\_row\_int} \\
%   - & \ain & \texttt{l\_hexdumptikz\_parser\_leading\_base\_bool} \\
%   - & \ain/\aout & \texttt{l\_hexdumptikz\_parser\_last\_line\_seen\_bool} \\
%   - & \ain & \texttt{l\_hexdumptikz\_parser\_strict\_hex\_bool} \\
% \end{args}
%    \begin{macrocode}
\cs_new_protected:Npn \@@_hexcompact_to_seq:NNN #1 #2 #3
  {
%    \end{macrocode}
% Optional checks and transformations come first.
%
% In strict byte-count mode no further line may follow once a line has been flagged as the last one (see the end of this function): raise the critical error \texttt{weird-byte-count} in that case.
%    \begin{macrocode}
    \bool_lazy_and:nnT
      { \bool_if_p:N \l_hexdumptikz_parser_strict_byte_num_bool }
      { \bool_if_p:N \l_hexdumptikz_parser_last_line_seen_bool }
      {
        \msg_critical:nneV { hexdumptikz-parser } { weird-byte-count }
          { \int_use:N \l_hexdumptikz_common_bytes_per_row_int }
          #3
      }
%    \end{macrocode}
% If a leading hexadecimal base marker is expected, remove it once from the input variable; its absence is a critical error (\texttt{leading-base-missing}).
%    \begin{macrocode}
    \bool_if:NT \l_hexdumptikz_parser_leading_base_bool
      {
        \regex_replace_once:NnNF
          \c_hexdumptikz_common_leading_hex_base_regex { } #1
          { \msg_critical:nnV { hexdumptikz-parser } { leading-base-missing } #1 }
      }
%    \end{macrocode}
% Clean up and do some initialization: the digits are copied into the scratch string \texttt{l\_tmpa\_str} and the output sequence starts out empty.
%    \begin{macrocode}
    \str_set:Ne \l_tmpa_str { #1 }
    \seq_clear:N #2
%    \end{macrocode}
%
% Consume the scratch string two characters at a time until nothing is left.
%    \begin{macrocode}
    \bool_until_do:nn { \str_if_empty_p:N \l_tmpa_str }
      {
%    \end{macrocode}
%
% Limit the number of bytes taken from the current line (only if a positive number of bytes per row is configured).
%
% Once the sequence already holds a full row while input is still left, strict byte-count mode raises the critical error \texttt{too-many-bytes}.
% In any case the scratch string is emptied, which silently ends the parsing of this line.
% Silently stopping is needed for rudimentary support of the \emph{canonical} format which also shows the ASCII representation at the end of each line.
% The support is not full though, as this fails if the last row/line is not fully populated (since this parser is very liberal in terms of where and how many spaces can be present).
%    \begin{macrocode}
        \int_compare:nT
          {
            0
            < \l_hexdumptikz_common_bytes_per_row_int
            <= \seq_count:N #2
          }
          {
            \bool_if:NT \l_hexdumptikz_parser_strict_byte_num_bool
              {
                \msg_critical:nnVeV { hexdumptikz-parser } { too-many-bytes }
                  #3
                  { \int_use:N \l_hexdumptikz_common_bytes_per_row_int }
                  \l_tmpa_str
              }
            \str_clear:N \l_tmpa_str
          }
        \str_if_empty:NF \l_tmpa_str
          {
%    \end{macrocode}
%
% Optionally check the remaining input against the leading-hex-byte regex and raise \texttt{invalid-hex-digits} on a mismatch.
% In principle there is no issue if non-hex characters are present as the content is not interpreted.
%    \begin{macrocode}
            \bool_if:NT \l_hexdumptikz_parser_strict_hex_bool
              {
                \regex_if_match:NVF
                  \c_hexdumptikz_parser_leading_hex_byte_regex
                  \l_tmpa_str
                  {
                    \msg_critical:nnV { hexdumptikz-parser }
                      { invalid-hex-digits } \l_tmpa_str
                  }
              }
%    \end{macrocode}
%
% Core step: the first two non-space characters form the next byte, everything from the third non-space character on stays in the scratch string for the next round.
%    \begin{macrocode}
            \seq_put_right:Ne #2
              { \str_range_ignore_spaces:Vnn \l_tmpa_str { 1 } { 2 } }
            \str_set:Ne \l_tmpa_str
              { \str_range_ignore_spaces:Vnn \l_tmpa_str { 3 } { -1 } }
          }
      }
%    \end{macrocode}
% A line with fewer bytes than a full row is remembered as the presumed last line of the input (the last line normally is the only line which is not fully populated).
%    \begin{macrocode}
    \int_compare:nT
      { \seq_count:N #2 < \l_hexdumptikz_common_bytes_per_row_int }
      { \bool_set_true:N \l_hexdumptikz_parser_last_line_seen_bool }
  }
%    \end{macrocode}
% \end{fn}
%
% \begin{fn}{\hexdumptikz_parser_dbg:NNNN}
% Dummy callback which just prints its arguments and can be used for debugging.
% \begin{sideeffects}
% \end{sideeffects}
% \begin{args}
%   1 & \ain & line-index \\
%   2 & \ain & parsed offset/address \\
%   3 & \ain & parsed bytes (seq) \\
%   4 & \aout & variable to signal whether printing / the selection has finished \\
% \end{args}
%    \begin{macrocode}
\cs_new_protected:Npn \hexdumptikz_parser_dbg:NNNN #1 #2 #3 #4
{
%    \end{macrocode}
% Write line index, offset and bytes to the terminal, one item per line, followed by an empty separator line.
% The fourth argument (the \enquote{finished} flag) is deliberately left untouched.
%
% Literal spaces are ignored in the \texttt{expl3} code environment and consecutive \texttt{\textasciitilde} collapse into a single space while the source is read.
% Every space of the labels is therefore written explicitly: one \texttt{\textasciitilde} per word gap and additional \texttt{\textbackslash c\_space\_tl} for the padding which aligns the three values.
%    \begin{macrocode}
  \iow_term:x { line~idx:~ \int_use:N #1 }
  \iow_term:x { offset:~ \c_space_tl \c_space_tl \tl_use:N #2 }
  \iow_term:x
    { bytes:~ \c_space_tl \c_space_tl \c_space_tl \seq_use:Nn #3 {~|~} }
  \iow_term:x { }
}
%    \end{macrocode}
% \end{fn}
%
% \begin{macro}{\hexdumptikzParserDbg}
% Short macro which can aid in debugging.
%    \begin{macrocode}
\NewDocumentCommand { \hexdumptikzParserDbg } { m }
  {
%    \end{macrocode}
% Run the parser on the file given as mandatory argument with the debugging callback, which prints every parsed line to the terminal.
%    \begin{macrocode}
    \hexdumptikz_parser_hd:nN { #1 } \hexdumptikz_parser_dbg:NNNN
  }
%    \end{macrocode}
% \end{macro}
% \iffalse
%</package>
% \fi
%
% \Finale
