Home > Software engineering >  Pass nonconsecutive values as arguments to bash / awk script
Pass nonconsecutive values as arguments to bash / awk script

Time:08-22

Link to input file for testing is samples.bin
I can pass ranges of values by delimiting two integers with a - in my script:

[root@usreliance Biorad]# ./sample12.sh -s 0-9 -c 0-3
                Ch0     Ch1     Ch2     Ch3
Sample 0:       0x1a03  0x1a03  0x4a03  0x5703
Sample 1:       0x4b03  0x4403  0x1e03  0x0904
Sample 2:       0x1003  0x1903  0x4003  0xae03
Sample 3:       0x1e03  0x2603  0x3303  0xad03
Sample 4:       0x1003  0x8403  0x4303  0x6203
Sample 5:       0xe003  0x1603  0x3403  0xc403
Sample 6:       0xf802  0x3b03  0x5303  0x6103
Sample 7:       0x1003  0x1503  0x4203  0x5803
Sample 8:       0x2303  0x1f03  0x5703  0x6203
Sample 9:       0x1703  0x7303  0x3103  0x3303

[root@usreliance Biorad]# ./sample12.sh -s 0-9 -c 2-3
                Ch2     Ch3
Sample 0:       0x4a03  0x5703
Sample 1:       0x1e03  0x0904
Sample 2:       0x4003  0xae03
Sample 3:       0x3303  0xad03
Sample 4:       0x4303  0x6203
Sample 5:       0x3403  0xc403
Sample 6:       0x5303  0x6103
Sample 7:       0x4203  0x5803
Sample 8:       0x5703  0x6203
Sample 9:       0x3103  0x3303

But I want to be able to pass nonconsecutive values delimited by , commas:

[root@usreliance Biorad]# ./sample12.sh -s 1,3,5 -c 0-3
                Ch0     Ch1     Ch2     Ch3
Sample 1:       0x4b03  0x4403  0x1e03  0x0904
Sample 3:       0x1e03  0x2603  0x3303  0xad03
Sample 5:       0xe003  0x1603  0x3403  0xc403

[root@usreliance Biorad]# ./sample12.sh -s 0-9 -c 0,3
                Ch0     Ch3
Sample 0:       0x1a03  0x5703
Sample 1:       0x4b03  0x0904
Sample 2:       0x1003  0xae03
Sample 3:       0x1e03  0xad03
Sample 4:       0x1003  0x6203
Sample 5:       0xe003  0xc403
Sample 6:       0xf802  0x6103
Sample 7:       0x1003  0x5803
Sample 8:       0x2303  0x6203
Sample 9:       0x1703  0x3303

Here is my script in its current state right now:

#!/usr/bin/env bash
#
# Print samples from a binary capture file as a table of two-byte hex values.
#
# Usage: sample12.sh [-s FIRST-LAST] [-c FIRST-LAST] [-t] [file]
#   -s FIRST-LAST  inclusive range of 0-based sample numbers.
#                  Missing FIRST means 0, missing LAST means "to the end".
#   -c FIRST-LAST  inclusive range of 0-based channel numbers.
#                  Missing FIRST means 0, missing LAST means FIRST + 3.
#   -t             only print the total number of samples
#   file           input file (default: samples.bin)
#
# hexdump emits one line per 8 input bytes; each line is treated as one
# sample holding 4 channels of 2 bytes each.

samps=""
chans=""
total=false

# Leading ":" puts getopts in silent mode: for a missing argument opt is ":"
# and for an unknown option opt is "?"; in both cases the offending option
# letter is in OPTARG (not in opt).
while getopts ':c:s:t' opt; do
  case "$opt" in
    s) samps=$OPTARG ;;
    c) chans=$OPTARG ;;
    t) total=true ;;
    :)
      printf 'Option -%s requires an argument\n' "$OPTARG" >&2
      exit 2
      ;;
    *)
      printf 'Unrecognized option "-%s"\n' "$OPTARG" >&2
      exit 2
      ;;
  esac
done
shift $(( OPTIND - 1 ))

# Optional file operand; falls back to the historical hard-coded name.
infile=${1:-samples.bin}
if [[ ! -r "$infile" ]]; then
  printf '%s: cannot read "%s"\n' "${0##*/}" "$infile" >&2
  exit 1
fi

if [[ $total == true ]]; then
  # Keep the count out of the printf format string; %d also tolerates the
  # leading blanks some wc implementations print.
  count=$(hexdump -v -e '8/1 "%02x " "\n"' < "$infile" | wc -l)
  printf 'Total Samples: %d\n' "$count"
else
  hexdump -v -e '8/1 "%02x " "\n"' < "$infile" |
  awk -v samps="$samps" -v chans="$chans" '
    BEGIN {
      # arbitrary "no upper limit" value for the sample range
      int_max = 2^52
      # default 4 channels as per prerequisite example
      chan_default = 4

      # split "FIRST-LAST" strings on "-"
      split(samps, srange, "-")
      split(chans, crange, "-")

      # Test against the empty string rather than with "!", so that an
      # explicit bound of 0 (e.g. "-s 0-0") is not mistaken for a missing one.
      s_lo = (srange[1] == "") ? 0 : srange[1] + 0
      s_hi = (srange[2] == "") ? int_max : srange[2] + 0
      c_lo = (crange[1] == "") ? 0 : crange[1] + 0
      c_hi = (crange[2] == "") ? c_lo + chan_default - 1 : crange[2] + 0

      # print channel header row
      printf "\t\t"
      for (i = c_lo; i <= c_hi; i++) {
        printf("Ch%d%s", i, (i == c_hi ? "\n" : "\t"))
      }
    }

    # NR is 1-based while sample numbers are 0-based, hence the "+ 1".
    NR >= s_lo + 1 && NR <= s_hi + 1 {
      # Channel n occupies byte fields 2n+1 and 2n+2 of the hexdump line.
      first = c_lo * 2 + 1
      last = (c_hi + 1) * 2

      printf("Sample %d:\t", NR - 1)

      # print each selected channel as its two byte fields, in file order
      for (i = first; i <= last; i += 2) {
        j = i + 1
        printf("0x%s%s%s", $i, $j, (j >= last ? "\n" : "\t"))
      }
    }
  '
fi

The user should be able to pass as many arguments after -s or -c as they like, i.e.,

[root@usreliance Biorad]# ./sample12.sh -s 0,7,23,44 -c 0,2

EXTRA CREDIT: Pass multiple ranges and nonconsecutive values in one pass, i.e.,

[root@usreliance Biorad]# ./sample12.sh -s 0,7,23,44-99,214-300 -c 0,2-3

CodePudding user response:

I see you are using awk/split to process the arguments. You can use the same split method here, but with , instead of - as the delimiter. Then loop through the resulting array to fetch the values.

CodePudding user response:

One idea is to expand the range of numbers into the list of rows/columns to be printed in advance, and print the matched rows/columns while processing the input. Then would you please try:

#!/usr/bin/env bash
#
# Print selected samples and channels of a binary capture file as a table of
# two-byte hex values.
#
# Usage: sample12.sh ([-s samples] [-c channels] | -t) file
#   -s samples   comma-separated list of 0-based sample numbers and/or
#                dash ranges, e.g. 0,7,23,44-99 (default: all samples)
#   -c channels  same syntax for channel numbers 0-3 (default: all channels)
#   -t           only print the total number of samples
#
# An open-ended range defaults to the minimum/maximum: "-10" is 0-10 and
# "100-" is 100 up to the last sample.
#
# hexdump emits one line per 8 input bytes; each line is treated as one
# sample holding 4 channels of 2 bytes each.

samps=""
chans=""
total="false"

usage() {
  printf 'usage: %s ([-s samples] [-c channels] | -t) file\n' "$0" >&2
}

# Leading ":" puts getopts in silent mode: for a missing argument opt is ":"
# and for an unknown option opt is "?"; in both cases the offending option
# letter is in OPTARG (not in opt).
while getopts ':c:s:t' opt; do
  case "$opt" in
    s) samps=$OPTARG ;;
    c) chans=$OPTARG ;;
    t) total="true" ;;
    :)
      printf 'Option -%s requires an argument\n' "$OPTARG" >&2
      usage
      exit 2
      ;;
    *)
      printf 'Unrecognized option "-%s"\n' "$OPTARG" >&2
      usage
      exit 2
      ;;
  esac
done
shift $(( OPTIND - 1 ))

# if input_file is not specified, print usage and exit
if (( $# == 0 )); then
  usage
  exit 1
fi

infile=$1
if [[ ! -r "$infile" ]]; then
  printf '%s: cannot read "%s"\n' "$0" "$infile" >&2
  exit 1
fi

# Reading via stdin keeps a file name that starts with "-" from being
# parsed as an option by wc/hexdump.
size=$(wc -c < "$infile")
lines=$(( (size - 1) / 8 ))             # last (0-based) line number

if [[ $total == "true" ]]; then
  # Keep the count out of the printf format string; %d also tolerates the
  # leading blanks some wc implementations print.
  count=$(hexdump -v -e '8/1 "%02x " "\n"' < "$infile" | wc -l)
  printf 'Total Samples: %d\n' "$count"
else
  hexdump -v -e '8/1 "%02x " "\n"' < "$infile" |
  awk -v samps="$samps" -v chans="$chans" -v lines="$lines" '

  # Expand a comma-separated list of numbers and dash ranges into indexes
  # of array "a".
  #   str      list such as "0,7,23-25,100-"; empty selects min..max
  #   a        array that receives one index per selected number
  #   min,max  defaults for omitted range ends; values above max are dropped
  # Returns the largest selected number, or min-1 if nothing was selected.
  function expn(str, a, min, max,     n, k, parts, item, nb, bounds, lo, hi, j, last) {
    min += 0
    max += 0
    if (str == "") {                            # nothing given: full range
      for (j = min; j <= max; j++) a[j]
      return max
    }

    last = min - 1
    gsub(/[^0-9,-]/, "", str)                   # remove irregular characters
    n = split(str, parts, /,/)                  # split on ","
    for (k = 1; k <= n; k++) {
      item = parts[k]
      if (item == "") continue                  # stray comma, e.g. "1,,3"

      nb = split(item, bounds, /-/)             # split on "-"
      if (nb == 1) {                            # single number
        lo = hi = bounds[1] + 0
      } else if (nb == 2) {                     # dash-ranged numbers
        lo = (bounds[1] == "") ? min : bounds[1] + 0
        hi = (bounds[2] == "") ? max : bounds[2] + 0
      } else {
        continue                                # malformed, e.g. "1-2-3"
      }

      # "+ 0" above makes "07" select index 7 rather than the string "07".
      # Clamp so an oversized range cannot create indexes that never match.
      if (hi > max) hi = max
      if (lo > hi) continue

      for (j = lo; j <= hi; j++) a[j]
      # Track the maximum over single values as well as ranges; otherwise
      # the early exit below would skip samples listed as plain numbers.
      if (hi > last) last = hi
    }
    return last                                 # last line number to process
  }

  BEGIN {
    nchan = 4                                   # channels per 8-byte sample

    # expand sample string to array "srange"
    last = expn(samps, srange, 0, lines)
    # expand channel string to array "crange"
    expn(chans, crange, 0, nchan - 1)

    # print channel header row
    printf "\t"
    for (c = 0; c < nchan; c++) {
      if (c in crange) {
        printf("\tCh%d", c)
      }
    }
    print ""
  }

  {
    # NR is 1-based, sample numbers are 0-based
    if (NR - 1 > last) exit             # exit earlier if remaining are out of interest
    if ((NR - 1) in srange) {
      printf("Sample %d:", NR - 1)

      # Channel c occupies byte fields 2c+1 and 2c+2 of the hexdump line.
      for (c = 0; c < nchan; c++) {
        if (c in crange) {
          i = c * 2 + 1
          j = i + 1
          printf("\t0x%s%s", $i, $j)
        }
      }
      print ""
    }
  }
  '
fi
  • The function expn expands the string e.g. 0,7,23-25 into an array 0,7,23,24,25. You can even omit the start/end number such as -10,100- which expands to 0-10,100-lastline.
  • Because an array index is created for every number in the specified range, it may be slow if the file is large.
  • The bash variable lines is assigned to the total (maximum) line number and passed to awk.
  • As it is not recommended to embed a filename in the script, I've modified the usage to pass the filename as the last argument.

Example of execution:

$ ./sample12.sh -s 1,3,5,7-9 -c 0,2-3 samples.bin
                Ch0     Ch2     Ch3
Sample 1:       0x4b03  0x1e03  0x0904
Sample 3:       0x1e03  0x3303  0xad03
Sample 5:       0xe003  0x3403  0xc403
Sample 7:       0x1003  0x4203  0x5803
Sample 8:       0x2303  0x5703  0x6203
Sample 9:       0x1703  0x3103  0x3303
  • Related