Home > database >  How to convert between the unicode forms: string, name, number
How to convert between the unicode forms: string, name, number

Time:12-10

I have been lately using unicode more often and wondered if there is a command line tool to convert unicode between its forms.

Would be nice to be able to say:

uni_convert "☃" --string

And know that the string is defined in unicode as a "SNOWMAN".

CodePudding user response:

uni_convert(){
   perl -Mutf8 -CDAS -Mcharnames=:full -E '
      use strict;
      use warnings;
      use Getopt::Long;
      use Mojo::Util    qw/ dumper /;
      use List::Util    qw/ max /;


      #-----------------------------------------------------------------------
      #                              COLORS
      #-----------------------------------------------------------------------

      my $RED     = "\e[31m";
      my $YELLOW  = "\e[33m";
      my $TEAL    = "\e[35m";
      my $BG_GREY = "\e[100m";
      my $RESTORE = "\e[0m";


      #-----------------------------------------------------------------------
      #                                SUBS
      #-----------------------------------------------------------------------

      sub define_spec {
         {
            "name"         => {
                                 desc      => "Input is a name.",
                                 exclusive => 1,

                              },
            "string"       => {
                                 desc      => "Input is a string.",
                                 exclusive => 1,
                              },
            "num"          => {
                                 desc      => "Input is a number.",
                                 exclusive => 1,
                              },
            "help"         => {
                                 desc      => "Show this help section.",
                              },
            "debug"        => {
                                 desc      => "Show debugging information.",
                              },
            "list_options" => {
                                 desc      => "List available options.",
                              },
         }
      }

      sub build_spec_names {
         keys define_spec()->%*;
      }

      sub build_exclusive_options {
      my $spec = define_spec();
         grep { $spec->{$_}{exclusive} } keys %$spec;
      }

      sub list_options {
         say for
         sort
         map {
            s/ (?=^\w{2,}) /--/x;   # Long options.
            s/ (?=^\w$)     /-/x;   # Short options.
            $_;
         }
         map {
            split /\|/, $_
         }
         keys define_spec()->%*;

         exit 1;
      }

      sub build_help_options {
         my $spec   = define_spec();
         my $indent = " " x 6;

         join "\n$indent", map {
            my $opt = join ", ",
            map {
               s/ (?=^\w{2,}) /--/x;   # Long options.
               s/ (?=^\w$)     /-/x;   # Short options.
               $_;
            }
            split /\|/, $_;

            sprintf "%-20s %s", $opt, $spec->{$_}{desc};
         } sort keys %$spec;
      }

      sub show_help {

         my $YELLOW  = "\e[33m";
         my $RESTORE = "\e[0m";
         my $self    = "${YELLOW}uni_convert$RESTORE";
         my $options = build_help_options();

         say <<~HERE;

            $self [options]

            Options:
               $options
         HERE

         exit 1;
      }

      sub r {
         say dumper @_;
      }

      sub get_options {
         my $opts = {};

         GetOptions($opts, build_spec_names()) or die $!;

         r $opts if $opts->{debug}; 

         list_options() if $opts->{list_options};

         my @exclusive_opts = build_exclusive_options();

         show_help() if not keys %$opts
            or $opts->{help}
            or 1 != grep {defined} @$opts{ @exclusive_opts }
            or not @ARGV;

         $opts;
      }

      sub get_input {
         @ARGV;
      }

      sub get_name {
         my ($opts,$in) = @_;

         my $name = do {
            if(   $opts->{name})   {          uc $in  }
            elsif($opts->{string}) { string2name($in) }
            elsif($opts->{num})    {   code2name($in) }
            else{ show_help() }
         };

         unless ( defined $name ) {
            die "${YELLOW}Name cannot be determined from: $RED$in$RESTORE\n\n";
         }

         $name;
      }

      sub name_2_line {
         my ($name) = @_;
         my $string  = name2string($name) // die "${YELLOW}Invalid name: $RED$name$RESTORE\n\n";
         my $uni     = name2uni($name);
         my $hex     = name2hex($name);
         my $dec     = name2dec($name);

         # Make white space and escapes easier to see.
         $string =~ s/[\s\p{xPosixCntrl}]/ /g;

         [
            $string,
            $name,
            $uni,
            $hex,
            $dec,
         ];
      }

      sub get_max_length {
         my $last_row    = $#_;
         my $last_column = $_[0]->$#*;

         my @max = map {
            my $col = $_;
            max map {
               length $_[$_][$col];
            } 0 .. $last_row;
         } 0 .. $last_column;

         \@max;
      }

      sub define_raw_format {
         "$RED%%%ss $YELLOW%%-%ss$RESTORE $BG_GREY%%-%ss %%-%ss %%%ss$RESTORE\n";
      }

      sub render_lines {
         my $max    = get_max_length(@_);
         my $format = sprintf define_raw_format(), @$max;

         for ( @_ ) {
            printf $format, @$_;
         }
      }

      sub string2name{ charnames::viacode ord shift }          # ❄                      -> SNOWFLAKE
      sub code2name  { charnames::viacode( shift )  }          # U 2744, 0x2744, 10052  -> SNOWFLAKE
      sub name2string{ charnames::string_vianame( shift ) }    # SNOWFLAKE              -> ❄
      sub name2uni   { sprintf "U %x", name2dec( shift )  }    # SNOWFLAKE              -> U 2744
      sub name2hex   { sprintf "%#x",  name2dec( shift )  }    # SNOWFLAKE              -> 0x2744
      sub name2dec   { charnames::vianame( shift ) }           # SNOWFLAKE              -> 10052
                     

      #-----------------------------------------------------------------------
      #                                MAIN
      #-----------------------------------------------------------------------

      my $opts    = get_options();
      my ($input) = get_input();
      my @process = $opts->{string} ? (split //, $input//"") : ($input);
      my @lines;
   
      say "";
      for ( @process ) {
         my $name = get_name($opts,$_);   # Normalize to a name.
         push @lines, name_2_line($name); # Build other parts.
      }
      render_lines @lines;                # Format and render.
      say "";

      #-----------------------------------------------------------------------
      #                                END
      #-----------------------------------------------------------------------

   ' -- "$@"
}

CodePudding user response:

Here is an awk to do that.

Download this file from unicode.org that provides the latest names.

Then:

q=$(printf '%x\n' \'☃)
awk '/^[[:xdigit:]] /{
    str=$0
    sub(/^[[:xdigit:]] [[:blank:]] /,"",str)
    names[$1]=str
}
END{ print names[q] }
' q="$q" names.txt

Prints:

SNOWMAN

If you want to go the other way:

cp=$(awk '/^[[:xdigit:]] /{
    str=$0
    sub(/^[[:xdigit:]] [[:blank:]] /,"",str)
    other_names[str]=$1
}
END{ print other_names[q] }
' q="SNOWMAN" names.txt)

echo -e "\u${cp}"

Prints:

If you have GNU awk you can easily convert the hex index into decimal and can print from within. This allows a single source file to be used and go one way or the other by defining q or r:

gawk '/^[[:xdigit:]] /{
    str=$0
    sub(/^[[:xdigit:]] [[:blank:]] /,"",str)
    names[$1]=str
    other_names[str]=$1
}
END{ print q ? names[q] : sprintf("%c", strtonum("0x" other_names[r])) }
' r='SNOWMAN' names.txt
☃

gawk '/^[[:xdigit:]] /{
    str=$0
    sub(/^[[:xdigit:]] [[:blank:]] /,"",str)
    names[$1]=str
    other_names[str]=$1
}
END{ print q ? names[q] : sprintf("%c", strtonum("0x" other_names[r])) }
' q=$(printf '%x\n' \'☃) names.txt
SNOWMAN
  • Related