I have been given an n
length string and I need to find its rank in the alphabetical ordering of all the n
length strings. Like for example, let's say I have been given,
"ABC"
then as n=3
then ordering will be as {ABC, ABD, ABE ....}
thus the rank of "ABC"
is 0
(considering we start from 0). Similarly, for "ABD"
it will be 1. Now,
how do I find the rank of "ZXY"
?
On first sight, it looks a recursive problem where we can create a dictionary for the given length of the string (like n=3
for "ZXY"
and dict={ABC, ABD, ....}
) and then search for the string in the dictionary and return the index as it dict
is ordered. My problem is how do I code this up? and Is there a better way to go about this?
Edit: the listing in the question is the alphabetical listing of all n-length strings of letters from {A,...,Z}
composed of distinct letters.
CodePudding user response:
Okay, as I mentioned above this is a "print all base 26 numbers that are n
digits wide" problem.
Although we can work with strings (e.g. ABC
, DEF
, etc.) it is easier [IMO] to convert to base 26 numbers.
You want the digits in the number to be unique. That is, combinations rather than permutations of the digits.
Permutations is actually easier, so let's start with that ...
Here is a program that does brute force generation of all possible permutations. It is [heavily] annotated:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define sysfault(_fmt...) \
do { \
printf(_fmt); \
exit(1); \
} while (0)
int ndig; // number of digits in string
// the alphabet
#define ALFMAX 26
const char *alf = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
int opt_v; // verbose mode
// diginc -- increment the base 26 number (ndig digits wide)
// RETURNS: carry out
int
diginc(char *digits)
// digits -- base 26 number (ndig digits wide)
{
int cout = 0;
for (int idx = ndig - 1; idx >= 0; --idx) {
int curdig = digits[idx];
curdig = 1;
// did we overflow/wrap the digit?
cout = (curdig >= ALFMAX);
// no -- store new number and exit
if (! cout) {
digits[idx] = curdig;
break;
}
// wrap digit back to zero
digits[idx] = 0;
}
return cout;
}
// digdecode -- printable string (A-Z) to base 26 number (0-25)
void
digdecode(char *digits,const char *str)
// digits -- base 26 number (ndig digits wide)
// str -- string (A-Z)
{
for (int idx = 0; idx < ndig; idx)
digits[idx] = str[idx] - 'A';
}
// digprint -- convert base 26 number to printable form (e.g. A-Z)
// RETURNS: pointer to string
char *
digprint(const char *digits)
// digits -- base 26 number (ndig digits wide)
{
int idx;
static char str[1000];
for (idx = 0; idx < ndig; idx)
str[idx] = digits[idx] 'A';
str[idx] = 0;
return str;
}
// digmatch -- compare two base 26 numbers for equality
// RETURNS: 1=match, 0=mismatch
int
digmatch(const char *diglhs,const char *digrhs)
// diglhs -- base 26 number (ndig digits wide)
// digrhs -- base 26 number (ndig digits wide)
{
int match = 0;
for (int idx = 0; idx < ndig; idx) {
match = (diglhs[idx] == digrhs[idx]);
if (! match)
break;
}
return match;
}
int
main(int argc,char **argv)
{
--argc;
argv;
setlinebuf(stdout);
// get options
for (; argc > 0; --argc, argv) {
char *cp = *argv;
if (*cp != '-')
break;
cp = 2;
switch (cp[-1]) {
case 'v':
opt_v = ! opt_v;
break;
}
}
// candidate string (e.g. ABD or ZYX)
const char *str;
ndig = -1;
switch (argc) {
case 2:
ndig = atoi(argv[1]);
// fall through
case 1:
str = argv[0];
break;
default:
sysfault("usage: <str> [n]\n");
break;
}
// number of digits in string
if (ndig < 0)
ndig = strlen(str);
printf("ndig=%d str='%s'\n",ndig,str);
// convert string to base 26 digit vector
char expdig[ndig];
digdecode(expdig,str);
// sequence index
int seqidx = 0;
// current base 26 number
char curdig[ndig];
// initialize to all zeros
for (int idx = 0; idx < ndig; idx)
curdig[idx] = 0;
while (1) {
// does the current digit vect match the expected/desired vector?
int match = digmatch(curdig,expdig);
// yes [or verbose mode] -- print the entry
if (match || opt_v) {
printf("d: %s",seqidx,digprint(curdig));
if (match)
printf(" MATCH");
printf("\n");
}
// increment the base 26 number
if (diginc(curdig))
break;
seqidx;
}
return 0;
}
Here is the output for ABD
:
ndig=3 str='ABD'
29: ABD MATCH
Here is the output for ZYX
:
ndig=3 str='ZYX'
17547: ZYX MATCH
Note although a combinatorics guru might have a better/faster algorithm, we can get get combinations by calculating a frequency table and rejecting any numbers that have duplicate digits. So, this is a slight extension of the previous example:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define sysfault(_fmt...) \
do { \
printf(_fmt); \
exit(1); \
} while (0)
int ndig; // number of digits in string
// the alphabet
#define ALFMAX 26
const char *alf = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
int opt_u; // unique (combination) mode
int opt_v; // verbose mode
// freqtab -- create frequency table
// RETURNS: 1=duplicate detected
int
freqtab(const char *digits,int *freq)
{
int dupflg = 0;
for (int idx = 0; idx < ndig; idx) {
int curdig = digits[idx];
dupflg = freq[curdig];
if (dupflg)
break;
freq[curdig] = 1;
}
return dupflg;
}
// reject -- reject number if digits are duplicated
// RETURNS: 1=reject/dup, 0=unique
int
reject(const char *digits,int *freq)
{
if (freq == NULL) {
freq = alloca(sizeof(int) * ALFMAX);
for (int idx = 0; idx < ALFMAX; idx)
freq[idx] = 0;
}
// create frequency table
int dupflg = freqtab(digits,freq);
return dupflg;
}
// diginc -- increment the base 26 number (ndig digits wide)
// RETURNS: carry out
int
diginc(char *digits)
// digits -- base 26 number (ndig digits wide)
{
int cout = 0;
for (int idx = ndig - 1; idx >= 0; --idx) {
int curdig = digits[idx];
curdig = 1;
// did we overflow/wrap the digit?
cout = (curdig >= ALFMAX);
// no -- store new number and exit
if (! cout) {
digits[idx] = curdig;
break;
}
// wrap digit back to zero
digits[idx] = 0;
}
return cout;
}
// digdecode -- printable string (A-Z) to base 26 number (0-25)
void
digdecode(char *digits,const char *str)
// digits -- base 26 number (ndig digits wide)
// str -- string (A-Z)
{
for (int idx = 0; idx < ndig; idx)
digits[idx] = str[idx] - 'A';
}
// digprint -- convert base 26 number to printable form (e.g. A-Z)
// RETURNS: pointer to string
char *
digprint(const char *digits)
// digits -- base 26 number (ndig digits wide)
{
int idx;
static char str[1000];
for (idx = 0; idx < ndig; idx)
str[idx] = digits[idx] 'A';
str[idx] = 0;
return str;
}
// digmatch -- compare two base 26 numbers for equality
// RETURNS: 1=match, 0=mismatch
int
digmatch(const char *diglhs,const char *digrhs)
// diglhs -- base 26 number (ndig digits wide)
// digrhs -- base 26 number (ndig digits wide)
{
int match = 0;
for (int idx = 0; idx < ndig; idx) {
match = (diglhs[idx] == digrhs[idx]);
if (! match)
break;
}
return match;
}
int
main(int argc,char **argv)
{
--argc;
argv;
setlinebuf(stdout);
// get options
for (; argc > 0; --argc, argv) {
char *cp = *argv;
if (*cp != '-')
break;
cp = 2;
switch (cp[-1]) {
case 'u':
opt_u = (*cp != 0) ? atoi(cp) : 1;
break;
case 'v':
opt_v = (*cp != 0) ? atoi(cp) : 1;
break;
}
}
// candidate string (e.g. ABD or ZYX)
const char *str;
ndig = -1;
switch (argc) {
case 2:
ndig = atoi(argv[1]);
// fall through
case 1:
str = argv[0];
break;
default:
sysfault("usage: <str> [n]\n");
break;
}
// number of digits in string
if (ndig < 0)
ndig = strlen(str);
printf("ndig=%d str='%s'\n",ndig,str);
// convert string to base 26 digit vector
char expdig[ndig];
digdecode(expdig,str);
// sequence index
int seqidx = 0;
// current base 26 number
char curdig[ndig];
// initialize to all zeros
for (int idx = 0; idx < ndig; idx)
curdig[idx] = 0;
int rejflg = 0;
int match = 0;
while (1) {
if (opt_u)
rejflg = reject(curdig,NULL);
// does the current digit vect match the expected/desired vector?
if (rejflg)
match = 0;
else
match = digmatch(curdig,expdig);
// yes [or verbose mode] -- print the entry
do {
// don't print rejections
if (opt_v < 2) {
if (rejflg)
break;
}
// don't print unless we have a match or using verbose mode
if (! (match || opt_v))
break;
printf("d: %s",seqidx,digprint(curdig));
if (match)
printf(" MATCH");
if (rejflg)
printf(" REJECT");
printf("\n");
} while (0);
// increment the base 26 number
if (diginc(curdig))
break;
// increment sequence index if (unique)
seqidx = (! rejflg) || (opt_v >= 2);
}
return 0;
}
Here is the output for -u ABD
:
ndig=3 str='ABD'
1: ABD MATCH
Here is the output for -u ZYX
:
ndig=3 str='ZYX'
15599: ZYX MATCH
Note that the above is a bit inefficient. That's because the frequency table is recalculated from scratch on each iteration.
Note that freqtab
and reject
take an extra freq
argument. That's planning for the future.
If we pass down an actual array, with some modifications to the functions and two additional ones, we can quickly adjust the frequency table without having to create it from scratch each time.
CodePudding user response:
Craig Estey already came up with a wonderful answer, but I thought I'd add to his permutation example. If you want to find the alphabetical place in which an n-length word goes, you don't have to increment the base 26 number.
Here is some code that finds the correct index in which the n-length word would go:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
/* Convert base 26 to base 10
* Base 26 is represented here as A (0), B (1) ... Z (25)
*/
unsigned ts_to_ten(char *buf, size_t len)
{
unsigned ret = 0;
for (size_t i = 0; i < len; i ) {
ret *= 26;
ret = (buf[i] - 'a');
}
return ret;
}
int main(int argc, char **argv)
{
if (argc != 3) {
fprintf(stderr, "fatal: requires 3 arguments.\n");
exit(EXIT_FAILURE);
}
char *start_text = argv[1];
char *end_text = argv[2];
size_t n;
assert((n = strlen(start_text)) == strlen(end_text));
unsigned start = ts_to_ten(start_text, n);
unsigned end = ts_to_ten(end_text, n);
printf("%u\n", end - start);
}
This program takes in the starting word and the ending word. The starting word is the word that is considered the first iteration (or 0), or 'abc' in the question's case. The ending word would be the word you want to get the iteration of, or 'zxy' according to the question.
So, if you convert both words into a number and subtract the start from the end, you will obtain how far the end word needs to be from the start word in alphabetical order.