Home > Software engineering >  Remove rows of multidimensional structures
Remove rows of multidimensional structures

Time:02-16

I need to remove rows which represent dots of multidimensional structure, which are closest to each other.

For example, if structure is:

struct Point dot[100] = {
    {{1, 1, 1, 1}},
    {{2, 2, 2, 2}},
    {{1.3, 1.3, 1.3, 1.3}}
};

After applying Euclidean distance formula, distances are:

d12=sqrt((1-2)^2 (1-2)^2 (1-2)^2 (1-2)^2))=2

d13=sqrt((1-1.3)^2 (1-1.3)^2 (1-1.3)^2 (1-1.3)^2)=0.6

d23=sqrt((2-3)^2 (2-3)^2 (2-3)^2 (2-3)^2))=2

Closest points are the ones that represent row1 and row3, and they should be removed from structure.

OUTPUT should be: (2,2,2,2)

#include <stdio.h>
struct Point {
    double coordinate[100];
};
void remove_closest(struct Point dot[], int n, int dimension){
int i,j;
for(i=0;i<n;i  ){
    for(j=0;j<dimension-1;j  ){
      // 
    }
}
}
int main() {
struct Point dot[100] = {
    {{1, 1, 1, 1}},
    {{2, 2, 2, 2}},
    {{1.3, 1.3, 1.3, 1.3}}
};
 int i,j,dimension=4;
 remove_closest(dot, 3, dimension);

 for (i=0; i<3; i  ) {
    printf("(");
    for (j=0; j<dimension-1; j  )
        printf("%g,", dot[i].coordinate[j]);
    printf("%g)\n", dot[i].coordinate[dimension-1]);
}
    return 0;
}

Structures are new to me, and multidimensional array makes it even harder. Could you help me with this task?

More example of input/output

  • Note: auxiliary arrays are not allowed

CodePudding user response:

Okay, based on all of the criteria, here is what I'd do. I've generalized the code a bit. And I've renamed a few variables to be more descriptive. The code is annotated:

#include <stdio.h>
#include <stdlib.h>
#include <math.h>

// number of elements in an array
#define COUNTOF(_arr)           (sizeof(_arr) / sizeof(_arr[0]))

// maximum number of dimensions -- adjust to suit
#define MAXDIM      4

struct point {
    double coordinate[MAXDIM];
};

// prtpoint -- print single point
void
prtpoint(const struct point *ptcur,int ndim)
{
    int sep = '(';

    for (int idxdim = 0; idxdim < ndim;    idxdim) {
        printf("%c%g",sep,ptcur->coordinate[idxdim]);
        sep = ',';
    }

    printf(")\n");
}

// prtlist -- print all points
void
prtlist(const struct point *points,int npoint,int ndim,const char *title)
{

    if (title != NULL)
        printf("%s: %d\n",title,npoint);

    for (int idxdot = 0;  idxdot < npoint;    idxdot)
        prtpoint(&points[idxdot],ndim);
}

double
square(double x)
{

    return x * x;
}

double
euclidean_distance(const struct point *points,int ndim,int idxlo,int idxhi)
{
    double dist = 0.0;
    const struct point *ptlo = &points[idxlo];
    const struct point *pthi = &points[idxhi];

    // sum of the squares of all the dimensions
    for (int idxdim = 0;  idxdim < ndim;    idxdim)
        dist  = square(ptlo->coordinate[idxdim] - pthi->coordinate[idxdim]);

    dist = sqrt(dist);

    return dist;
}

// remove_closest -- remove closest points
int
remove_closest(struct point *points, int npoint, int ndim)
{
    int minlo = -1;
    int minhi = -1;
    double mindist = 0.0;

    // find the indexes of the two points that have the minimum distance
    for (int idxlo = 0;  idxlo < (npoint - 1);    idxlo) {
        for (int idxhi = idxlo   1;  idxhi < npoint;    idxhi) {
            double curdist = euclidean_distance(points,ndim,idxlo,idxhi);

            // (1) grab the first distance we calculate as the minimum
            // (2) get the "best" minimum
            if ((minlo < 0) || (curdist < mindist)) {
                minlo = idxlo;
                minhi = idxhi;
                mindist = curdist;
                continue;
            }
        }
    }

    // strip the two closest entries
    struct point *ptsrc = points;
    struct point *ptdst = points;
    for (int idxcur = 0;  idxcur < npoint;    idxcur,   ptsrc) {
        // skip either of the two lowest points
        if ((idxcur == minlo) || (idxcur == minhi)) {
#ifdef DEBUG
            printf("REMOVE: %d ",idxcur);
            prtpoint(ptsrc,ndim);
#endif
            continue;
        }

        // copy the point to fill the gap
        // NOTE: we _could_ just _always_ do the copy but doing this
        // conditionally is a speedup (i.e. avoid unnecessary copies)
        if (ptdst != ptsrc)
            *ptdst = *ptsrc;

          ptdst;
    }

    npoint -= 2;

    return npoint;
}

void
dolist(struct point *points,int npoint,int ndim)
{

    // should never happen [because if we get here, the compiler _should_
    // have complained in the definition of the array we've been passed]
    if (ndim > MAXDIM)
        exit(9);

    prtlist(points,npoint,ndim,"Before");
    npoint = remove_closest(points,npoint,ndim);
    prtlist(points,npoint,ndim,"After");
}

void
test01(void)
{
    struct point points[] = {
        {{1, 1, 1, 1}},
        {{2, 2, 2, 2}},
        {{1.3, 1.3, 1.3, 1.3}}
    };

    dolist(points,COUNTOF(points),4);
}

void
test02(void)
{
    struct point points[] = {
        // NOTE: duplicate points aren't handled the way this example implies
#if 0
        {{1, 1, 1, 1}},
#endif
        {{1, 1, 1, 1.00}},
        {{1, 1, 1, 1.2}},
        {{1, 1, 1, 1.145}},
        {{1, 1, 1, 1.31}},
        {{1, 1, 1, 1.1}},
    };

    dolist(points,COUNTOF(points),4);
}

int
main(void)
{

    test01();
    test02();

    return 0;
}

After compiling with -DDEBUG, here is the program output:

Before: 3
(1,1,1,1)
(2,2,2,2)
(1.3,1.3,1.3,1.3)
REMOVE: 0 (1,1,1,1)
REMOVE: 2 (1.3,1.3,1.3,1.3)
After: 1
(2,2,2,2)
Before: 5
(1,1,1,1)
(1,1,1,1.2)
(1,1,1,1.145)
(1,1,1,1.31)
(1,1,1,1.1)
REMOVE: 2 (1,1,1,1.145)
REMOVE: 4 (1,1,1,1.1)
After: 3
(1,1,1,1)
(1,1,1,1.2)
(1,1,1,1.31)

UPDATE:

thank you very much, could you just modify without typedef? – devec

Well, if you really wish ;-)

The original was:

typedef struct point {
    double coordinate[MAXDIM];
} point_t;

Note that if I wanted to be a "smarty pants", I could do this with a #define and still fulfill the requirement ;-):

struct point {
    double coordinate[MAXDIM];
};
#define point_t struct point

But, I've edited the above code to use just the normal struct. All I did was just do the simple struct definition. Then, just globally change all point_t into struct point. So, really not too difficult. (e.g.) Change:

double
euclidean_distance(const point_t *points,int ndim,int idxlo,int idxhi)

Into:

double
euclidean_distance(const struct point *points,int ndim,int idxlo,int idxhi)

UPDATE #2:

Although I touched on the briefly in the comments for test02 [taken from your linked data example], the code at present doesn't handle exact duplicate points too well. The data was the same for all points in all dimensions except the last one:

1
1.1
1.00
1.2
1.145
1.31
  1. As present, if we have two identical points, these are the ones that would be removed (i.e. their distance is 0.0). So, we'd remove 1 and 1.00
  2. This is contrary to your second example where the 1.1 and 1.00 points were removed.
  3. Even if we excluded/ignored points that are the same, it would be the first copy of 1.00 that would match. That is, the 1 point and the 1.1 point would be removed.
  4. I punted on this a bit by just removing one of the points that was identical in the input data.

After considering the above:

  1. In your example, the two points [you] removed were 1.1 and 1.00. The distance is: 0.1
  2. But, my code removed 1.1 and 1.145. The distance is: 0.045

So, I think your example was incorrect and you removed the wrong points. You may have to decide if this is a relevant issue for you.

  • Related