I need to remove rows which represent dots of multidimensional structure, which are closest to each other.
For example, if structure is:
struct Point dot[100] = {
{{1, 1, 1, 1}},
{{2, 2, 2, 2}},
{{1.3, 1.3, 1.3, 1.3}}
};
After applying Euclidean distance formula, distances are:
d12=sqrt((1-2)^2 (1-2)^2 (1-2)^2 (1-2)^2))=2
d13=sqrt((1-1.3)^2 (1-1.3)^2 (1-1.3)^2 (1-1.3)^2)=0.6
d23=sqrt((2-3)^2 (2-3)^2 (2-3)^2 (2-3)^2))=2
Closest points are the ones that represent row1 and row3, and they should be removed from structure.
OUTPUT should be: (2,2,2,2)
#include <stdio.h>
struct Point {
double coordinate[100];
};
void remove_closest(struct Point dot[], int n, int dimension){
int i,j;
for(i=0;i<n;i ){
for(j=0;j<dimension-1;j ){
//
}
}
}
int main() {
struct Point dot[100] = {
{{1, 1, 1, 1}},
{{2, 2, 2, 2}},
{{1.3, 1.3, 1.3, 1.3}}
};
int i,j,dimension=4;
remove_closest(dot, 3, dimension);
for (i=0; i<3; i ) {
printf("(");
for (j=0; j<dimension-1; j )
printf("%g,", dot[i].coordinate[j]);
printf("%g)\n", dot[i].coordinate[dimension-1]);
}
return 0;
}
Structures are new to me, and multidimensional array makes it even harder. Could you help me with this task?
More example of input/output
- Note: auxiliary arrays are not allowed
CodePudding user response:
Okay, based on all of the criteria, here is what I'd do. I've generalized the code a bit. And I've renamed a few variables to be more descriptive. The code is annotated:
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
// number of elements in an array
#define COUNTOF(_arr) (sizeof(_arr) / sizeof(_arr[0]))
// maximum number of dimensions -- adjust to suit
#define MAXDIM 4
struct point {
double coordinate[MAXDIM];
};
// prtpoint -- print single point
void
prtpoint(const struct point *ptcur,int ndim)
{
int sep = '(';
for (int idxdim = 0; idxdim < ndim; idxdim) {
printf("%c%g",sep,ptcur->coordinate[idxdim]);
sep = ',';
}
printf(")\n");
}
// prtlist -- print all points
void
prtlist(const struct point *points,int npoint,int ndim,const char *title)
{
if (title != NULL)
printf("%s: %d\n",title,npoint);
for (int idxdot = 0; idxdot < npoint; idxdot)
prtpoint(&points[idxdot],ndim);
}
double
square(double x)
{
return x * x;
}
double
euclidean_distance(const struct point *points,int ndim,int idxlo,int idxhi)
{
double dist = 0.0;
const struct point *ptlo = &points[idxlo];
const struct point *pthi = &points[idxhi];
// sum of the squares of all the dimensions
for (int idxdim = 0; idxdim < ndim; idxdim)
dist = square(ptlo->coordinate[idxdim] - pthi->coordinate[idxdim]);
dist = sqrt(dist);
return dist;
}
// remove_closest -- remove closest points
int
remove_closest(struct point *points, int npoint, int ndim)
{
int minlo = -1;
int minhi = -1;
double mindist = 0.0;
// find the indexes of the two points that have the minimum distance
for (int idxlo = 0; idxlo < (npoint - 1); idxlo) {
for (int idxhi = idxlo 1; idxhi < npoint; idxhi) {
double curdist = euclidean_distance(points,ndim,idxlo,idxhi);
// (1) grab the first distance we calculate as the minimum
// (2) get the "best" minimum
if ((minlo < 0) || (curdist < mindist)) {
minlo = idxlo;
minhi = idxhi;
mindist = curdist;
continue;
}
}
}
// strip the two closest entries
struct point *ptsrc = points;
struct point *ptdst = points;
for (int idxcur = 0; idxcur < npoint; idxcur, ptsrc) {
// skip either of the two lowest points
if ((idxcur == minlo) || (idxcur == minhi)) {
#ifdef DEBUG
printf("REMOVE: %d ",idxcur);
prtpoint(ptsrc,ndim);
#endif
continue;
}
// copy the point to fill the gap
// NOTE: we _could_ just _always_ do the copy but doing this
// conditionally is a speedup (i.e. avoid unnecessary copies)
if (ptdst != ptsrc)
*ptdst = *ptsrc;
ptdst;
}
npoint -= 2;
return npoint;
}
void
dolist(struct point *points,int npoint,int ndim)
{
// should never happen [because if we get here, the compiler _should_
// have complained in the definition of the array we've been passed]
if (ndim > MAXDIM)
exit(9);
prtlist(points,npoint,ndim,"Before");
npoint = remove_closest(points,npoint,ndim);
prtlist(points,npoint,ndim,"After");
}
void
test01(void)
{
struct point points[] = {
{{1, 1, 1, 1}},
{{2, 2, 2, 2}},
{{1.3, 1.3, 1.3, 1.3}}
};
dolist(points,COUNTOF(points),4);
}
void
test02(void)
{
struct point points[] = {
// NOTE: duplicate points aren't handled the way this example implies
#if 0
{{1, 1, 1, 1}},
#endif
{{1, 1, 1, 1.00}},
{{1, 1, 1, 1.2}},
{{1, 1, 1, 1.145}},
{{1, 1, 1, 1.31}},
{{1, 1, 1, 1.1}},
};
dolist(points,COUNTOF(points),4);
}
int
main(void)
{
test01();
test02();
return 0;
}
After compiling with -DDEBUG
, here is the program output:
Before: 3
(1,1,1,1)
(2,2,2,2)
(1.3,1.3,1.3,1.3)
REMOVE: 0 (1,1,1,1)
REMOVE: 2 (1.3,1.3,1.3,1.3)
After: 1
(2,2,2,2)
Before: 5
(1,1,1,1)
(1,1,1,1.2)
(1,1,1,1.145)
(1,1,1,1.31)
(1,1,1,1.1)
REMOVE: 2 (1,1,1,1.145)
REMOVE: 4 (1,1,1,1.1)
After: 3
(1,1,1,1)
(1,1,1,1.2)
(1,1,1,1.31)
UPDATE:
thank you very much, could you just modify without typedef? – devec
Well, if you really wish ;-)
The original was:
typedef struct point {
double coordinate[MAXDIM];
} point_t;
Note that if I wanted to be a "smarty pants", I could do this with a #define
and still fulfill the requirement ;-):
struct point {
double coordinate[MAXDIM];
};
#define point_t struct point
But, I've edited the above code to use just the normal struct
. All I did was just do the simple struct
definition. Then, just globally change all point_t
into struct point
. So, really not too difficult. (e.g.) Change:
double
euclidean_distance(const point_t *points,int ndim,int idxlo,int idxhi)
Into:
double
euclidean_distance(const struct point *points,int ndim,int idxlo,int idxhi)
UPDATE #2:
Although I touched on the briefly in the comments for test02
[taken from your linked data example], the code at present doesn't handle exact duplicate points too well. The data was the same for all points in all dimensions except the last one:
1
1.1
1.00
1.2
1.145
1.31
- As present, if we have two identical points, these are the ones that would be removed (i.e. their distance is 0.0). So, we'd remove
1
and1.00
- This is contrary to your second example where the
1.1
and1.00
points were removed. - Even if we excluded/ignored points that are the same, it would be the first copy of
1.00
that would match. That is, the1
point and the1.1
point would be removed. - I punted on this a bit by just removing one of the points that was identical in the input data.
After considering the above:
- In your example, the two points [you] removed were
1.1
and1.00
. The distance is:0.1
- But, my code removed
1.1
and1.145
. The distance is:0.045
So, I think your example was incorrect and you removed the wrong points. You may have to decide if this is a relevant issue for you.