In this code I want to make Kruskal's algorithm, which calculates a minimum spanning tree of a given graph. And I want to use min-heap and disjoint set at the code.
To make time complexity of O(e log n)
, where e is the number of edges and n is the number of vertices in the graph, I will use heap and disjoint set trees.
So the method I went with was:
- Check the numbers of vertices and edges in the given input file and make parent array and struct edge which can include at most
10000
vertices and50000000
edges. - Sort the edges by the weight in a min heap in descending order.
- Take out edges from min heap one by one and check whether it makes cycle until min heap is empty
- If the number of edges selected is
vertices-1
(if all vertices already connected ) break the while loop and print each edges and sum of weights. If all vertices can make minimum spanning tree it prints connected and if all vertices can not make minimum spanning tree it prints disconnected.
input (example)
7
9
0 1 28
0 5 10
1 2 16
1 6 14
2 3 12
3 4 22
3 6 18
4 5 25
4 6 24
result
0 5 10
2 3 12
1 6 14
1 2 16
3 4 22
4 5 25
99
CONNECTED
But I am suspicious whether min-heap in this code could store at most 50000000 edges! I just used dynamic allocation which allocates the numbers of edges in input file. In the above example, 9 is the numbers of edges but I am not sure it works when it is increased to 50000000. Or should I use dynamic allocation like below?
minheap=(edge*)malloc(sizeof(edge)*50000000);
It is the code I made! Can you give me help or advice ?
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include<time.h>
#define maxvertice 10000
#define maxedge 50000000
typedef struct edge {//structure to store vertices and weight
int a,b ;
int w;
}edge;
int n=0; //numbers of edge in the minheap
int *parent;
//array to represent disjoint sets! parent which stores the vertice connected
//if it is not connected(independent) it's parent is -1
edge *minheap;
void insertheap(edge item, int *n); // arrange edges by the weight in descending order
edge deleteheap( int *n); //popping out from the root (in descending order)
void makeunion(int x, int y);
int findparent(int i);
int main(int argc, char* argv[]){
double start,end ;
int i, nv, ne, sumofweight=0;
int cnt_edge=0 ;
//////////////
if(argc !=2)
{
printf("usage: ./hw3 input_filename\n");
return 0;
}
FILE *fp= fopen(argv[1],"r");
if(fp==NULL){
printf("The input file does not exist.\n");
return 0 ;
}
// FILE *result= fopen("hw3_result.txt", "w");
FILE *result= fopen("hw3_result.txt","w");
start=(double)clock()/CLOCKS_PER_SEC;
fscanf(fp, "%d" , &nv);
fscanf(fp, "%d" , &ne);
parent= (int*)malloc(sizeof(int)*nv);
for(i=0 ; i<nv; i ){
parent[i]=-1;
}
minheap=(edge*)malloc(sizeof(edge)*ne);
for(i= 0 ; i< ne; i ){
int firstv, secondv, weight ;
edge newedge ;
fscanf(fp , "%d %d %d", &firstv, &secondv, &weight);
newedge.a=firstv;
newedge.b=secondv;
newedge.w=weight;
// get vertices and edge's weight from the input file and put in heap
insertheap(newedge, &n);
}
while(n>0){//pop out from the heap until mst is completed
edge item= deleteheap(&n);
int par1, par2;
int a= item.a;
int b= item.b;
par1= findparent(a);
par2= findparent(b);
if(par1!=par2){
makeunion(par1,par2);
fprintf(result,"%d %d %d\n", item.a, item.b, item.w);
printf("%d %d %d\n", item.a , item.b, item.w);
cnt_edge= cnt_edge 1;
sumofweight =item.w;
}
if(cnt_edge==nv-1)break ;
}
if(cnt_edge==nv-1){
// fprintf(result, "CONNECTED");
printf("%d\n", sumofweight);
printf("CONNECTED\n");
fprintf(result, "%d\n", sumofweight);
fprintf(result, "CONNECTED\n");
}
if(cnt_edge<nv-1){
// fprintf(result, "DISCONNECTED");
printf("DISCONNECTED\n");
fprintf(result,"DISCONNECTED\n");
}
end=(((double)clock())/CLOCKS_PER_SEC);
fclose(fp);
free(parent);
free(minheap);
// fclose(result);
// printf("output written to hw3_result.txt.\n");
fclose(result);
printf("output written to hw3_result.txt.\n");
printf("running time: ", (end-start));
printf(" seconds\n");
}
void makeunion(int x, int y){
parent[x]=y;
}
int findparent(int i ){
for(; parent[i]>=0; i=parent[i]);
return i ;
}
void insertheap(edge item, int *n){
int i ;
i =*n;
(*n) ;
while((i!=0)&&(item.w<minheap[i/2].w)){
minheap[i]=minheap[i/2];
i/=2 ;
}
minheap[i]=item ;
// printf("to test : the wieght %d is inserted in :%d \n",item.w, i );
}
edge deleteheap( int *n){
int parent, child ;
parent= 0;
child=1;
edge item, temp ;
item= minheap[0];
temp= minheap[(*n)-1];
(*n)--;
while(child<=*n){
if((child<*n)&&(minheap[child].w>minheap[child 1].w))child ;
if(temp.w<=minheap[child].w)break ;
minheap[parent]=minheap[child];
parent=child ;
child*=2;
}
minheap[parent]=temp ;
return item;
}
CodePudding user response:
I'm assuming that your code works as intended and you are only concerned about memory management issues.
I am suspicious whether min-heap in this code could store at most 50000000 edges
Your edge
structure should have a size of either 12 or 16 bytes (if the compiler decides to align it to 8 bytes, and assuming sizeof(int) == 4
which is true on most systems nowdays). In this case, with 50M edges this would mean 50M × 16 = around 763MiB of memory. Assuming your system has enough memory and your application is not restricted in some way, although this is a lot, it seems perfectly doable.
In the above example, 9 is the numbers of edges but I am not sure it works when it is increased to 50000000. Or should I use dynamic allocation like below?
minheap=(edge*)malloc(sizeof(edge)*50000000);
You definitely don't want to allocate such a large amount of memory if you don't need it. Only allocate the needed amount after reading it from the file, like you are doing right now. However I see that you are not checking for errors after malloc
, and you definitely should:
minheap = malloc(sizeof(edge) * ne);
if (minheap == NULL) {
perror("malloc failed");
return 1;
}
Judging from the text of your problem however it seems like you can simplify this. If the IDs of your vertices are integers between 0
and 10000
then you can simply use a uint16_t
(alias for unsigned short int
on most platforms), which should be 2 bytes instead of 4 and is capable of holding values in the range [0, 65535]
. The same reasoning goes for your weight. So in the end, your structure could become:
struct edge {
uint16_t a, b, w;
};
This should already cut memory usage by 50% as the above structure should normally have a size of 6 (or 8 when aligned by the compiler). You can also use compiler specific directives like __attribute__((packed))
to make sure the compiler does not waste space for alignment and creates a 6 bytes structure. This can make memory access a little bit slower because of alignment, but will reduce the maximum size of your heap down to 50M × 6 = around 286MiB.
Note that to read uint16_t
with fscanf
you need to use the SCNu16
macro (from inttypes.h
) instead of %d
:
uint16_t firstv, secondv, weight ;
edge newedge;
fscanf(fp , "%"SCNu16" %"SCNu16" %"SCNu16, &firstv, &secondv, &weight);
CodePudding user response:
If the requested malloc size is too large, the runtime system will quickly let you know !
Just allocate what you need, allocating this large maximum size would be inefficient and in no way helpful.
Also understand that C could not define a specific limit on the allocatable size, as this is machine-dependent.