I have a sample.json file that contains a dataset of C code snippets, represented as a dictionary:
{
"0_0":
"int curl_mvsprintf ( char * buffer , const char * format , va_list ap_save ) {\n int retcode ;\n retcode = dprintf_formatf ( & buffer , storebuffer , format , ap_save ) ;\n * buffer = 0 ;\n return retcode ;\n }",
"0_1":
"static int alloc_addbyter ( int output , FILE * data ) {\n struct asprintf * infop = ( struct asprintf * ) data ;\n unsigned char outc = ( unsigned char ) output ;\n if ( ! infop -> buffer ) {\n infop -> buffer = malloc ( 32 ) ;\n if ( ! infop -> buffer ) {\n infop -> fail = 1 ;\n return - 1 ;\n }\n infop -> alloc = 32 ;\n infop -> len = 0 ;\n }\n else if ( infop -> len + 1 >= infop -> alloc ) {\n char * newptr ;\n newptr = realloc ( infop -> buffer , infop -> alloc * 2 ) ;\n if ( ! newptr ) {\n infop -> fail = 1 ;\n return - 1 ;\n }\n infop -> buffer = newptr ;\n infop -> alloc *= 2 ;\n }\n infop -> buffer [ infop -> len ] = outc ;\n infop -> len ++ ;\n return outc ;\n }"
}
I want to process it with pandas or plain Python to produce the text below for a classification task. The separator should be a tab ('\t'), and the result should be saved in sample.txt.
If the key ends with 0 (i.e. _0, like 0_0), the label is label1;
otherwise (i.e. _1, like 0_1), the label is label2.
label1 int curl_mvsprintf ( char * buffer , const char * format , va_list ap_save ) {\n int retcode ;\n retcode = dprintf_formatf ( & buffer , storebuffer , format , ap_save ) ;\n * buffer = 0 ;\n return retcode ;\n }
label2 static int alloc_addbyter ( int output , FILE * data ) {\n struct asprintf * infop = ( struct asprintf * ) data ;\n unsigned char outc = ( unsigned char ) output ;\n if ( ! infop -> buffer ) {\n infop -> buffer = malloc ( 32 ) ;\n if ( ! infop -> buffer ) {\n infop -> fail = 1 ;\n return - 1 ;\n }\n infop -> alloc = 32 ;\n infop -> len = 0 ;\n }\n else if ( infop -> len + 1 >= infop -> alloc ) {\n char * newptr ;\n newptr = realloc ( infop -> buffer , infop -> alloc * 2 ) ;\n if ( ! newptr ) {\n infop -> fail = 1 ;\n return - 1 ;\n }\n infop -> buffer = newptr ;\n infop -> alloc *= 2 ;\n }\n infop -> buffer [ infop -> len ] = outc ;\n infop -> len ++ ;\n return outc ;\n }
Expected output after reading the txt file using pandas:
CodePudding user response:
like this?
import json


def format_samples(sample_dict):
    """Render a ``{key: code}`` mapping as tab-separated ``label<TAB>code`` lines.

    Args:
        sample_dict: Mapping from sample key (e.g. ``"0_0"``) to the code
            string for that sample.

    Returns:
        A single string with one line per sample, in the mapping's iteration
        order. Each line is ``label1`` (key ends with ``"0"``) or ``label2``
        (any other key), a tab, the code, and a terminating newline.
    """
    lines = []
    for key, val in sample_dict.items():
        label = "label1" if key.endswith("0") else "label2"
        # json.load decodes the "\n" escapes in the JSON strings into real
        # newline characters. Re-escape them so every sample stays on one
        # physical line; otherwise a line-oriented TSV reader would split a
        # single sample into several rows.
        code = val.replace("\n", "\\n")
        lines.append(label + "\t" + code + "\n")
    return "".join(lines)


def main():
    """Read ``sample.json`` and write the labeled samples to ``sample.txt``."""
    with open("sample.json", "r", encoding="utf-8") as f:
        sample_dict = json.load(f)
    with open("sample.txt", "w", encoding="utf-8") as f:
        f.write(format_samples(sample_dict))


if __name__ == "__main__":
    main()