I have data with 3 columns. I need to compare this fields and if they are similar count them. If they are not similar then output information. I need to get this
run.py
from create_excel import create_excel, settings_merge, create_header
from functions import separate_first_raw, separate_and_check_all_fields
input_file = "test.csv"
output_file = "test.xlsx"
my_file = open(input_file, "r", encoding="utf-8")
workbook = create_excel(output_file)
worksheet = workbook.add_worksheet()
merge_format = settings_merge(workbook)
raw = 1
column = 0
test = 0
raw_domain = 1
raw_site = 1
sum_category = 1
raw_category = 1
first = "true"
create_header(worksheet)
if my_file:
current_line = my_file.readline()
first_line_value = current_line.split(",")
first_line_id = first_line_value[0]
first_line_domain = first_line_value[1]
first_line_value = first_line_value[2]
# worksheet.write(1, 0, first_line_id)
# worksheet.write(1, 1, first_line_domain)
# worksheet.write(1, 4, first_line_value)
raw = 2
for line in my_file:
previous_line = current_line
print(f'previous_line {previous_line}')
current_line = line
print(f'current {current_line}')
# print(f"Previous {previous_line}")
# print(f"Current {current_line}")
domain = separate_first_raw(previous_line, current_line, first)
site = separate_and_check_all_fields(previous_line, current_line, 1, first)
category = separate_and_check_all_fields(previous_line, current_line, 2, first)
#
# # for i in domain:
# # print(i)
# # if i != 0:
# # worksheet.merge_range(raw_domain, 0, raw, 0, i[0], merge_format)
# # print(f"Id: {i[0]} {raw_domain} and {raw}")
# # raw_domain = raw 1
# # for i in site:
# # if i != 0:
# # worksheet.write(raw, 1, i[1], merge_format)
# # raw_site = raw 1
for i in category:
if i != 0:
print(i)
if first == "true":
worksheet.write(raw, 3, first_line_value, merge_format)
worksheet.write(raw, 2, sum_category, merge_format)
else:
worksheet.write(raw, 3, i[2], merge_format)
worksheet.write(raw, 2, sum_category, merge_format)
raw_category = raw 1
sum_category = 1
raw = 1
first = "false"
else:
sum_category = 1
# else:
# print("Please input file")
workbook.close()
functions.py
def separate_first_raw(previous_line, current_line, test):
something = []
previous_list = previous_line.split(",")
current_list = current_line.split(",")
# if test == "true":
# something.append(previous_list)
# else:
if previous_list[0] != current_list[0]:
something.append(current_list)
else:
something.append(0)
return something
def separate_and_check_all_fields(previous_line, current_line, field, test):
something = []
previous_list = previous_line.split(",")
current_list = current_line.split(",")
if test == "true":
something.append(previous_list)
else:
if previous_list[field] != current_list[field] or \
previous_list[1] != current_list[1] or previous_list[0] != current_list[0]:
something.append(current_list)
else:
something.append(0)
return something
create_excel.py
import xlsxwriter
def create_excel(filename):
workbook = xlsxwriter.Workbook(filename)
return workbook
def create_header(worksheet):
worksheet.write(0, 0, "id")
worksheet.write(0, 1, "domain")
worksheet.write(0, 2, "firewall")
worksheet.write(0, 3, "category")
def settings_merge(workbook):
merge_format = workbook.add_format({
'bold': 1,
'border': 1,
'align': 'center',
'valign': 'vcenter'})
return merge_format
CodePudding user response: