I used the code below, but I know it's not computationally efficient: every pair should be compared only once, so the number of comparisons should decrease with every iteration.
# Brute-force comparison: every word on every page is compared against every
# word on every page (including itself and its own page).
#
# Result layout: Full_book_Dwt_diff[page][word] is a list of
# (mean_abs_diff, i, sub, j) tuples sorted by ascending mean_abs_diff, where
# i is the word's index on its page and (sub, j) identifies the word it was
# compared with.
#
# Assumes each entry Full_book_DWT[page][word] is a NumPy array and that any
# two entries can be subtracted from each other -- TODO confirm.
Full_book_Dwt_diff = []
for page_number, page in enumerate(Full_book_DWT):
    compared_page_DWT = []
    for i, word in enumerate(page):
        DWT_diff_per_word = []
        for sub, other_page in enumerate(Full_book_DWT):
            for j, other_word in enumerate(other_page):
                Diff = np.absolute(word - other_word)
                size = Diff.size
                DWT_diff_per_word.append(((np.sum(Diff) / size), i, sub, j))
        # Sort once per word, after all of its comparisons are collected;
        # the result is the same as re-sorting after every append.
        DWT_diff_per_word.sort(key=lambda x: x[0])
        compared_page_DWT.append(DWT_diff_per_word)
    Full_book_Dwt_diff.append(compared_page_DWT)
CodePudding user response:
Start the inner loop at page_number + 1
so you don't compare the same elements twice.
# Compare each word only with words on LATER pages, so no pair of pages is
# visited twice. Words on the same page are not compared with each other.
#
# Result layout: Full_book_Dwt_diff[page][word] is a list of
# (mean_abs_diff, i, page_number2, j) tuples sorted by ascending
# mean_abs_diff; the list is empty for every word on the last page.
#
# Assumes each word is a NumPy array and that any two words can be
# subtracted from each other -- TODO confirm.
Full_book_Dwt_diff = []
for page_number, page in enumerate(Full_book_DWT):
    compared_page_DWT = []
    for i, word in enumerate(page):
        DWT_diff_per_word = []
        # enumerate(..., start) keeps page_number2 equal to the real page
        # index even though iteration runs over a slice.
        for page_number2, page2 in enumerate(
            Full_book_DWT[page_number + 1:], page_number + 1
        ):
            for j, word2 in enumerate(page2):
                # Must be a subtraction: np.absolute(word, word2) would treat
                # word2 as the ufunc's `out` array and overwrite it in place.
                Diff = np.absolute(word - word2)
                size = Diff.size
                DWT_diff_per_word.append(
                    ((np.sum(Diff) / size), i, page_number2, j)
                )
        DWT_diff_per_word.sort(key=lambda x: x[0])
        compared_page_DWT.append(DWT_diff_per_word)
    Full_book_Dwt_diff.append(compared_page_DWT)
CodePudding user response:
I think I explained it wrongly, but this solution worked best for me:
# Build a flat list of (page_number, i, sub, j, mean_abs_diff) tuples.
#
# Word i of page `page_number` is compared with word j of page `sub` only
# when sub > page_number and j > i, so the amount of work shrinks as both
# loops advance.
#
# The counters must be INCREMENTED (`+=`): plain `= 1` would reset them on
# every iteration and the start offsets below would never move.
#
# Assumes each entry Full_book_DWT[page][word] is a NumPy array and that any
# two entries can be subtracted from each other -- TODO confirm.
word_counter = np.zeros((len(Full_book_DWT), 1))
page_counter = 0
pairs = []
for page_number, page in enumerate(Full_book_DWT):
    page_counter += 1  # index of the first page after the current one
    for i, word in enumerate(page):
        word_counter[page_number] += 1
        # Index the single element explicitly; int() on a shape-(1,) array
        # is deprecated in recent NumPy releases.
        first_j = int(word_counter[page_number, 0])
        for sub in range(page_counter, len(Full_book_DWT)):
            other_page = Full_book_DWT[sub]
            for j in range(first_j, len(other_page)):
                Diff = np.absolute(word - other_page[j])
                size = Diff.size
                pairs.append((page_number, i, sub, j, (np.sum(Diff) / size)))