Efficiently splitting an array of integers into subarrays of their prime divisors so that no two sub-CodePudding

I am currently trying to split an integer array into sub arrays so that:

No two different sub arrays contain elements that have any common prime divisors.
Elements that share prime divisors are placed into the same sub array.

e.g. an array contatining 2, 3, 6, 7, 9, 14, 19 would be split into two arrays [2, 3, 6, 7, 9, 14] and [19].

I've figured out a working solution, but i think it's crude and inefficient and I was wondering if there was a way to optimize it.

The solution:

public class SOExample {

    private final Map<Integer, Set<Integer>> integersPrimesMap = new HashMap<>();
    private final List<Set<Integer>> primesSubArrays = new ArrayList<>();
    private final Set<Integer> usedIntegers = new HashSet<>();

    public SOExample(int[] arr) {
        for (int i : arr) {
            calculatePrimeFactors(i);
        }
        for (Map.Entry<Integer, Set<Integer>> x : integersPrimesMap.entrySet()) {
            primesSubArrays.add(x.getValue());
        }
        // Combine sub arrays which contain overlapping elements
        Iterator<Set<Integer>> i = primesSubArrays.iterator();
        while (i.hasNext()) {
            Set<Integer> x = i.next();
            for (Set<Integer> j : primesSubArrays) {
                if (x != j && !Collections.disjoint(x, j)) {
                    j.addAll(x);
                    i.remove();
                    break;
                }
            }
        }
        // figure out which integer belongs to which sub array from their prime divisors in integersPrimesMap and create sub arrays of integers.
    }

    private void calculatePrimeFactors(int n) {
        if (usedIntegers.contains(n)) {
            return;
        }
        usedIntegers.add(n);
        Set<Integer> factors = new HashSet<>();
        if (n % 2 == 0) {
            factors.add(2);
            n = n / 2;
        }
        while (n % 2 == 0) {
            n = n / 2;
        }

        for (int i = 3; i <= Math.sqrt(n); i = i   2) {
            if (n % i == 0) {
                factors.add(i);
                n = n / i;
            }
            while (n % i == 0) {
                factors.add(i);
                n = n / i;
            }
            if (n == 1) {
                break;
            }
        }
        if (n > 2) {
            factors.add(n);
        }
        integersPrimesMap.put(n, factors);
    }
}

My biggest struggle is with combining sub arrays of the integers prime factors, my current solution is O(n^2). I'm fairly certrain that it can be done faster but I was unable to work out the answer by my self.

CodePudding user response：

For disjoint sets, we can use a disjoint-set data structure.

Python code (sorry, I'm unfamiliar with Java):

# The Disjoint set (by Matt Timmermans)
# has the value as either the size of the set
# in case of the parent, or the negative index
# of the parent in case of a child.
def find(ds, idx):
  if ds[idx] > 0:
    return idx

  root = find(ds, -ds[idx])

  # Path compression
  if ds[idx] != -root:
    ds[idx] = -root

  return root

# Returns False if the
# elements are ALREADY
# in the same set.
def union(ds, a, b):
  a_root = find(ds, a)
  b_root = find(ds, b)

  if a_root == b_root:
    return False

  # Union by size
  if ds[a_root] >= ds[b_root]:
    ds[a_root]  = ds[b_root]
    ds[b_root] = -a_root
  else:
    ds[b_root]  = ds[a_root]
    ds[a_root] = -b_root

  return True

# End code by by Matt Timmermans


def prime_factors(n):
  primes = []
  
  i = 2
  
  while i*i <= n:
    if n % i == 0:
      primes.append(i)
      while n % i == 0:
        n //= i
    i  = 1

  if n > 1:
    primes.append(n)

  return primes


import collections


def update(ds, m, val, index):
  if val in m:
    return index
  m[val] = index
  if len(ds) < index   1:
    ds.append(1)
  return index   1


def f(A):
  ds = []
  prime_index = 0
  prime_to_index = {}
  factor_list = [0] * len(A)

  for i, a in enumerate(A):
    ps = prime_factors(a)

    factor_list[i] = ps[0]
    prime_index = update(ds, prime_to_index, ps[0], prime_index)

    ps_0_idx = prime_to_index[ps[0]]

    for j in range(1, len(ps)):
      prime_index = update(ds, prime_to_index, ps[j], prime_index)
      idx = prime_to_index[ps[j]]
      union(ds, ps_0_idx, idx)

  result = collections.defaultdict(list)

  for i, factor in enumerate(factor_list):
    parent = find(ds, prime_to_index[factor])
    result[parent].append(A[i])

  return list(result.values())

Output:

A = [2, 3, 6, 7, 9, 14, 19]

print(f(A)) # [[2, 3, 6, 7, 9, 14], [19]]

CodePudding user response：

this algorithm is with complexity of O(n^2*log(m)) when n is the length of the array and m is the maximum element in the array.

public static void main(String[] args) {
    for (int[] result : split(new int[] {2, 3, 6, 7, 9, 14, 19})) {
        System.out.println(Arrays.toString(result));
    }
}

//Complexity: O(n^2 * m)    [ n=arr.length, m=max(arr) ]
public static int[][] split(int[] arr) {
    List<LinkedList> lists = new ArrayList<>();
    for (int n : arr) {
        LinkedList selectedList = null;
        for (int i = 0; i < lists.size(); i  ) {
            LinkedList list = lists.get(i);
            if (list.hasNumberWithCommonPrimeFactor(n)) {
                if (selectedList == null) {//if we haven't added the number yet
                    //add the number to the set
                    selectedList = list;
                    list.add(n);
                } else {
                    //join the lists
                    selectedList.joinAfter(list);
                    lists.remove(i);
                    i--;
                }
            }
        }
        if (selectedList == null) {//if we haven't added the number
            //let put the number inside a new list
            lists.add(new LinkedList(n));
        }
    }
    //convert the sets to arrays
    return lists.stream().map(LinkedList::toArray).toArray(int[][]::new);
}

//gcd function with complexity O(log(a b))
public static int gcd(int a, int b) {
    return a % b == 0 ? b : gcd(b, a % b);
}

private static class LinkedList {
    public Node first;
    public Node last;
    public int size;

    //Complexity: O(1)
    public LinkedList(int firstValue) {
        first = last = new Node(firstValue);
        size = 1;
    }

    //Complexity: O(1)
    public void add(int value) {
        last = last.next = new Node(value);
        size  ;
    }

    //Complexity: O(1)
    public void joinAfter(LinkedList other) {
        last.next = other.first;
        last = other.last;
        size  = other.size;
    }

    //Complexity: O(n)
    public int[] toArray() {
        int[] arr = new int[size];
        Node node = first;
        for (int i = 0; i < size; i  ) {
            arr[i] = node.value;
            node = node.next;
        }
        return arr;
    }

    //Complexity: O(n)
    public boolean hasNumberWithCommonPrimeFactor(int num) {
        Node node = first;
        while (node != null) {
            if (gcd(node.value, num) > 1)
                return true;
            node = node.next;
        }
        return false;
    }
}

private static class Node {
    public int value;
    public Node next;

    public Node(int value) {
        this.value = value;
    }
}

this will print

[2, 6, 3, 9, 14, 7]
[19]

the idea I was using is that gcd(a,b)==1 if and only if a,b aren't sharing any prime factor