I am looking to split a data set in half (consisting of 1000's rows) automatically as the same action needs to be done daily. The difficulty is that I need each Slot start time segment to be halved and separated so that there is a control group and a test group.
I have attached a screenshot showing the data on the left (Slot start segments go from 05:30 all the way to 23:00 in 30 minute segments), then what my expected output is like on the right.
I suspect some VBA script will be required (no idea where to even start) - worth noting that the data will always be copied into the same cells, but the amount of data can vary as well as how many deliveries are in each time slot.
Screenshot of the data set on the left with the desired outcome on the right --
Sample
Customer No. | Delivery Date | Slot Start | Date Created |
---|---|---|---|
53930360 | 44855 | 05:30 | 44857 |
63544585 | 44855 | 05:30 | 44857 |
63541128 | 44855 | 05:30 | 44857 |
63545763 | 44855 | 05:30 | 44857 |
63541115 | 44855 | 05:30 | 44857 |
63544705 | 44855 | 05:30 | 44857 |
63537186 | 44855 | 05:30 | 44857 |
63537827 | 44855 | 05:30 | 44857 |
63501434 | 44855 | 06:00 | 44857 |
63541573 | 44855 | 06:00 | 44857 |
63518156 | 44855 | 06:00 | 44857 |
63512742 | 44855 | 06:00 | 44857 |
63542098 | 44855 | 06:00 | 44857 |
63542387 | 44855 | 06:00 | 44857 |
44643633 | 44855 | 06:00 | 44857 |
50709070 | 44855 | 06:00 | 44857 |
60258231 | 44855 | 06:00 | 44857 |
63475911 | 44855 | 06:00 | 44857 |
63538324 | 44855 | 06:00 | 44857 |
63528634 | 44855 | 06:00 | 44857 |
63535031 | 44855 | 06:00 | 44857 |
63534306 | 44855 | 06:00 | 44857 |
63532381 | 44855 | 06:00 | 44857 |
63547783 | 44855 | 06:00 | 44857 |
63518590 | 44855 | 06:00 | 44857 |
63539583 | 44855 | 06:00 | 44857 |
62220268 | 44855 | 06:00 | 44857 |
Desired output
Customer No. | Delivery Date | Slot Start | Date Created | Customer No. | Delivery Date | Slot Start | Date Created |
---|---|---|---|---|---|---|---|
53930360 | 44855 | 05:30 | 44857 | 63541115 | 44855 | 05:30 | 44857 |
63544585 | 44855 | 05:30 | 44857 | 63544705 | 44855 | 05:30 | 44857 |
63541128 | 44855 | 05:30 | 44857 | 63537186 | 44855 | 05:30 | 44857 |
63545763 | 44855 | 05:30 | 44857 | 63537827 | 44855 | 05:30 | 44857 |
63501434 | 44855 | 06:00 | 44857 | 63475911 | 44855 | 06:00 | 44857 |
63541573 | 44855 | 06:00 | 44857 | 63538324 | 44855 | 06:00 | 44857 |
63518156 | 44855 | 06:00 | 44857 | 63528634 | 44855 | 06:00 | 44857 |
63512742 | 44855 | 06:00 | 44857 | 63535031 | 44855 | 06:00 | 44857 |
63542098 | 44855 | 06:00 | 44857 | 63534306 | 44855 | 06:00 | 44857 |
63542387 | 44855 | 06:00 | 44857 | 63532381 | 44855 | 06:00 | 44857 |
44643633 | 44855 | 06:00 | 44857 | 63547783 | 44855 | 06:00 | 44857 |
50709070 | 44855 | 06:00 | 44857 | 63518590 | 44855 | 06:00 | 44857 |
60258231 | 44855 | 06:00 | 44857 | 63539583 | 44855 | 06:00 | 44857 |
62220268 | 44855 | 06:00 | 44857 |
CodePudding user response:
The accepted answer is elegant and achieves what the OP needs. If someone wants to go the Apps Script way, here's one approach:
function myFunction() {
const ss = SpreadsheetApp.getActiveSpreadsheet();
const rawSheet = ss.getSheetByName('Raw');
const testSheet = ss.getSheetByName('Test Group');
const controlSheet = ss.getSheetByName('Control Group');
const rawData = rawSheet.getDataRange().getDisplayValues(); // Returns a 2D array
const headerRow = rawData.shift();
const timestampColumn = rawData.map(row => row[2])
const uniqueTimestamps = timestampColumn.filter((time, index) => timestampColumn.indexOf(time) === index);
let testGroupAll = [headerRow];
let controlGroupAll = [headerRow];
for (time of uniqueTimestamps) {
// slotData is test group after splicing below
const slotData = rawData.filter(row => row[2] === time);
const controlGroup = slotData.splice(0, Math.round(slotData.length / 2));
Logger.log(slotData); // test group - 2D array
Logger.log(controlGroup); // 2D array
testGroupAll = testGroupAll.concat(slotData);
controlGroupAll = controlGroupAll.concat(controlGroup);
}
Logger.log(testGroupAll);
Logger.log(controlGroupAll);
testSheet.getDataRange().clear();
controlSheet.getDataRange().clear();
testSheet.getRange(1, 1, testGroupAll.length, 4).setValues(testGroupAll);
controlSheet.getRange(1, 1, controlGroupAll.length, 4).setValues(controlGroupAll);
}
CodePudding user response:
try:
=QUERY(A:D; "limit "&ROUNDUP(COUNTA(A2:A)/2); 1)
and:
=QUERY(A:D; "offset "&ROUNDUP(COUNTA(A2:A)/2); 1)
UPDATE
same principle...
=ARRAYFORMULA(SPLIT(FLATTEN(SPLIT(QUERY(MAP(UNIQUE(FILTER(C2:C, C2:C<>""))*1,
LAMBDA(x, QUERY(FLATTEN(QUERY(TRANSPOSE(QUERY(FILTER({A:C, D:D&""}, C:C=x),
"limit "&ROUNDDOWN(COUNTA(FILTER(A:A, C:C=x))/2), )),,9^9)),,9^9))),,9^9), "")), " "))
and:
=ARRAYFORMULA(SPLIT(FLATTEN(SPLIT(QUERY(MAP(UNIQUE(FILTER(C2:C, C2:C<>""))*1,
LAMBDA(x, QUERY(FLATTEN(QUERY(TRANSPOSE(QUERY(FILTER({A:C, D:D&""}, C:C=x),
"offset "&ROUNDDOWN(COUNTA(FILTER(A:A, C:C=x))/2), )),,9^9)),,9^9))),,9^9), "")), " "))