I am new to SQL.I have two tables Customers and deposit_transactions. I am trying to select the country which has the highest account balance for each date
. I am joining deposit_transactions table(to get the sum of amount based on customer_id) and joining with customers table to get the Country
Schema (MySQL v8.0)
CREATE TABLE deposit_transactions (
`deposit_id` VARCHAR(7),
`customer_id` VARCHAR(5),
`date` TEXT,
`transaction_type` VARCHAR(7),
`amount` INTEGER,
`currency` VARCHAR(3)
);
INSERT INTO deposit_transactions
(`deposit_id`, `customer_id`, `date`, `transaction_type`, `amount`, `currency`)
VALUES
('DE47653', 'DO900', '1/1/2019', 'pay_in', '10000', 'EUR'),
('DE47654', 'DO901', '2/1/2019', 'pay_in', '10000', 'EUR'),
('DE47655', 'DO902', '2/1/2019', 'pay_in', '10000', 'EUR'),
('UK47656', 'DO903', '3/1/2019', 'pay_in', '10000', 'GBP'),
('UK47657', 'DO904', '3/1/2019', 'pay_in', '130000', 'GBP'),
('DE47658', 'DO905', '3/1/2019', 'pay_in', '140000', 'EUR'),
('DE47659', 'DO906', '3/1/2019', 'pay_in', '10000', 'EUR'),
('DE47660', 'DO907', '3/1/2019', 'pay_in', '10000', 'EUR'),
('DE47661', 'DO908', '3/1/2019', 'pay_in', '100000', 'EUR'),
('DE47662', 'DO909', '4/1/2019', 'pay_in', '10000', 'EUR'),
('UK47663', 'DO910', '4/1/2019', 'pay_in', '10000', 'GBP'),
('UK47664', 'DO911', '4/1/2019', 'pay_in', '5000', 'GBP'),
('UK47665', 'DO912', '4/1/2019', 'pay_in', '4000', 'GBP'),
('DE47666', 'DO913', '5/1/2019', 'pay_in', '10000', 'EUR'),
('DE47661', 'DO908', '5/1/2019', 'pay_out', '50000', 'EUR'),
('DE47667', 'DO914', '11/1/2019', 'pay_in', '30000', 'EUR'),
('DE47668', 'DO915', '11/1/2019', 'pay_in', '10000', 'EUR'),
('DE47669', 'DO916', '11/1/2019', 'pay_in', '25000', 'EUR'),
('DE47670', 'DO917', '11/1/2019', 'pay_in', '50000', 'EUR'),
('DE47667', 'DO914', '12/1/2019', 'pay_out', '7000', 'EUR'),
('DE47667', 'DO914', '12/15/2019', 'pay_in', '12000', 'EUR'),
('DE47671', 'DO918', '1/1/2021', 'pay_in', '9000', 'EUR'),
('DE47672', 'DO919', '1/1/2021', 'pay_in', '10000', 'EUR'),
('DE47673', 'DO920', '1/1/2021', 'pay_in', '11000', 'EUR'),
('DE47674', 'DO921', '1/1/2021', 'pay_in', '12000', 'EUR'),
('DE47675', 'DO922', '1/1/2021', 'pay_in', '13000', 'EUR'),
('DE47676', 'DO923', '1/1/2021', 'pay_in', '14000', 'EUR'),
('DE47677', 'DO924', '1/1/2021', 'pay_in', '30000', 'EUR'),
('DE47678', 'DO925', '1/1/2021', 'pay_in', '16000', 'EUR');
CREATE TABLE customers (
`customer_id` VARCHAR(5),
`country` VARCHAR(7)
);
INSERT INTO customers
(`customer_id`, `country`)
VALUES
('DO900', 'Germany'),
('DO901', 'Germany'),
('DO902', 'Spain'),
('DO903', 'UK'),
('DO904', 'UK'),
('DO905', 'Austria'),
('DO906', 'Germany'),
('DO907', 'Germany'),
('DO908', 'Germany'),
('DO909', 'Germany'),
('DO910', 'UK'),
('DO911', 'UK'),
('DO912', 'UK'),
('DO913', 'Germany'),
('DO914', 'Austria'),
('DO915', 'Germany'),
('DO916', 'Austria'),
('DO917', 'Germany'),
('DO918', 'Germany'),
('DO919', 'Spain'),
('DO920', 'Germany'),
('DO921', 'Spain'),
('DO922', 'Germany'),
('DO923', 'Germany'),
('DO924', 'Germany'),
('DO925', 'Spain'),
('DO926', 'Germany'),
('DO927', 'Germany'),
('DO928', 'Germany'),
('DO929', 'Germany'),
('DO711', 'UK'),
('DO712', 'UK'),
('DO713', 'Germany'),
('DO714', 'Austria'),
('DO715', 'Germany'),
('DO716', 'Austria'),
('DO717', 'Germany'),
('DO718', 'Germany'),
('DO719', 'Spain'),
('DO720', 'Germany'),
('DO721', 'Spain'),
('DO722', 'Germany'),
('DO723', 'Germany'),
('DO724', 'Germany'),
('DO725', 'Spain'),
('DO726', 'Germany'),
('DO727', 'Germany'),
('DO728', 'Germany'),
('DO729', 'Germany');
Below is the sample query I created. But I am not sure if its correct
Query
SELECT t.customer_id,
c.country,t.total_amount
FROM (SELECT customer_id,
date,
transaction_type,
Sum(amount)
OVER (
partition BY customer_id ) AS total_amount_transfered,
Row_number()
OVER (
partition BY customer_id
ORDER BY date DESC) AS n
FROM deposit_transactions)t
INNER JOIN customers AS c
ON c.customer_id = t.customer_id
WHERE t.n = 1
AND t.transaction_type = 'pay_in' order by total_amount desc limit 1;
CodePudding user response:
Step 1. Finding the total balance amount for each country.
Since you need to get the balance and you have two kinds of transactions, I'd rather transform the "pay-out" transactions into negative balance and leave "pay-in" transactions as are. Also you should take into account the different currencies, because "GBP" currency has a higher value than "EUR" currency (GBP ~ EUR*0.85). If you don't want to consider "pay-out" negative impact, you can transform them to 0. This balance wrangling is done here by a CASE
statement.
Then the balances are aggregated with a SUM
function over the corresponding countries (accessible because of the JOIN
operation) and grouped over the "country" and "date" field.
SELECT country,
date AS transaction_date,
-- STR_TO_DATE(date, '%m/%d/%Y') AS transaction_date,
SUM(CASE WHEN transaction_type = 'pay_in' AND currency = 'EUR' THEN amount
WHEN transaction_type = 'pay_in' AND currency = 'GBP' THEN amount/0.85
-- WHEN transaction_type = 'pay_out' THEN 0
WHEN transaction_type = 'pay_out' AND currency = 'EUR' THEN -amount
WHEN transaction_type = 'pay_out' AND currency = 'GBP' THEN -amount/0.85
END) AS amount_eur
FROM deposit_transactions dt
INNER JOIN customers c
ON dt.customer_id = c.customer_id
GROUP BY country,
transaction_date
Optionally in this step, you could also transform the "date" field from the TEXT
type to the DATE
type using the STR_TO_DATE
function.
Step 2. Ranking the countries over the summed amounts.
In order to apply a rank, you use the ROW_NUMBER
window function, to partition on the transaction date and order by the balance amount descendently.
SELECT *, ROW_NUMBER() OVER(
PARTITION BY transaction_date
ORDER BY amount_eur DESC ) AS rn
FROM total_amounts_per_country
Step 3. Selecting the country having the highest balance.
You can just all rows which have the rank equal to 1, given that there will be a 1 for each date. Then select the interesting fields, namely "transaction_date" and "country".
SELECT transaction_date,
country
FROM ranked_countries
WHERE rn = 1
Full Query. It uses MySQL common table expressions to combine the three steps.
WITH total_amounts_per_country AS(
SELECT country,
date AS transaction_date,
-- STR_TO_DATE(date, '%m/%d/%Y') AS transaction_date,
SUM(CASE WHEN transaction_type = 'pay_in' AND currency = 'EUR' THEN amount
WHEN transaction_type = 'pay_in' AND currency = 'GBP' THEN amount/0.85
-- WHEN transaction_type = 'pay_out' THEN 0
WHEN transaction_type = 'pay_out' AND currency = 'EUR' THEN -amount
WHEN transaction_type = 'pay_out' AND currency = 'GBP' THEN -amount/0.85 END) AS amount_eur
FROM deposit_transactions dt
INNER JOIN customers c
ON dt.customer_id = c.customer_id
GROUP BY country,
transaction_date
), ranked_countries AS (
SELECT *, ROW_NUMBER() OVER(
PARTITION BY transaction_date
ORDER BY amount_eur DESC ) AS rn
FROM total_amounts_per_country
)
SELECT transaction_date,
country
FROM ranked_countries
WHERE rn = 1
Check the demo here.