-- Q1 -- -- Number of customers who spent more than 50% of their total in the store by store WITH tmp AS ( SELECT store_id, customer_id, SUM(store_sales) / SUM(SUM(store_sales)) OVER (PARTITION BY customer_id) As Ratio FROM sales_fact GROUP BY store_id, customer_id ) SELECT store_id, SUM(CASE WHEN Ratio > 0.5 THEN 1 ELSE 0 END) As NCustomers FROM tmp GROUP BY store_id ORDER BY store_id -- Q2 -- -- Number of customers who spent the largest amount of their total in the store by store WITH temp AS ( SELECT store_id, customer_id, RANK() OVER (PARTITION BY customer_id ORDER BY SUM(store_sales) DESC) AS rk FROM sales_fact GROUP BY store_id, customer_id) SELECT store_id, SUM(CASE WHEN rk=1 THEN 1 ELSE 0 END) NoCustomerWithTopSpending FROM temp GROUP BY store_id ORDER BY store_id -- Q3 -- -- number of customers with total sales in the store lower or equal than 100 by store WITH temp AS ( SELECT store_id, customer_id, SUM(store_sales) As Total, CASE WHEN SUM(store_sales) <= 100 THEN 1 ELSE 0 END As ToBeCounted FROM sales_fact GROUP BY store_id, customer_id ) SELECT store_id, SUM(ToBeCounted) As NCustomers FROM temp GROUP BY store_id ORDER BY store_id -- Q4 -- -- number of customers with at least one day with total sales in the store greater or equal than 100 by store WITH temp AS ( SELECT store_id, customer_id, the_date, SUM(store_sales) As Total, CASE WHEN SUM(store_sales) >= 100 THEN customer_id ELSE NULL END As ToBeCounted FROM sales_fact S, time_by_day T WHERE S.time_id = T.time_id GROUP BY store_id, customer_id, the_date ) SELECT store_id, COUNT(DISTINCT ToBeCounted) As NCustomers FROM temp GROUP BY store_id ORDER BY store_id -- Q5 -- -- number of customers with no day with total sales in the store greater or equal than 100 (but with at least one sale in the store) by store WITH temp AS ( SELECT store_id, customer_id, the_date, SUM(store_sales) As Total, CASE WHEN SUM(store_sales) >= 100 THEN customer_id ELSE NULL END As ToBeCounted FROM sales_fact S, time_by_day T WHERE S.time_id = T.time_id GROUP BY store_id, customer_id, the_date ) SELECT store_id, COUNT(DISTINCT customer_id) - COUNT(DISTINCT ToBeCounted) As NCustomersWithNoDay FROM temp GROUP BY store_id ORDER BY store_id -- Q6 -- -- all triples customer_id, the_year, month_of_the_year in which the customer -- bought something in that month but nothing in the next month. WITH tmp AS ( SELECT DISTINCT customer_id, the_year, month_of_year, the_year*12+month_of_year AS month_id FROM sales_fact S, time_by_day T WHERE S.time_id = T.time_id ), tmp1 AS ( SELECT customer_id, the_year, month_of_year, month_id, LEAD(month_id) OVER(PARTITION BY customer_id ORDER BY month_id) AS next_month_id FROM tmp ) SELECT customer_id, the_year, month_of_year FROM tmp1 WHERE month_id+1 <> next_month_id ORDER BY customer_id, month_id -- Q7 -- -- the ratio of total sales to a customer in a year-month over the total sales -- to the customer in that year, by customer and year-month WITH tmp AS ( SELECT customer_id, the_year, month_of_year, SUM(store_sales) as SalesByYearMonth FROM sales_fact S, time_by_day T WHERE S.time_id = T.time_id GROUP BY customer_id, the_year, month_of_year ), tmp1 AS ( SELECT customer_id, the_year, month_of_year, SalesByYearMonth, SUM(SalesByYearMonth) OVER( PARTITION BY customer_id, the_year) AS SalesByYear FROM tmp ) SELECT customer_id, the_year, month_of_year, SalesByYearMonth, SalesByYear, 100.0*SalesByYearMonth/SalesByYear As Ratio FROM tmp1 ORDER BY customer_id, the_year, month_of_year -- or, in a compact form: SELECT customer_id, the_year, month_of_year, SUM(store_sales) as SalesByYearMonth, SUM(SUM(store_sales)) OVER( PARTITION BY customer_id, the_year) AS SalesByYear, 100.0*SUM(store_sales)/(SUM(SUM(store_sales)) OVER( PARTITION BY customer_id, the_year)) As Ratio FROM sales_fact S, time_by_day T WHERE S.time_id = T.time_id GROUP BY customer_id, the_year, month_of_year ORDER BY customer_id, the_year, month_of_year -- Q8 -- -- the top spending day of week, by customer WITH temp AS ( SELECT customer_id, the_day, SUM(store_sales) AS total, RANK() OVER (PARTITION BY customer_id ORDER BY SUM(store_sales) DESC) AS rk FROM sales_fact AS s, time_by_day AS t WHERE t.time_id = s.time_id GROUP BY customer_id, the_day ) SELECT customer_id, the_day FROM temp WHERE rk=1 ORDER BY customer_id -- Q9 -- -- the 10 top spending customers and the ratio of their spending over the total sales of the store, by store_id WITH temp AS ( SELECT store_id, customer_id, SUM(store_sales) AS total, 100.0*SUM(store_sales) / SUM(SUM(store_sales)) OVER(PARTITION BY store_id) As ratio, RANK() OVER(PARTITION BY store_id ORDER BY SUM(store_sales) DESC) AS rk FROM sales_fact GROUP BY store_id, customer_id ) SELECT store_id, customer_id, total, ratio FROM temp WHERE rk <= 10 ORDER BY store_id -- Q10 -- -- add to the previous query also the running total of the top k customers, for k=1, …, 10 WITH temp AS ( SELECT store_id, customer_id, SUM(store_sales) AS total, 100.0*SUM(store_sales) / SUM(SUM(store_sales)) OVER(PARTITION BY store_id) As ratio, RANK() OVER(PARTITION BY store_id ORDER BY SUM(store_sales) DESC) AS rk FROM sales_fact GROUP BY store_id, customer_id ) SELECT store_id, customer_id, total, ratio, SUM(total) OVER(PARTITION BY store_id ORDER BY rk ROWS UNBOUNDED PRECEDING) AS running FROM temp WHERE rk <= 10 ORDER BY store_id -- Q11 -- -- add to the previous query also the delta between customer k and k+1, for k=1,…,9 WITH temp AS ( SELECT store_id, customer_id, SUM(store_sales) AS total, 100.0*SUM(store_sales) / SUM(SUM(store_sales)) OVER(PARTITION BY store_id) As ratio, RANK() OVER(PARTITION BY store_id ORDER BY SUM(store_sales) DESC) AS rk FROM sales_fact GROUP BY store_id, customer_id ) SELECT store_id, customer_id, total, ratio, SUM(total) OVER(PARTITION BY store_id ORDER BY rk ROWS UNBOUNDED PRECEDING) AS running, total - LEAD(total, 1, 0) OVER(PARTITION BY store_id ORDER BY rk ) AS delta FROM temp WHERE rk <= 10 ORDER BY store_id