如何使用大数据 SQL 语句
如何使用大数据 SQL 语句
标签: SQL, 大数据 从一个庞大的数据库中众多的表格和视图中 query 出所需的数据,是一件熟能生巧的工作。
SELECT itemid, itembrand, itemunitprice, RANK() OVER ( ORDER BY itemunitprice ) AS rankunitprice FROM ssitem;
SELECT custname, SUM(salesdollar) AS sumsales, RANK() OVER ( ORDER BY SUM(salesdollar) DESC ) salesrank FROM sssales, sscustomer WHERE sssales.custid = sscustomer.custid GROUP BY custname;
SELECT custstate, custname, sum(salesdollar) AS sumsales, RANK() OVER ( PARTITION BY custstate ORDER BY SUM(salesdollar) DESC ) salesrank FROM sssales, sscustomer WHERE sssales.custid = sscustomer.custid GROUP BY custstate, custname ORDER BY custstate;
SELECT storezip, timeyear, SUM(salesdollar) AS sumsales, SUM(SUM(salesdollar)) OVER (ORDER BY storezip, timeyear ROWS UNBOUNDED PRECEDING) AS cumsumsales FROM ssstore, sstimedim, sssales WHERE sssales.storeid = ssstore.storeid AND sssales.timeno = sstimedim.timeno GROUP BY storezip, timeyear;
SELECT storezip, timeyear, SUM(salesdollar) AS sumsales, SUM(SUM(salesdollar)) OVER (PARTITION BY storezip ORDER BY storezip, timeyear ROWS UNBOUNDED PRECEDING) AS cumsumsales FROM ssstore, sstimedim, sssales WHERE sssales.storeid = ssstore.storeid AND sssales.timeno = sstimedim.timeno GROUP BY storezip, timeyear;
SELECT storezip, timeyear, SUM(salesdollar) AS sumsales, ROUND(AVG(SUM(salesdollar)) OVER (ORDER BY storezip, timeyear ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING), 2) AS centermovavgsumsales FROM ssstore, sstimedim, sssales WHERE sssales.storeid = ssstore.storeid AND sssales.timeno = sstimedim.timeno GROUP BY storezip, timeyear;
SELECT timeyear, custcity, SUM(salesdollar) AS sumsales, ROUND(RATIO_TO_REPORT(SUM(salesdollar)) OVER (PARTITION BY timeyear), 2) AS sumsalesratio FROM sscustomer, sssales, sstimedim WHERE sssales.custid = sscustomer.custid AND sssales.timeno = sstimedim.timeno GROUP BY timeyear, custcity ORDER BY timeyear, SUM(salesdollar) DESC;
SELECT itemname, itembrand, itemunitprice, cumdistunitprice FROM (SELECT itemid, itemname, itembrand, itemunitprice, CUME_DIST() OVER (ORDER BY itemunitprice DESC) AS cumdistunitprice FROM ssitem) WHERE cumdistunitprice <= 0.8;
SELECT custname, SUM(salesunits) AS sumsalesunits, RANK() OVER (ORDER BY SUM(salesunits)) AS ranksalesunits, PERCENT_RANK() OVER (ORDER BY SUM(salesunits)) AS perranksalesunits, ROW_NUMBER() OVER (ORDER BY SUM(salesunits)) AS rownumsalesunits, ROUND(CUME_DIST() OVER (ORDER BY SUM(salesunits)), 2) AS cumdistsalesunits FROM sssales, sscustomer WHERE sssales.custid = sscustomer.custid GROUP BY custname;
SELECT itemname, itemunitprice, RANK() OVER (ORDER BY itemunitprice) AS rankunitprice, PERCENT_RANK() OVER (ORDER BY itemunitprice) AS perrankunitprice, ROW_NUMBER() OVER (ORDER BY itemunitprice) AS rownumunitprice, CUME_DIST() OVER (ORDER BY itemunitprice) AS cumdistunitprice, FROM ssitem;
CREATE VIEW connex_sales_view AS SELECT ssitem.itemid, itemname, itemcategory,itemunitprice, salesno, salesunits, salesdollar, salescost, timeyear, timemonth, timeday FROM ssitem, sssales, sstimedim WHERE itembrand = connex AND timeyear BETWEEN 2010 AND 2012 AND ssitem.itemid = sssales.itemid AND sstimedim.timeno = sssales.timeno;
SELECT itemname, itemcategory, itemunitprice, salesno, salesunits, salesdollar, salescost, timeyear, timemonth, timeday FROM connex_sales_view WHERE itemunitprice < 100 AND timeyear BETWEEN 2011 AND 2012;
CREATE MATERIALIZED VIEW MV1 BUILD IMMEDIATE REFRESH COMPLETE ON DEMAND ENABLE QUERY REWRITE AS SELECT storestate, timeyear, SUM(salesdollar) AS sumdollar FROM sssales, ssstore, sstimedim WHERE sssales.storeid = ssstore.storeid AND sssales.timeno = sstimedim.timeno AND timeyear > 2010 GROUP BY storestate, timeyear;