如何使用大数据 SQL 语句

如何使用大数据 SQL 语句

标签: SQL, 大数据 从一个庞大的数据库中众多的表格和视图中 query 出所需的数据,是一件熟能生巧的工作。

SELECT itemid, itembrand, itemunitprice,
  RANK() OVER ( ORDER BY itemunitprice ) AS rankunitprice
  FROM ssitem;
SELECT custname, SUM(salesdollar) AS sumsales,
  RANK() OVER ( ORDER BY SUM(salesdollar) DESC ) salesrank
  FROM sssales, sscustomer
  WHERE sssales.custid = sscustomer.custid
  GROUP BY custname;
SELECT custstate, custname, sum(salesdollar) AS sumsales,
  RANK() OVER ( PARTITION BY custstate
                ORDER BY SUM(salesdollar) DESC ) salesrank
  FROM sssales, sscustomer
  WHERE sssales.custid = sscustomer.custid
  GROUP BY custstate, custname
  ORDER BY custstate;
SELECT storezip, timeyear, SUM(salesdollar) AS sumsales,
  SUM(SUM(salesdollar)) OVER
    (ORDER BY storezip, timeyear
     ROWS UNBOUNDED PRECEDING) AS cumsumsales
  FROM ssstore, sstimedim, sssales
  WHERE sssales.storeid = ssstore.storeid
  AND sssales.timeno = sstimedim.timeno
  GROUP BY storezip, timeyear;
SELECT storezip, timeyear, SUM(salesdollar) AS sumsales,
  SUM(SUM(salesdollar)) OVER
    (PARTITION BY storezip
     ORDER BY storezip, timeyear
     ROWS UNBOUNDED PRECEDING) AS cumsumsales
  FROM ssstore, sstimedim, sssales
  WHERE sssales.storeid = ssstore.storeid
  AND sssales.timeno = sstimedim.timeno
  GROUP BY storezip, timeyear;
SELECT storezip, timeyear, SUM(salesdollar) AS sumsales,
  ROUND(AVG(SUM(salesdollar)) OVER
       (ORDER BY storezip, timeyear
        ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING), 2) AS centermovavgsumsales
  FROM ssstore, sstimedim, sssales
  WHERE sssales.storeid = ssstore.storeid
  AND sssales.timeno = sstimedim.timeno
  GROUP BY storezip, timeyear;
SELECT timeyear, custcity, SUM(salesdollar) AS sumsales,
  ROUND(RATIO_TO_REPORT(SUM(salesdollar))
  OVER (PARTITION BY timeyear), 2) AS sumsalesratio
  FROM sscustomer, sssales, sstimedim
  WHERE sssales.custid = sscustomer.custid
  AND sssales.timeno = sstimedim.timeno
  GROUP BY timeyear, custcity
  ORDER BY timeyear, SUM(salesdollar) DESC;
SELECT itemname, itembrand, itemunitprice, cumdistunitprice
  FROM (SELECT itemid, itemname, itembrand, itemunitprice, 
    CUME_DIST() OVER (ORDER BY itemunitprice DESC) AS cumdistunitprice
    FROM ssitem)
  WHERE cumdistunitprice <= 0.8;
SELECT custname, SUM(salesunits) AS sumsalesunits,
  RANK() OVER (ORDER BY SUM(salesunits)) AS ranksalesunits,
  PERCENT_RANK() OVER (ORDER BY SUM(salesunits)) AS perranksalesunits,
  ROW_NUMBER() OVER (ORDER BY SUM(salesunits)) AS rownumsalesunits,
  ROUND(CUME_DIST() OVER  (ORDER BY SUM(salesunits)), 2) AS cumdistsalesunits
  FROM sssales, sscustomer
  WHERE sssales.custid = sscustomer.custid
  GROUP BY custname;
SELECT itemname, itemunitprice,
  RANK() OVER (ORDER BY itemunitprice) AS rankunitprice,
  PERCENT_RANK() OVER (ORDER BY itemunitprice) AS perrankunitprice,
  ROW_NUMBER() OVER (ORDER BY itemunitprice) AS rownumunitprice,
  CUME_DIST() OVER (ORDER BY itemunitprice) AS cumdistunitprice,
  FROM ssitem;
CREATE VIEW connex_sales_view AS 
  SELECT ssitem.itemid, itemname, itemcategory,itemunitprice, salesno, salesunits,
  salesdollar, salescost, timeyear, timemonth, timeday
  FROM ssitem, sssales, sstimedim
  WHERE itembrand = connex
  AND timeyear BETWEEN 2010 AND 2012
  AND ssitem.itemid = sssales.itemid
  AND sstimedim.timeno = sssales.timeno;
SELECT itemname, itemcategory, itemunitprice, salesno, salesunits,
  salesdollar, salescost, timeyear, timemonth, timeday
  FROM connex_sales_view
  WHERE itemunitprice < 100 AND timeyear BETWEEN 2011 AND 2012;
CREATE MATERIALIZED VIEW MV1
  BUILD IMMEDIATE
  REFRESH COMPLETE ON DEMAND
  ENABLE QUERY REWRITE AS 
  SELECT storestate, timeyear, SUM(salesdollar) AS sumdollar
  FROM sssales, ssstore, sstimedim
  WHERE sssales.storeid = ssstore.storeid
  AND sssales.timeno = sstimedim.timeno
  AND timeyear > 2010
  GROUP BY storestate, timeyear;
经验分享 程序员 微信小程序 职场和发展