Turbocharge your Data Warehouse Queries with Columnstore Indexes Len Wyatt Program Manager Microsoft Corporation DBI313
Demo: Columnstores speed up queries
Overview of Columnstore Index
6 … C1 C2 C3 C4 C5 C6
7 Segments C1 C2 C3 C4 C5 C6 Row group
OrderDateKey ProductKey StoreKey RegionKey Quantity SalesAmount
OrderDateKey ProductKey StoreKey RegionKey Quantity SalesAmount OrderDateKey ProductKey StoreKey RegionKey Quantity SalesAmount
OrderDateKey ProductKey StoreKey RegionKey Quantity SalesAmount OrderDateKey ProductKey StoreKey RegionKey Quantity SalesAmount
OrderDateKey ProductKey StoreKey RegionKey Quantity SalesAmount OrderDateKey ProductKey StoreKey RegionKey Quantity SalesAmount
StoreKey StoreKey RegionKey Quantity OrderDateKey OrderDateKey ProductKey ProductKey SalesAmount SalesAmount
StoreKey StoreKey RegionKey Quantity OrderDateKey OrderDateKey ProductKey ProductKey SalesAmount SalesAmount
15 bitmap of qualifying rows Column vectors Batch object
Make sure most of the work of the query happens in batch mode
Loading Columnstores Effectively
Optimizing database and index design
Date LicenseNum Measure XYZ ABC Date LicenseId Measure LicenseId LicenseNum 1 XYZ123 2 ABC777
Optimizing queries
Common workarounds
Demo: Example of the need for a workaround
Make sure most of the work of the query happens in batch mode
-- Workaround example: outer join combined with an aggregate.
--
-- Original form: the left outer join feeds the GROUP BY directly, so titles
-- with no purchases still appear (COUNT(p.IP) is 0 for them).
-- Slide timing (assumed to apply to this query, listed first on the slide):
--   6.4 sec elapsed, 55 CPU-seconds
select
    m.Title,
    COUNT(p.IP) PurchaseCount
from Media m
left outer join Purchase p
    on p.MediaId = m.MediaId
group by m.Title
order by COUNT(p.IP) desc;

-- Rewritten form: aggregate over an inner join inside the CTE, then outer-join
-- Media to the (already aggregated) result and substitute 0 for titles that
-- have no row in T. The preceding statement must be terminated with ';'
-- because this statement starts with WITH.
-- Slide timing (assumed to apply to this query, listed second on the slide):
--   0.2 sec elapsed, 1.9 CPU-sec
with T (Title, PurchaseCount) as (
    select
        m.Title,
        COUNT(p.IP) PurchaseCount
    from Media m
    join Purchase p
        on p.MediaId = m.MediaId
    group by m.Title
)
select distinct
    m.Title,
    ISNULL(T.PurchaseCount, 0) as PurchaseCount
from Media m
left outer join T
    on m.Title = T.Title
order by ISNULL(T.PurchaseCount, 0) desc;
-- Workaround example: IN / EXISTS membership test versus a plain join.
--
-- Original form, variant 1: IN with a subquery.
-- Slide timing (assumed to apply to the IN / EXISTS forms, listed first):
--   3.0 sec elapsed, 32 CPU-seconds
select
    p.Date,
    count(*)
from Purchase p
where p.MediaId in (select MediaId from MediaStudyGroup)
group by p.Date
order by p.Date;

--or--

-- Original form, variant 2: correlated EXISTS.
select
    p.Date,
    count(*)
from Purchase p
where exists (
    select m.MediaId
    from MediaStudyGroup m
    where m.MediaId = p.MediaId
)
group by p.Date
order by p.Date;

-- Rewritten form: inner join to MediaStudyGroup.
-- NOTE(review): the join yields the same counts as IN / EXISTS only if
-- MediaId is unique in MediaStudyGroup; confirm before reusing this pattern.
-- Slide timing (assumed to apply to this query, listed second):
--   0.05 sec elapsed, 0.3 CPU-seconds
select
    p.Date,
    count(*)
from Purchase p
join MediaStudyGroup m
    on p.MediaId = m.MediaId
group by p.Date
order by p.Date;
-- Workaround example: UNION ALL view over the main and delta fact tables.
--
-- vPurchase exposes Purchase and DeltaPurchase as one rowset.
-- NOTE(review): both tables must have the same column list for
-- "select *" on each side of UNION ALL to line up; confirm.
create view vPurchase as
select * from Purchase
union all
select * from DeltaPurchase;
go
-- "go" is the SSMS / sqlcmd batch separator: CREATE VIEW has to be in a batch
-- of its own, so the queries below must not share a batch with it.

-- Query 1: the view joined to Date only.
-- Slide caption (assumed to apply to this query, listed first):
--   Batch mode, 0.1 sec elapsed
select
    p.Date,
    d.DayNumOfMonth,
    count(*)
from vPurchase as p, Date d
where p.Date = d.DateId
group by p.Date, d.DayNumOfMonth;

-- Query 2: the same query with an additional join to Media.
-- Slide caption (assumed to apply to this query, listed second):
--   Row mode, 19 sec elapsed
select
    p.Date,
    d.DayNumOfMonth,
    m.Genre,
    count(*)
from vPurchase p, Date d, Media m
where p.Date = d.DateId
  and m.MediaId = p.MediaId
group by p.Date, d.DayNumOfMonth, m.Genre;
-- Workaround example: aggregate Purchase and DeltaPurchase separately, then
-- UNION ALL the two small summaries and aggregate once more, instead of
-- aggregating over a UNION ALL of the base tables.
-- Slide caption: Batch mode, 0.3 sec elapsed
with MainSummary (date, DayNumOfMonth, Genre, c) as (
    -- per-(date, day-of-month, genre) row counts from the main table
    select
        p.Date,
        d.DayNumOfMonth,
        m.Genre,
        count(*) c
    from Purchase p, Date d, Media m
    where p.Date = d.DateId
      and m.MediaId = p.MediaId
    group by p.Date, d.DayNumOfMonth, m.Genre
),
DeltaSummary (date, DayNumOfMonth, Genre, c) as (
    -- the same summary computed over the delta table
    select
        p.Date,
        d.DayNumOfMonth,
        m.Genre,
        count(*) c
    from DeltaPurchase p, Date d, Media m
    where p.Date = d.DateId
      and m.MediaId = p.MediaId
    group by p.Date, d.DayNumOfMonth, m.Genre
),
CombinedSummary (date, DayNumOfMonth, Genre, c) as (
    --union all across the output of the two queries
    select * from MainSummary
    UNION ALL
    select * from DeltaSummary
)
--group by to aggregate the data.
-- sum(c) adds the partial counts where the same group appears in both summaries.
select
    t.date,
    t.DayNumOfMonth,
    t.Genre,
    sum(c) as c
from CombinedSummary as t
group by t.date, t.DayNumOfMonth, t.Genre;
-- Workaround example: scalar count(*) over the whole table.
--
-- Original form: aggregate with no GROUP BY.
-- Slide timing (assumed to apply to this query, listed first):
--   1.0 sec elapsed, 15 CPU-seconds
select count(*)
from Purchase;

-- Rewritten form: count per Date first, then sum the per-date counts.
-- The statement above must end with ';' so that WITH starts a new statement
-- rather than being read as a table-hint clause on Purchase.
-- Slide timing (assumed to apply to this query, listed second):
--   0.06 sec elapsed, 0.3 CPU-seconds
with CountByDate (Date, c) as (
    select
        Date,
        count(*)
    from Purchase
    group by Date
)
select sum(c)
from CountByDate;
-- Workaround example, original form: two count(distinct ...) aggregates
-- computed in a single query over Purchase joined to Media, restricted to
-- the 'Horror' category.
-- Slide timing: 26 sec elapsed, 31 CPU-seconds
select
    p.Date,
    count(distinct p.UserId) as UserIdCount,
    count(distinct p.MediaId) as MediaIdCount
from Purchase p, Media m
where p.MediaId = m.MediaId
  and m.Category in ('Horror')
group by p.Date;
-- Workaround example, rewritten form: compute each count(distinct ...) in its
-- own CTE (one distinct aggregate per query), then join the two per-date
-- results on Date. Both CTEs apply the same join and 'Horror' filter, so they
-- produce the same set of dates.
-- Slide timing: 0.5 sec elapsed, 6 CPU-seconds
with DistinctMediaIds (Date, MediaIdCount) as (
    -- distinct media purchased per date
    select
        p.Date,
        count(distinct p.MediaId) as MediaIdCount
    from Purchase p, Media m
    where p.MediaId = m.MediaId
      and m.Category in ('Horror')
    group by p.Date
),
DistinctUserIds (Date, UserIdCount) as (
    -- distinct purchasing users per date
    select
        p.Date,
        count(distinct p.UserId) as UserIdCount
    from Purchase p, Media m
    where p.MediaId = m.MediaId
      and m.Category in ('Horror')
    group by p.Date
)
select
    m.Date,
    m.MediaIdCount,
    u.UserIdCount
from DistinctMediaIds m
join DistinctUserIds u
    on m.Date = u.Date;
Summary