db320: t-sql in sql server denalimscom.co.il/downloads/sqlpresentations/12061.pdf · 20101107 106...

Post on 23-Feb-2020

17 Views

Category:

Documents

0 Downloads

Preview:

Click to see full reader

TRANSCRIPT

T-SQL

in SQL Server

2012

Assaf Fraenkel

Senior Architect MCS

-- Page 1: fetch the first 5 orders with SQL Server 2012 OFFSET-FETCH paging.
-- ORDER BY is mandatory for OFFSET-FETCH; ordering by (orderdate, orderid)
-- makes the sort deterministic so consecutive pages never overlap.
SELECT
    orderid,
    orderdate,
    custid,
    empid
FROM Sales.Orders
ORDER BY orderdate, orderid
OFFSET 0 ROWS FETCH NEXT 5 ROWS ONLY;

-- Page 2: skip the 5 rows already shown, then take the next 5.
-- Same deterministic sort key as page 1 keeps the pages consistent.
SELECT
    orderid,
    orderdate,
    custid,
    empid
FROM Sales.Orders
ORDER BY orderdate, orderid
OFFSET 5 ROWS FETCH NEXT 5 ROWS ONLY;

-- Anchor-based paging for a deep page (rows 501-505): first isolate only the
-- narrow sort keys of the requested page, then fetch the remaining columns
-- per key. Sorting just (orderid, orderdate) is far cheaper than sorting the
-- wide rows when the OFFSET is large.
WITH SortKeys AS

(

-- ORDER BY is allowed inside a CTE here only because OFFSET-FETCH follows it.
SELECT orderid, orderdate

FROM Sales.Orders

ORDER BY orderdate, orderid

OFFSET 500 ROWS FETCH NEXT 5 ROWS ONLY

)

SELECT SK.orderid, SK.orderdate, A.custid, A.empid

FROM SortKeys AS SK

-- CROSS APPLY runs the correlated lookup once per page key — effectively a
-- manual key lookup for the 5 qualifying rows.
CROSS APPLY (SELECT custid, empid

FROM Sales.Orders AS O

WHERE O.orderid = SK.orderid) AS A;

-- Define a sequence of INTs that starts at 1 and steps by 1.
-- Unlike IDENTITY, a sequence is a standalone schema object: values can be
-- drawn before an INSERT and shared across tables.
CREATE SEQUENCE dbo.SeqOrderIDs AS INT
    START WITH 1
    INCREMENT BY 1;

-- Draw the next value from the sequence.
SELECT NEXT VALUE FOR dbo.SeqOrderIDs;

-- running sum

-- Running account balance: cumulative SUM of val per account (actid),
-- accumulated in tranid order.
SELECT actid, tranid, val,

SUM(val) OVER(PARTITION BY actid

ORDER BY tranid

-- The explicit ROWS frame matters: the default frame is RANGE, which treats
-- ORDER BY ties as peers and is slower in SQL Server.
ROWS BETWEEN UNBOUNDED PRECEDING

AND CURRENT ROW) AS balance

FROM dbo.Transactions;

-- moving average

-- Moving average of val per account over a 4-row window: the current row
-- plus the 3 preceding rows in tranid order.
SELECT actid, tranid, val,

AVG(val) OVER(PARTITION BY actid

ORDER BY tranid

-- Near the start of a partition the frame holds fewer than 4 rows; AVG
-- divides by however many rows are actually present in the frame.
ROWS BETWEEN 3 PRECEDING

AND CURRENT ROW) AS movavg

FROM dbo.Transactions;

-- Diff between current order value and prev/next order values

-- LAG/LEAD fetch val from the previous/next order of the same customer.
-- At partition edges they return NULL, so diffprv is NULL on a customer's
-- first order and diffnxt is NULL on the last.
SELECT custid, orderid, orderdate, val,

val - LAG(val) OVER(PARTITION BY custid ORDER BY orderdate, orderid) AS diffprv,

val - LEAD(val) OVER(PARTITION BY custid ORDER BY orderdate, orderid) AS diffnxt

FROM Sales.OrderValues;

-- Diff between current order value and first/last order values
SELECT custid, orderid, orderdate, val,
       -- Explicit ROWS frame added: without it, FIRST_VALUE runs under the
       -- default RANGE frame, which SQL Server implements with an on-disk
       -- worktable spool — same result, noticeably slower.
       val - FIRST_VALUE(val) OVER(PARTITION BY custid
                                   ORDER BY orderdate, orderid
                                   ROWS BETWEEN UNBOUNDED PRECEDING
                                            AND CURRENT ROW) AS difffirst,
       -- LAST_VALUE must extend the frame to UNBOUNDED FOLLOWING; the default
       -- frame ends at the current row and would simply return val itself.
       val - LAST_VALUE(val) OVER(PARTITION BY custid
                                  ORDER BY orderdate, orderid
                                  ROWS BETWEEN CURRENT ROW
                                           AND UNBOUNDED FOLLOWING) AS difflast
FROM Sales.OrderValues;

PARSE, TRY_CONVERT, TRY_PARSE

EOMONTH, DATEFROMPARTS, DATETIME2FROMPARTS, DATETIMEFROMPARTS, DATETIMEOFFSETFROMPARTS, SMALLDATETIMEFROMPARTS, TIMEFROMPARTS

CHOOSE, IIF (important for migrations from Access)

CONCAT, FORMAT

LOG

-- TRY/CATCH error-handling template (placeholders <x>, <y>, <z> stand for
-- concrete error numbers). Only errors we know how to handle are dealt with
-- here; everything else is re-thrown unchanged for the caller.
BEGIN TRY

... usual code goes here ...

END TRY

BEGIN CATCH

-- Dispatch on the number of the error that transferred control here.
IF ERROR_NUMBER() = <x>

BEGIN

... handle error <x> ...

END

ELSE IF ERROR_NUMBER() = <y>

BEGIN

... handle error <y> ...

END

ELSE IF ERROR_NUMBER() = <z>

BEGIN

... handle error <z> ...

END

ELSE

-- Parameterless THROW is valid only inside CATCH: it re-raises the original
-- error with its original number, severity, state and line intact.
THROW; -- let upper level deal with error

END CATCH;

-- example without a saved message

-- Raise an ad-hoc user error: (error number, message text, state).
THROW 54321, 'This is a user error.', 1;

-- example with saved message

-- add message

-- Register a reusable message in sys.messages: id 43112609, severity 16.
-- The %s placeholder is substituted at format time.
EXEC sp_addmessage 43112609, 16, '%s is prime. It''s not an error but it sure

deserves attention!';

-- format a message and raise an error

DECLARE @msg AS NVARCHAR(2048);

-- FORMATMESSAGE substitutes the argument into the stored message text
-- without raising anything; the following THROW raises the formatted text.
SET @msg = FORMATMESSAGE(43112609, '2^43112609');

THROW 43112609, @msg, 1;

-- Execute the proc while redeclaring the metadata (column names, types,
-- nullability) of the result sets it returns. The proc must produce exactly
-- two result sets compatible with these shapes or execution fails.
EXEC dbo.GetOrderInfo @orderid = 43671

WITH RESULT SETS

(

-- First result set: order header.
(

SalesOrderID INT NOT NULL,

OrderDate DATETIME NOT NULL,

TotalDue MONEY NOT NULL,

CurrencyRateID INT NULL

),

-- Second result set: order detail lines.
(

SalesOrderID INT NOT NULL,

SalesOrderDetailID INT NOT NULL,

OrderQty SMALLINT NOT NULL

)

);

sys.sp_describe_first_result_set

sys.dm_exec_describe_first_result_set, sys.dm_exec_describe_first_result_set_for_object

sys.sp_describe_undeclared_parameters

-- Demo of the FORCESCAN table hint: forces the optimizer to read the table
-- with a scan even where it might otherwise choose a seek. Hints override
-- the optimizer — use only for troubleshooting/demos.
-- NOTE(review): SELECT * kept for the demo; list columns in production code.
SELECT *

FROM Sales.Orders WITH (FORCESCAN)

WHERE orderdate >= '20080506';

-- Demo of the granular FORCESEEK hint: forces a seek on the named index
-- (idx_nc_orderdate), seeking on the orderdate column.
-- NOTE(review): SELECT * kept for the demo; list columns in production code.
SELECT *

FROM Sales.Orders WITH ( FORCESEEK (idx_nc_orderdate (orderdate) ) )

WHERE orderdate >= '20080506';

TRY_CONVERT(data_type, expression [, style])

TRY_CAST(expression AS data_type)

FORMAT(value, format [, culture])

PARSE(string_value AS data_type [USING culture])

TRY_PARSE(string_value AS data_type [USING culture])

IIF(boolean_expr, true_value, false_value)

CHOOSE(index, val1, val2,... [,valN])

CONCAT(val1, val2,… [,valN])

EOMONTH(date [, months_to_add])

DATEFROMPARTS (year, month, day)

TIMEFROMPARTS (hour, minutes, seconds, fractions, scale)

DATETIME2FROMPARTS (year, month, day ,hour, minutes, seconds, fractions, scale)

DATETIMEFROMPARTS (year, month, day, hour, minutes, seconds, milliseconds)

SMALLDATETIMEFROMPARTS (year, month, day, hour, minutes)

DATETIMEOFFSETFROMPARTS

Columnstore

Indexes: A Deep

Dive

Assaf Fraenkel

Senior Architect MCS

Waiting ….

Waiting ….

C1 C2 C3 C5 C6 C4

8

C1 C2 C3 C5 C6 C4

Row group

Segment

9

Row

group

10

Base table

A B C D

Encode ,

compress

Encode ,

compress

Encode ,

compress

Compressed

column segments

1M rows/group

Column store index

Blobs

Row

group

Row group

Row

group

Segment directory

New system table: sys.column_store_segments

Includes segment metadata: size, min, max, …

11

OrderDateKey ProductKey StoreKey RegionKey Quantity SalesAmount

20101107 106 01 1 6 30.00

20101107 103 04 2 1 17.00

20101107 109 04 2 2 20.00

20101107 103 03 2 1 17.00

20101107 106 05 3 4 20.00

20101108 106 02 1 5 25.00

20101108 102 02 1 1 14.00

20101108 106 03 2 5 25.00

20101108 109 01 1 1 10.00

20101109 106 04 2 4 20.00

20101109 106 04 2 5 25.00

20101109 103 01 1 1 17.00

OrderDateKey ProductKey StoreKey RegionKey Quantity SalesAmount

20101107 106 01 1 6 30.00

20101107 103 04 2 1 17.00

20101107 109 04 2 2 20.00

20101107 103 03 2 1 17.00

20101107 106 05 3 4 20.00

20101108 106 02 1 5 25.00

OrderDateKey ProductKey StoreKey RegionKey Quantity SalesAmount

20101108 102 02 1 1 14.00

20101108 106 03 2 5 25.00

20101108 109 01 1 1 10.00

20101109 106 04 2 4 20.00

20101109 106 04 2 5 25.00

20101109 103 01 1 1 17.00

OrderDateKey

20101107

20101107

20101107

20101107

20101107

20101108

ProductKey

106

103

109

103

106

106

StoreKey

01

04

04

03

05

02

RegionKey

1

2

2

2

3

1

Quantity

6

1

2

1

4

5

SalesAmount

30.00

17.00

20.00

17.00

20.00

25.00

OrderDateKey

20101108

20101108

20101108

20101109

20101109

20101109

ProductKey

102

106

109

106

106

103

StoreKey

02

03

01

04

04

01

RegionKey

1

2

1

2

2

1

Quantity

1

5

1

4

5

1

SalesAmount

14.00

25.00

10.00

20.00

25.00

17.00

OrderDateKey

20101107

20101107

20101107

20101107

20101107

20101108

ProductKey

106

103

109

103

106

106

StoreKey

01

04

04

03

05

02

RegionKey

1

2

2

2

3

1

Quantity

6

1

2

1

4

5

SalesAmount

30.00

17.00

20.00

17.00

20.00

25.00

OrderDateKey

20101108

20101108

20101108

20101109

20101109

20101109

ProductKey

102

106

109

106

106

103

StoreKey

02

03

01

04

04

01

RegionKey

1

2

1

2

2

1

Quantity

1

5

1

4

5

1

SalesAmount

14.00

25.00

10.00

20.00

25.00

17.00

ProductKey SalesAmount

OrderDateKey

StoreKey

01

04

04

03

05

02

StoreKey

02

03

01

04

04

01

RegionKey

1

2

2

2

3

1

RegionKey

1

2

1

2

2

1

Quantity

6

1

2

1

4

5

Quantity

1

5

1

4

5

1

OrderDateKey

20101107

20101107

20101107

20101107

20101107

20101108

OrderDateKey

20101108

20101108

20101108

20101109

20101109

20101109

ProductKey

106

103

109

103

106

106

ProductKey

102

106

109

106

106

103

SalesAmount

30.00

17.00

20.00

17.00

20.00

25.00

SalesAmount

14.00

25.00

10.00

20.00

25.00

17.00

-- Per-group total of C4 by C2. Only columns C2 and C4 are referenced, so a
-- columnstore scan needs to read just those two column segments — the point
-- of the surrounding columnstore illustration.
SELECT C2, SUM(C4)

FROM T

GROUP BY C2;

T.C2 T.C4

T.C2 T.C4

T.C2

T.C2

T.C2 T.C1

T.C1

T.C1

T.C1

T.C1 T.C3

T.C3

T.C3

T.C3

T.C3

T.C4

T.C4

T.C4

17

18

19

Min: 20101107 103 17.00

Max: 20101108 109 30.00

------------------------------------------

20101107 106 30.00

20101107 103 17.00

20101107 109 20.00

20101107 103 17.00

20101107 106 20.00

20101108 106 25.00

Min: 20101108 102 10.00

Max: 20101109 109 25.00 -------------------------------------------------------------

20101108 102 14.00

20101108 106 25.00

20101108 109 10.00

20101109 106 20.00

20101109 106 25.00

20101109 103 17.00

20

OrderDateKey

20101107

20101107

20101107

20101107

20101107

20101108

ProductKey

106

103

109

103

106

106

SalesAmount

30.00

17.00

20.00

17.00

20.00

25.00

OrderDateKey

20101108

20101108

20101108

20101109

20101109

20101109

ProductKey

102

106

109

106

106

103

SalesAmount

14.00

25.00

10.00

20.00

25.00

17.00

Min: 20101107 103 17.00

Max: 20101108 109 30.00

------------------------------------------

20101107 106 30.00

20101107 103 17.00

20101107 109 20.00

20101107 103 17.00

20101107 106 20.00

20101108 106 25.00

Min: 20101108 102 10.00

Max: 20101109 109 25.00 -------------------------------------------------------------

20101108 102 14.00

20101108 106 25.00

20101108 109 10.00

20101109 106 20.00

20101109 106 25.00

20101109 103 17.00

21

Good segment elimination for filters on leading key column using min/max values

22

23

List of qualifying rows

Column vectors

Batch object

24

25

Yes int, real, string, money, datetime, decimal <= 18 digits

No decimal > 18 digits, binary, varbinary, CLR, (n)varchar(max), varbinary (max),

uniqueidentifier, datetimeoffset with precision > 2

26

27

FAQs

Memory grant request in MB = [(4.2 * Num of columns in the CS index) + 68] * DOP + (Num of string cols * 34)

• Columnstore indexes can enable phenomenal performance gains

• Batch mode processing is an essential ingredient for speedup

• Some adjustments to schema and loading processes may be necessary

• Some queries can benefit from tuning

• Columnstore indexes are not a magic bullet

column_id segment_id min_data_id max_data_id

1 1 20120101 20120131

1 2 20120115 20120215

1 3 20120201 20120228

-- Segment elimination: given the min/max segment metadata above, the
-- predicate Date >= 20120201 lets the engine skip segment 1 entirely
-- (its max_data_id is 20120131); segments 2 and 3 overlap the range and
-- must still be read.
select Date, count(*)

from dbo.Purchase

where Date >= 20120201

group by Date

partition 1

Date = 20120301

segment 1 min(Date) = 20120301, max(Date) = 20120301

segment 2 “

segment 3 “

segment 4 “

segment 5 “

partition 2

Date = 20120302

segment 1 min(Date) = 20120302, max(Date) = 20120302

segment 2 “

segment 3 “

segment 4 “

segment 5 “

-- Average call duration for region 1 across date keys 20120101-20120131
-- (date_id appears to be an integer yyyymmdd key — consistent with the
-- partition/segment examples above).
select f.region_id,
       avg(f.duration)
from fact_CDR as f
where f.region_id = 1
  and f.date_id between 20120101 and 20120131
group by f.region_id

Date LicenseNum Measure

20120301 XYZ123 100

20120302 ABC777 200

Date LicenseId Measure

20120301 1 100

20120302 2 200

LicenseId LicenseNum

1 XYZ123

2 ABC777

Data Quality

Services Elad Ziklik

Microsoft

DATA QUALITY 101

What is Data Quality ?

Data Quality represents the degree to which the data is suitable for business usages

Data Quality is built through People + Technology + Processes

Bad Data Bad Business

3

5

Top 3

impediments

Source: Information Week Reports, 2011

Why Data Quality is Important

6

Top Barrier for BI

Source: Information Week Reports, 2011

7

DQ is MDM top driver

Source: Information Week Reports, 2011

8

Common Data Quality Issues Data Quality

Issue Sample Data Problem

Standard Are data elements

consistently defined and

understood?

Gender code = M, F, U in one system

and Gender code = 0, 1, 2 in another

system

Complete Is all necessary data present? 20% of customers’ last name is blank,

50% of zip-codes are 99999

Accurate Does the data accurately

represent reality or a

verifiable source?

A Supplier is listed as ‘Active’ but went

out of business six years ago

Valid Do data values fall within

acceptable ranges?

Salary values should be between

60,000-120,000

Unique Data appears several times Both John Ryan and Jack Ryan appear

in the system – are they the same

person?

9

DQ Market – A Brief Overview

Overall market size for DQ software in 2010 was $800M , forecasted to grow 16% y/y during the next five years

Focus on the business user - DQ is no longer just IT Pro – orgs seek to leverage the data experts in the DQ processes

Business process – For data quality (and MDM) initiatives to be a success – they need to support integration with the existing business processes

20.1%

15.9%

15.2% 13.0%

5.3%

30.4%

SAS Institute

IBM

Informatica

SAP

QAS

Other Vendors

Data Integration market ($2.6B in 2009) Source: Gartner

10

Components of Data Quality Solutions

Amend, remove or enrich data that is incorrect or incomplete. This includes correction, enrichment and standardization.

Identifying, linking or merging related entries within or across sets of data.

Cleansing Matching

Profiling Monitoring Analysis of the data source to provide insight into the quality of the data and help to identify data quality issues.

Tracking and monitoring the state of Quality activities and Quality of Data.

Common Data Quality Issues

11

Name Gender Street House # Zip code City State D.O.B

John Doe Male 60th street 45 New York New York 08/12/64

Jane Doe Male Jonathan ln 36 10023 Poughkeepsy NY 21-dec-1954

Name Gender Street House # Zip

code

City State D.O.B

John Doe Male E 60th St 45W 10022 New York NY 08/12/64

Jane Doe Female Jonathan

Lane

36 10023 Poughkeepsie NY 12/21/54

Completeness Accuracy Conformity Consistency Uniqueness

Name Address Postal Code City State

John Smith 545 S Valley View Drive # 136 34563 Anytown New York

Margaret & John smith 545 Valley View ave unit 136 34563-2341 Anytown New York

Maggie Smith 545 S Valley View Dr Anytown New York

John Smith 545 Valley Drive St. 34253 NY NY

Name Address Zip Code City State Cluster

John Smith 545 S Valley View Drive # 136 34563 Anytown New York 1

Margaret & John smith 545 Valley View ave unit 136 34563-2341 Anytown New York 1

Maggie Smith 545 S Valley View Dr Anytown New York 1

John Smith 545 Valley Drive St. 34253 NY NY 2

Before

Before

After

After

INTRODUCING DQS

AlwaysOn

ColumnStore Index

Power View

Data Quality Services

Distributed Replay

Reporting Alerts Multiple Secondaries

Availability Groups

T-SQL

Unstructured Data Performance

Flexible Failover Policy

Contained Database Authentication

SharePoint Active Directory Support

14

Key Points About DQS

High quality data is critical to effective business intelligence and to business activities

DQS is an on-premise Data Quality product in SQL Server 2012, extendible with knowledge from multiple parties thru Azure DataMarket

Richer DQ knowledge and capabilities in the cloud will make it even easier to provide high quality data

Data Quality Services (DQS) is a Knowledge-Driven data quality solution enabling IT Pros and data stewards to easily improve the quality of their data

15

Microsoft’s DQS Solution Concepts

Knowledge-Driven

Semantics

Knowledge Discovery

Based on a Data Quality Knowledge Base (DQKB)

Data Domains capture the semantics of your data

Acquires additional knowledge the more you use it

Open and Extendible

Easy to use

Add user-generated knowledge & 3rd party reference data providers

User experience designed for increased productivity

16

DQS Process

Build

Use

DQ Projects

Knowledge

Management

Integrated

Profiling Knowledge

Base

Data Quality Knowledge Base (DQKB)

Domains

Represent

the data

type

Values

Rules & Relations

3rd party Reference

Data

Knowledge

Base

Composite

Domains

Matching Policy

Domains

17

Matching

Reference

Data

DQ Clients

DQS UI

DQ Server

DQ Projects Store Common Knowledge Store Knowledge Base Store

DQ Engine

3rd Party

/ Internal

MS DQ

Domains Store

Reference

Data

Services

Reference

Data Sets

SSIS DQ

Component

DQ Active

Projects

MS Data

Domains

Local

Data

Domains

Published

KBs

Knowledge

Discovery

Data

Profiling &

Exploration

Cleansing

Knowledge

Discovery

and

Management

Interactive

DQ Projects

Data

Exploration

Azure Market Place

Categorized

Reference Data

Categorized

Reference Data

Services

Reference Data API

(Browse, Get,

Update…)

RD Services API

(Browse, Set,

Validate…)

MDS Excel

Add in

Future Clients –

Excel,

Dynamics

• Define

• Manage

• Coordinate

• Measure

• Continuously Improve

• Control and Monitor

DQS Empowers the users

With DQS the IW / Data Expert can get actively involved in Data Quality initiatives

• Define your data quality goals

• Create domains/composite domains

• Discover knowledge from your data

• Define rules & policies for data validation, cleansing

and matching

Creating a Knowledge Base

20

DQS Knowledge Sources

21

• Easily cleanse and enrich data with Reference Data Services from Azure MarketPlace

DataMarket

• A website that contains DQS knowledge available for downloading

DQS Data Store

• Discover / Import knowledge from your organization data

Organization Data

• A set of data domains that come out of the box with DQS

Out of the Box Knowledge

21

DQS can use Reference Data Services for validating,

cleansing and enriching your data

22

Reference Data Services (RDS)

23

Demo 1 – Introducing YOUR DATA Account

ID Home Team

Team Type

Revenue Type Sales

Home Arena

Address Line City State Zip

A124324 Boston Celtics Basketball

Food & Beverages 655 TD Garden 100 Legends Way Boston MA 2114

7676862 New York Yankees Baseball Music 389 Yankee Stadium

East 161st Street & River Avenue NY NY

4934235 Seattle Mariners MLB Music 443 Safeco Field 1516 First Avenue S Seattle WA 98134

Building Your Knowledge

24

Demo 1 – Introducing YOUR DATA

Account ID

Building Your Knowledge

Account ID

Home Team

Team Type

Revenue Type Sales

Home Arena

Address Line City State Zip

A124324 Boston Celtics Basketball

Food & Beverages 655 TD Garden 100 Legends Way Boston MA 2114

7676862 New York Yankees Baseball Music 389 Yankee Stadium

East 161st Street & River Avenue NY NY

4934235 Seattle Mariners MLB Music 443 Safeco Field 1516 First Avenue S Seattle WA 98134

Validation Rules:

• Must be between 3 and 9 characters long

• Can contain numbers or the letter A

Account ID

A124324 7676862 4934235

25

Demo 1 – Introducing YOUR DATA

Account ID

Building Your Knowledge

Account ID

Home Team

Team Type

Revenue Type Sales

Home Arena

Address Line City State Zip

A124324 Boston Celtics Basketball

Food & Beverages 655 TD Garden 100 Legends Way Boston MA 2114

7676862 New York Yankees Baseball Music 389 Yankee Stadium

East 161st Street & River Avenue NY NY

4934235 Seattle Mariners Baseball Music 443 Safeco Field 1516 First Avenue S Seattle WA 98134

List of values and synonyms:

• Basketball = NBA

• Baseball = MLB

• Football = NFL

• Hockey = NHL

• …

Account ID

A124324 7676862 4934235

Team Type

Team Type

Basketball

Baseball MLB

26

Demo 1 – Introducing YOUR DATA

Account ID

Building Your Knowledge

Account ID

Home Team

Team Type

Revenue Type Sales

Home Arena

Address Line City State Zip

A124324 Boston Celtics Basketball

Food & Beverages 655 TD Garden 100 Legends Way Boston MA 2114

7676862 New York Yankees Baseball Music 389 Yankee Stadium

East 161st Street & River Avenue NY NY

4934235 Seattle Mariners Baseball Music 443 Safeco Field 1516 First Avenue S Seattle WA 98134

Reference Data Service:

• Composite Domain containing Address

Line, City, State & Zip Domains

Account ID

A124324 7676862 4934235

Team Type

Team Type

Basketball

Baseball MLB

Address Line City State Zip

100 Legends Way Boston MA 2114

East 161st Street & River Avenue NY NY

1516 First Avenue S Seattle WA 98134

Composite Domain - Full Address

Address Line City State Zip

DEMO 1 – DQS BASICS

• Building a Knowledge Base

• Knowledge Discovery

• Cleansing your data

• Cleanse 2 Knowledge Base

28

What Did We See?

DQS IN SSIS

DQS & SSIS

• Data Quality is often needed as part of an

on-going batch process

• DQS SSIS component leverages the DQS

knowledge and engine, with the SSIS

execution environment

DQS Component Overview

Knowledge

Base

Reference Data

Definition

Values/Rules Source +

Mapping DQS Cleansing

Component

SSIS Package

Destination

Design Run

Monitor Review & Manage

Activity

Monitoring Interactive Cleansing

Project

Control advanced

output settings,

including cleansing

info columns

Configure the DQS

server and KB

Map required

columns to domains

Configuring the DQS Component

Use KBs you trust!

Use Conditional Split for

improved control and

automation

Use DQS Client for

reviewing SSIS results

Use SSIS Cleansing Info outputs for

further processing - analytics,

integration, etc.

Tips and Best Practices

MATCHING (OR THE ART

OF DEDUPING YOUR DATA)

Record matching is the task of identifying

records that match the same real world entity.

For example:

Where Do Duplicate Records Come From?

Reason Example

Poorly designed software No verification of existing records upon entry

Integrating data from

multiple systems

Mergers and acquisitions, systems with different

data integrity rules

Manually entered data Human errors can creep into the system when

input is not validated

Formatting and

abbreviations

"Doctor Robert Smith" Vs. "Dr. Bob Smith"

Change of attributes A person changed his address, last name,

marital status…

Tips & Tricks – Unclean Data & Matching

Matching Results

Matching Results Reasoning

The Data

Identifies exact and approximate matches,

enabling removal of duplicate data.

Enables creating a matching policy interactively

using a computer-assisted process.

Ensures that values that are equivalent, but were

entered in a different format or style, are in fact

rendered uniform.

DQS Matching Key Points

40

Demo 3 – Introducing YOUR DATA

Building Your Knowledge

Track Home Team No. Plays

Britney Spears – I’m a Slave 4 U Chicago Bulls 103

Britney Spears – Slave 4 U New Jersey Nets 198

Britney Spears – Slave For You Boston Celtics 234

Britney – I’m a Slave For You Toronto Raptors 123

41

Demo 3 – Introducing YOUR DATA

Building Your Knowledge

CD Parsing – “Track” Composite Domain

• Artist

• Song

• Delimiter based parsing using “-”

Composite Domain - Track

Song Artist

Track Home Team No. Plays

Britney Spears – I’m a Slave 4 U Chicago Bulls 103

Britney Spears – Slave 4 U New Jersey Nets 198

Britney Spears – Slave For You Boston Celtics 234

Britney – I’m a Slave For You Toronto Raptors 123

Track

Britney Spears – I’m a Slave 4 U

Britney Spears – Slave 4 U

Britney Spears – Slave For You

Britney – I’m a Slave For You

DEMO 3 – MATCHING

DQS allows you to find duplicates in your data

• Create, refine and build your matching policy

• Run a matching project using your KB

43

Demo 3 - Matching / De-Duplication

Similarity, select Similar if field values can be similar. Select Exact if field values

must be identical.

Weight, determines the contribution of each domain in the rule to the overall

matching score for two records.

Prerequisite validates whether field values return a 100% match; else the

records are not considered a match.

Minimum matching score is the threshold for considering two records as a

match.

Matching Rules - Summary

Uniqueness Best used for Description Examples

Low

• Define as Prerequisite

• Define with lower weights

Provides discriminatory

information

Gender, City, State

High

• Define as Similar or Exact

• Define with higher weights

Provides highly identifiable

information and is highly

discriminatory

Names (First, Last,

Company),

Address Line 1

Completeness Best used for Description

Low

Do not use or define with low weight High level of missing values

High

Include for matching if the column

provides highly identifiable

information

Low level of missing values

46

DQS Value Proposition

Knowledge-Driven

• Rich semantic Knowledge Base

• Continuous improvement as

knowledge is discovered

• Build once, reuse for multiple

DQ improvements

Open and Extendible

Easy to use

• Focus on cloud-based

Reference Data

• User-generated knowledge

• Integration with SSIS

• Focus on productivity and

user experience

• Designed for business users

• Out-of-the-box knowledge (DQ

content)

47

Additional DQS Resources

DQS Blog

Tips, tricks and

guidance on best

practices for using

DQS – courtesy of the

DQS team

DQS Movies

A set of getting

started movies for

an easy introduction

to DQS

DQS Forum

Come participate in

DQS related

discussions in our DQS

forum on MSDN

Available Here blogs.msdn.com/b/dqs Available Here

Need to preserve data puts

focus on ALTER scripts

Dependencies make even simple

tasks complex

Errors detected late

Hard to synchronize app and

database versions

Target different editions, cloud

Developing Databases

is Hard!

Database Development

with SQL Server 2012

Developer Tools

Maor David-Pur Developer Advisor – ISV @maordp

Declarative not scripted

Model-based design and validation

Same tools when connected or offline

Use schema differencing for:

Incremental import and deployment

Drift detection and reconciliation

Schema and app under source code control

Test locally, deploy to SQL Server or SQL

Azure

There is a Better Way…

• What is SSDT?

• Online Demo

• Offline Demo

• Control Platform Demo

Agenda

SQL Server Developer Tools

SSDT

Database

Services

Analysis

Services

Reporting

Services

Integration

Services

SSDT

Database

Services

Analysis

Services

Reporting

Services

Integration

Services

SSDT Database Services

T-SQL

Language

Services

T-SQL

Debugging

SQL Server

Explorer

Database

Publish

Table

Designer

Buffered

Declarative

Editing

SQL/CLR Schema

Compare

Static

Analysis

Local

Database

Runtime

Connected Development

Offline Development

Control Target Platform

Demos…

Connected Development

Demo

SSMS-like explorer, query

window

Buffered declarative approach

Model-based with error

detection

Code-backed designer

Modern TSQL coding

experience

Connected Development

Demo

Offline Development Working with Visual Studio

Database Projects

Database definition

managed in Visual Studio

Advanced language services

(Go To Definition, Find All

References, Refactoring…)

F5 debugging with new

Local Database Runtime

Visualize and migrate

schema changes

Offline Development

Demo

Control

Target

Platform

Target-aware (SQL Azure)

Publish direct, via script, or DAC

Offline and connected

development for SQL Azure

Control Target Platform

• What is SSDT?

• Online Development

• Offline Development

• Control Target Platform

Summary

With SQL Server 2012

On the web

With Visual Studio vNext

SSDT Availability

SQL Azure features supported as they emerge

Application/database development integration

On the horizon

Reference data support

Database diagrams

Query designer

Post 2012 Directions

Resources

MSDN Magazine - September 2011

The "Juneau" Database Project

MSDN Forum

Team Blog

Twitter #SQLDataTools

Introductory Videos

Build SQL Server databases using Juneau without leaving Visual Studio

Juneau integrates with existing .NET applications via Entity Framework

Creating and deploying a database project to SQL Azure using Juneau

Data Developer Center - Download latest SSDT bits

Look for update in Q4/2011

top related