-
Notifications
You must be signed in to change notification settings - Fork 0
/
Windowed_Functions_and_Bulk_Load.sql
194 lines (167 loc) · 6.15 KB
/
Windowed_Functions_and_Bulk_Load.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
IF OBJECT_ID('tempdb.dbo.#Client', 'U') IS NOT NULL
DROP TABLE #Client;
IF OBJECT_ID('tempdb.dbo.#ClientFinal', 'U') IS NOT NULL
DROP TABLE #ClientFinal;
GO
CREATE TABLE #Client
(ID VARCHAR(200),
DateOfVisit VARCHAR(200),
Country VARCHAR(200),
Cathegory VARCHAR(200),
mobileDeviceBranding VARCHAR(200),
mobileDeviceModel VARCHAR(200),
mobileMarketingName VARCHAR(200),
PageViews INTEGER,
ProjectID INTEGER
);
BULK INSERT [#Client] FROM 'C:\Data\data.csv'
WITH (FIRSTROW = 2
, FIELDTERMINATOR = ','
, ROWTERMINATOR = '0x0a'
, CODEPAGE = 'ACP'
, DATAFILETYPE ='char'
)
/*SELECT * FROM #Client
ORDER BY DateOfVisit,ID ASC;*/
-- CODEPAGE, DATAFILETYPE parameters do not work in my edition, there is workaround with a CAST or REPLACE function,
-- which is too much time consuming. Ths means that some characters may be mismatched or corrupted.
-- FIELDQUOTE parameter does not work, thus I use a workaround with REPLACE function
UPDATE [#Client]
SET ID = REPLACE(ID,'"', ''),
DateOfVisit = REPLACE(DateOfVisit,'"', '') ,
Country = REPLACE(Country,'"', ''),
Cathegory = REPLACE(Cathegory,'"', ''),
mobileDeviceBranding = REPLACE(mobileDeviceBranding,'"', ''),
mobileDeviceModel = REPLACE(mobileDeviceModel,'"', ''),
mobileMarketingName = REPLACE(mobileMarketingName,'"', ''),
PageViews = REPLACE(PageViews,'"', ''),
ProjectID = REPLACE(ProjectID,'"', '')
-- Value (not set) is replaced with NULL value
-- I presume that (not set) indicates either IP with a unknown location
-- or an unknown IP or a crashed procedure. It is necessary to further investigate
-- a reason for this output.
UPDATE [#Client]
SET ID = NULLIF(ID,'(not set)'),
DateOfVisit = NULLIF(DateOfVisit,'(not set)') ,
Country = NULLIF(Country,'(not set)'),
Cathegory = NULLIF(Cathegory,'(not set)'),
mobileDeviceBranding = NULLIF(mobileDeviceBranding,'(not set)'),
mobileDeviceModel = NULLIF(mobileDeviceModel,'(not set)'),
mobileMarketingName = NULLIF(mobileMarketingName,'(not set)')--,
--PageViews = NULLIF(PageViews,'(not set)'),
--ProjectID = NULLIF(ProjectID,'(not set)')
-- There have been strings 'NULL', which should be changed to NULL value
UPDATE [#Client]
SET ID = NULLIF(ID,'NULL'),
DateOfVisit = NULLIF(DateOfVisit,'NULL') ,
Country = NULLIF(Country,'NULL'),
Cathegory = NULLIF(Cathegory,'NULL'),
mobileDeviceBranding = NULLIF(mobileDeviceBranding,'NULL'),
mobileDeviceModel = NULLIF(mobileDeviceModel,'NULL'),
mobileMarketingName = NULLIF(mobileMarketingName,'NULL')--,
--PageViews = NULLIF(PageViews,'NULL'),
--ProjectID = NULLIF(ProjectID,'NULL')
-- Value NA is replaced with NULL value
UPDATE [#Client]
SET ID = NULLIF(ID,'NA'),
DateOfVisit = NULLIF(DateOfVisit,'NA') ,
Country = NULLIF(Country,'NA'),
Cathegory = NULLIF(Cathegory,'NA'),
mobileDeviceBranding = NULLIF(mobileDeviceBranding,'NA'),
mobileDeviceModel = NULLIF(mobileDeviceModel,'NA'),
mobileMarketingName = NULLIF(mobileMarketingName,'NA')
/*SELECT * FROM #Client;*/
--I can switch ID to integer after column consolidation (It was possible to define ID as
--Integer in the first place, but it is more careful approach to start with strings,
--when loading unknown data)
alter table [#Client]
alter column ID int;
GO
update [#Client]
SET DateOfVisit = convert(date,convert(varchar(10),DateOfVisit,120));
/*SELECT *
FROM #Client
ORDER BY ID ASC;*/
SELECT DATENAME(weekday,A.DateOfVisit) Weekday_ ,A.* INTO #ClientFinal
FROM #Client A
ORDER BY ID ASC;
/*SELECT *
FROM #ClientFinal
ORDER BY ID ASC;
*/
GO
SELECT mobileDeviceBranding AS Brands_in_Germany_and_Nigeria_in_January_2017_on_Mondays
FROM #ClientFinal
WHERE Country IN ('Germany', 'Nigeria')
AND DateOfVisit Between convert(date,'2017-01-01') AND convert(date,'2017-01-31')
AND Weekday_ = 'Monday'
--AND mobileDeviceBranding IS NOT NULL --
GROUP BY mobileDeviceBranding
ORDER BY mobileDeviceBranding;
GO
SELECT SUM(PageViews) AS Total_Page_Views, ProjectID
FROM #ClientFinal
WHERE Weekday_ IN ('Sunday', 'Saturday')
GROUP BY ProjectID
ORDER BY Total_Page_Views DESC;
/*SELECT ProjectID
FROM #ClientFinal --Alternative solution
WHERE Weekday_ IN ('Saturday', 'Sunday')
GROUP BY ProjectID
ORDER BY Sum(PageViews) DESC;*/
GO
/*SELECT --Auxiliary SELECT
SPV.ProjectID, SPV.Weekday_, SPV.Total_Page_Views FROM
(
SELECT
ROW_NUMBER()
OVER (PARTITION BY PV.ProjectID ORDER BY PV.Total_Page_Views DESC) AS RN,
PV.ProjectID, PV.Weekday_, PV.Total_Page_Views
FROM(
SELECT SUM(PageViews) AS Total_Page_Views, ProjectID, Weekday_ FROM #ClientFinal
GROUP BY ProjectID, Weekday_
) AS PV
) AS SPV
WHERE RN <= 2
;*/
SELECT
SPV.ProjectID, SPV.mobileDeviceBranding, SPV.Total_Page_Views FROM
(
SELECT
ROW_NUMBER()
OVER (PARTITION BY PV.ProjectID ORDER BY PV.Total_Page_Views DESC) AS RN,
PV.ProjectID, PV.mobileDeviceBranding, PV.Total_Page_Views
FROM(
SELECT SUM(PageViews) AS Total_Page_Views, ProjectID, mobileDeviceBranding FROM #ClientFinal
GROUP BY ProjectID, mobileDeviceBranding
) AS PV
) AS SPV
WHERE RN <= 5
;
GO
SELECT
TOP 2 Weekday_, SUM(PageViews) AS "Page Views from iPhone"
FROM #ClientFinal
WHERE mobileDeviceModel = 'iPhone'
GROUP BY Weekday_
ORDER BY SUM(PageViews) DESC
;
GO
SELECT
SPV.Country, SPV.mobileDeviceBranding, SPV.Total_Page_Views FROM
(
SELECT
ROW_NUMBER()
OVER (PARTITION BY PV.Country ORDER BY PV.Total_Page_Views DESC) AS RN,
PV.Country, PV.mobileDeviceBranding, PV.Total_Page_Views
FROM(
SELECT SUM(PageViews) AS Total_Page_Views, Country, mobileDeviceBranding FROM #ClientFinal
WHERE Country IN ('Denmark', 'Romania', 'Georgia', 'Nigeria', 'Tunisia')
GROUP BY Country, mobileDeviceBranding
) AS PV
) AS SPV
WHERE RN <= 1
ORDER BY SPV.Total_Page_Views DESC
;
GO
--SELECT * FROM #ClientFinal; --Auxiliary SELECT