Sales/Stores as salesperstores label='sales per stores' format=dollar12.2 from sashelp.shoes; quit;
2.9 处理SQL常用函数 MEAN或AVG:均值
COUNT或N或FREQ:非缺失值个数 MAX:最大值 MIN:最小值
NMISS:缺失值个数 STD:标准差 SUM:求和 VAR:方差
2.9.1 求和sum proc sql;
select Region, Product,Sales,Stores, sum(Sales,Inventory,Returns) as total from sashelp.shoes; quit;
2.9.2 求均值avg proc sql;
select Region, Product,Sales,Stores, avg(Sales) as salesavg from sashelp.shoes; quit;
2.9.3 分组求均值group by proc sql;
select Region,
avg(Sales) as salesavg from sashelp.shoes group by Region; quit;
2.9.4 计数count proc sql;
select Region,count(*) as count from sashelp.shoes group by Region; quit;
2.9.5 HAVING数据子集 proc sql;
select Region,count(*) as count from sashelp.shoes group by Region having count(*)>50; quit;
其它的就不多作介绍了,多用用就熟悉了
2.10子查询
2.10.1 找出regions平均sales大于全部平均sales的region proc sql;
select Region,
avg(Sales) as salesavg from sashelp.shoes group by Region having avg(Sales)>
(select avg(Sales) from sashelp.shoes); quit;
2.10.2 ANY关键词介绍
>ANY(20,30,40) 最终效果:>20
=ANY(20,30,40) 最终效果:=20 or =30 or =40
例如,选择出region为united state的sales小于任意region为africa的sales的数据 proc sql;
select Region,Sales from sashelp.shoes
where Region='United States' and Sales
(select Sales from sashelp.shoes where Region='Africa'); quit;
这个例子没有多少意义,只是说明一下any的用法
2.10.3 ALL关键词介绍
>ALL (20,30,40) 最终效果:>40
例如,选择出region为united state的sales小于所有region为africa的sales的数据 proc sql;
select Region,Sales from sashelp.shoes
where Region='United States' and Sales
(select Sales from sashelp.shoes where Region='Africa'); quit;
2.10.4 EXISTS与NOT EXISTS proc sql; select *
from sashelp.shoes where exists
(select * from sashelp.orsales); quit;
SAS中的SQL语句完全教程之二:数据合并与建表、建视图
SAS中的SQL语句完全教程之二:数据合并与建表、建视图索引等
本系列全部内容主要以《SQL Processing with the SAS System (Course Notes)》为主进行讲解,本书是在网上下载下来的,但忘了是在哪个网上下的,故不能提供下载链接了,需要的话可以发邮件向我索取,我定期邮给大家,最后声明一下所有资料仅用于学习,不得用于商业目的,否则后果自负。
1 连接joins分为内连接inner joins和外连接outer joins
内连接:仅返回匹配的数据,最多可以有32个表同时进行内连接
外连接:返回所有匹配的数据和非匹配的数据,一次只能有两个表或视图进行外连接
迪卡尔积:返回表内所有可能的匹配情况。例如表A有10*20的数据,表B有30*40的数据,则两个表的迪卡尔积有(10+30)*(20+40)=40*60的数据
我们先建立两个数据集:
data march;
input flight $3. +5 date date7. +3 depart time5. +2 orig $3. +3 dest $3. +7 miles +6 boarded +6 capacity; format date date7. depart time5.; informat date date7. depart time5.; cards;
219 01MAR94 9:31 LGA LON 3442 198 250 622 01MAR94 12:19 LGA FRA 3857 207 250 132 01MAR94 15:35 LGA YYZ 366 115 178 271 01MAR94 13:17 LGA PAR 3635 138 250 302 01MAR94 20:22 LGA WAS 229 105 180 114 02MAR94 7:10 LGA LAX 2475 119 210 202 02MAR94 10:43 LGA ORD 740 120 210 219 02MAR94 9:31 LGA LON 3442 147 250 132 02MAR94 15:35 LGA YYZ 366 106 178 202 03MAR94 10:43 LGA ORD 740 118 210
219 03MAR94 9:31 LGA LON 3442 197 250 622 03MAR94 12:19 LGA FRA 3857 180 250 271 03MAR94 13:17 LGA PAR 3635 147 250 202 04MAR94 10:43 LGA ORD 740 148 210 219 04MAR94 9:31 LGA LON 3442 232 250 622 04MAR94 12:19 LGA FRA 3857 137 250 132 04MAR94 15:35 LGA YYZ 366 117 178 271 04MAR94 13:17 LGA PAR 3635 146 250 302 04MAR94 20:22 LGA WAS 229 115 180 114 05MAR94 7:10 LGA LAX 2475 117 210 202 05MAR94 10:43 LGA ORD 740 104 210 219 05MAR94 9:31 LGA LON 3442 160 250 622 05MAR94 12:19 LGA FRA 3857 185 250 132 05MAR94 15:35 LGA YYZ 366 157 178 271 05MAR94 13:17 LGA PAR 3635 177 250 114 06MAR94 7:10 LGA LAX 2475 128 210 202 06MAR94 10:43 LGA ORD 740 115 210 219 06MAR94 9:31 LGA LON 3442 163 250 132 06MAR94 15:35 LGA YYZ 366 150 178 302 06MAR94 20:22 LGA WAS 229 66 180 114 07MAR94 7:10 LGA LAX 2475 160 210 132 07MAR94 15:35 LGA YYZ 366 164 178 271 07MAR94 13:17 LGA PAR 3635 155 250 302 07MAR94 20:22 LGA WAS 229 135 180 ; run;
data delay;
input flight $3. +5 date date7. +2 orig $3. +3 dest $3. +3 delaycat $15. +2 destype $15. +8 delay; informat date date7.; format date date7.; cards;
114 01MAR94 LGA LAX 1-10 Minutes Domestic 8 202 01MAR94 LGA ORD No Delay Domestic -5 622 01MAR94 LGA FRA No Delay International -5 132 01MAR94 LGA YYZ 11+ Minutes International 14 302 01MAR94 LGA WAS No Delay Domestic -2 114 02MAR94 LGA LAX No Delay Domestic 0 202 02MAR94 LGA ORD 1-10 Minutes Domestic 5 219 02MAR94 LGA LON 11+ Minutes International 18 622 02MAR94 LGA FRA No Delay International 0 132 02MAR94 LGA YYZ 1-10 Minutes International 5 271 02MAR94 LGA PAR 1-10 Minutes International 4
302 02MAR94 LGA WAS No Delay Domestic 0 114 03MAR94 LGA LAX No Delay Domestic -1 202 03MAR94 LGA ORD No Delay Domestic -1 219 03MAR94 LGA LON 1-10 Minutes International 4 622 03MAR94 LGA FRA No Delay International -2 132 03MAR94 LGA YYZ 1-10 Minutes International 6 271 03MAR94 LGA PAR 1-10 Minutes International 2 302 03MAR94 LGA WAS 1-10 Minutes Domestic 5 114 05MAR94 LGA LAX No Delay Domestic -2 202 06MAR94 LGA ORD No Delay Domestic 219 06MAR94 LGA LON 11+ Minutes International 132 06MAR94 LGA YYZ 1-10 Minutes International 302 06MAR94 LGA WAS 1-10 Minutes Domestic 622 07MAR94 LGA FRA 11+ Minutes International 132 07MAR94 LGA YYZ No Delay International 271 07MAR94 LGA PAR 1-10 Minutes International 302 07MAR94 LGA WAS No Delay Domestic ; run;
1.1 内连接 proc sql;
create table innerjoins as select a.*,b.*
from March a,Delay b
where a.flight=b.flight and a.date=b.date; quit;
1.2 外连接
1.2.1 左连接left join proc sql;
create table leftjoins as select *
from March a left join Delay b
on a.flight=b.flight and a.date=b.date; quit;
1.2.2 右连接right join proc sql;
create table rightjoins as select *
from March a right join Delay b
-3 27 7 1 21 -2 4 0

