金字塔分组算法-白红宇

金字塔分组算法

阅读量：4214 次

发布时间：2019-05-26

本文共 4118 字，大约阅读时间需要 13 分钟。

USE [RuleSet]

/****** Object: StoredProcedure [dbo].[P_SplitTable] Script Date: 2016/10/15 9:02:51 ******/

SET ANSI_NULLS ON

SET QUOTED_IDENTIFIER ON

ALTER proc [dbo].[P_SplitTable]

(

@table nvarchar(4000) --需要分析的查询支持跨库如select* from database.dbo.tablex

,@column nvarchar(200) --需要分析的字段，必须是数值类型

,@split nvarchar(100)='1,2,7'

,@type tinyint =0 -- 0表示按照数据的数量分组。count(1),1表示按照金额汇总。sum(@column)

,@sort nvarchar(100)='desc' --表示对字段进行排序的排序类型逆序表示要找到最大的那些。顺序asc表示要找到最小的那些

)

数据分析----金字塔分组

根据列的值对表的数据进行百分比分组，如'1,2,7'代表按照数据

begin

--set nocount on ;

declare @t table(id int identity(1,1),splitdata float,foot float,per float);

if OBJECT_ID('tempdb..#t_1') is not null

drop table #t_1;

create table #t_1 (id int identity(1,1) primary key ,col float,total float,total_percent float);

declare @total float,@null float,@nsql nvarchar(4000),@m int,@n int,@cur float,@foot float;

--基础统计

set @nsql='select @total='+(case when @type=0 then 'count' when @type=1 then 'sum' end)+'('+quotename(@column)+')

,@null=count(1)-count('+quotename(@column)+')

from ('+@table+') as aa'

exec sp_executesql @nsql,N'@total float output,@null float output',@total output,@null output;

insert into @t(splitdata)

select splitdata from [dbo].[fnSplitString](@split,',');

--写入规模累计百分比

update b

set b.per=a.per

from

(select a.id,(sum(b.splitdata)/(select sum(splitdata) from @t)) as per

from @t a join @t b on a.id>=b.id

group by a.id) a

join @t b

on a.id=b.id

--数据写入

set @nsql='select '+quotename(@column)+' ,'+(case when @type=0 then 'count' when @type=1 then 'sum' end)+'('+quotename(@column)+')

from ('+@table+') as aa

group by '+quotename(@column)+' order by '+quotename(@column)+' '+@sort+' ';

insert into #t_1(col,total) exec(@nsql);

--计算累计值得算法。当大量数据的时候，记录数cross join 将无法正常执行。

-- 引入分批次的概念。在累加基础上考虑初值即可。

select @m=min(id),@n=max(id) from #t_1

while @m<=@n

begin

;

with tb

(select a.id,(sum(b.total)+isnull((select sum(total) from #t_1 where id<@m),0))/@total as total_percent

from #t_1 a join #t_1 b on

a.id>=@m and a.id<@m+500 and b.id>=@m and b.id<@m+500

and a.id>=b.id

group by a.id

)

--数据百分比

update b

set b.total_percent=a.total_percent

from

tb a

join #t_1 b

on a.id=b.id and b.id >=@m and b.id<@m+500;

--print @m;

set @m=@m+500

end

update b

set b.foot=a.col

from

(select a.id,c.col from @t a join #t_1 b on a.per>b.total_percent

join #t_1 c on c.id=b.id+1 and a.per<=c.total_percent

) a

join @t b

on a.id=b.id

select id,splitdata,foot,@null as blank from @t;

--select * from @t;

--select * from #t_1 order by col desc

;

end

--测试 count 的统计

declare @table nvarchar(4000) --需要分析的查询支持跨库如select* from database.dbo.tablex

,@column nvarchar(200) --需要分析的字段，必须是数值类型

,@split nvarchar(100)='1,2,7'

,@type tinyint =0 -- 0表示按照数据的数量分组。count(1),1表示按照金额汇总。sum(@column)

,@sort nvarchar(100)='desc' --表示对字段进行排序的排序类型逆序表示要找到最大的那些。顺序asc表示要找到最小的那些

set @table='select a.BO_ID,sum(b.nBalance) as nbalance

from bigdatabi..[NFB_BOAccount] a join bigdatabi..[NFB_Balance] b

on a.cAccount=b.cAccount and a.bUse=1

group by a.BO_ID

set @column='nbalance';

exec dbo.P_SplitTable @table,@column,@split,@type,@sort

-- 测试 sum的统计

declare @table nvarchar(4000) --需要分析的查询支持跨库如select* from database.dbo.tablex

,@column nvarchar(200) --需要分析的字段，必须是数值类型

,@split nvarchar(100)='7,2,1'

,@type tinyint =1 -- 0表示按照数据的数量分组。count(1),1表示按照金额汇总。sum(@column)

,@sort nvarchar(100)='desc' --表示对字段进行排序的排序类型逆序表示要找到最大的那些。顺序asc表示要找到最小的那些

set @table='select a.BO_ID,sum(b.nBalance) as nbalance

from bigdatabi..[NFB_BOAccount] a join bigdatabi..[NFB_Balance] b

on a.cAccount=b.cAccount and a.bUse=1

group by a.BO_ID

set @column='nbalance';

exec dbo.P_SplitTable @table,@column,@split,@type,@sort

select

(case when nbalance>470172523.44 then 'A'

when nbalance>1739249.07 then 'B'

when nbalance>250666.27 then 'C'

else 'D'

end)

as sig

,sum(nbalance)

from

(select a.BO_ID,sum(b.nBalance) as nbalance

from bigdatabi..[NFB_BOAccount] a join bigdatabi..[NFB_Balance] b

on a.cAccount=b.cAccount and a.bUse=1

group by a.BO_ID) as aa

group by

(case when nbalance>470172523.44 then 'A'

when nbalance>1739249.07 then 'B'

when nbalance>250666.27 then 'C'

else 'D'

end)

order by sig

转载地址：http://gqfmi.baihongyu.com/

你可能感兴趣的文章

test-definitions/blob/master/auto-test/docker/docker.sh

查看>>

test-definitions/blob/master/auto-test/dsdbench/dsdbench.sh

查看>>

test-definitions/blob/master/auto-test/dsdbench/dsdbench.sh

查看>>

test-definitions/blob/master/auto-test/dstat/dstat.sh

查看>>

test-definitions/blob/master/auto-test/etcd/etcd.sh

查看>>

test-definitions/blob/master/auto-test/ftp/ftp.sh

查看>>

test-definitions/blob/master/auto-test/gprof/gprof.sh

查看>>

test-definitions/blob/master/auto-test/hhvm/hhvm.sh

查看>>

test-definitions/blob/master/auto-test/httperf-client/httperf.sh

查看>>

test-definitions/blob/master/auto-test/iostat/iostat.sh

查看>>

test-definitions/blob/master/auto-test/kernel-compilation/kernel-compilation.sh

查看>>

内核ACPI函数API之acpi_platform_fill_resource

查看>>

内核ACPI函数API之acpi_create_platform_device

查看>>

ceph网络通信架构

查看>>

内核ACPI函数API之acpi_scan_add_handler

查看>>

内核ACPI函数API之acpi_bind_one和acpi_unbind_one

查看>>

bios 为硬盘中的grub生成入口

查看>>

kernel解析SRAT 表的个数限制是NR_NODE_MEMBLKS

查看>>

内核ACPI函数API之acpi_execute_simple_method

查看>>

内核ACPI函数API之acpi_bus_get_status

查看>>