#include
template
<
uint64_t
N
>
constexpr
uint64_t
fact
=
fact
<
N
-
1
>
*
N
;
template
<>
constexpr
uint64_t
fact
<
0
>
=
1
;
using
namespace
std
;
int
main
()
{
cout
<<
fact
<
20
>
<<
endl
;
return
0
;
}
別的騷操作咱也不會,就先在這裡放個模板的。
#include
int
main
()
{
long
long
_
=
_
^
_
;
long
long
a
=!
_
;
long
long
b
=
a
<<
a
;
long
long
c
=
b
^
a
;
long
long
d
=
b
<<
a
;
long
long
e
=
d
^
a
;
long
long
f
=
e
^
c
;
long
long
g
=
f
|
a
;
long
long
h
=
b
<<
b
;
long
long
i
=
g
&-
g
^
h
;
long
long
j
=
a
+
b
+
c
+
d
;
long
long
k
=
i
^
b
;
long
long
l
=
c
<<
b
;
long
long
m
=~
g
^~
j
;
long
long
n
=
h
|
_
|
d
|
_
|
b
;
long
long
o
=
c
|
(
c
>>
_
<<
b
);
long
long
p
=~
o
&
o
+
a
;
long
long
q
=!!
p
|
p
;
long
long
r
=
q
+
q
-
q
*
q
/
q
;
long
long
s
=++
r
+
a
;
long
long
t
=
s
^
g
;
printf
(
“%I64d”
,
a
*
b
*
c
*
d
*
e
*
f
*
g
*
h
*
i
*
j
*
k
*
l
*
m
*
n
*
o
*
p
*
q
*
r
*
s
*
t
);
}
數學家版本:
#include
#include
int
main
()
{
std
::
cout
<<
std
::
tgamma
(
20
+
1
)
<<
std
::
endl
;
}
語言學家版本:
#include
#include
template
<
std
::
size_t
。。。
I
>
constexpr
auto
foo
(
std
::
index_sequence
<
I
。。。
>
)
{
return
((
I
+
1
)
*
。。。);
}
int
main
()
{
std
::
cout
<<
foo
(
std
::
make_index_sequence
<
20
>
())
<<
std
::
endl
;
}
“快速”版本:
#include
#include
long
long
foo
(
int
a
,
int
b
,
std
::
future
<
long
long
>
last
=
std
::
async
(
std
::
integral_constant
<
long
long
,
1
>
()))
{
return
a
==
b
?
a
*
last
。
get
()
:
foo
((
a
+
b
)
/
2
+
1
,
b
,
std
::
async
(
foo
,
a
,
(
a
+
b
)
/
2
,
std
::
move
(
last
)));
}
int
main
()
{
std
::
cout
<<
foo
(
1
,
20
)
<<
std
::
endl
;
}
歷史學家版本:
#include
void
main
(
void
)
{
int
i
;
long
long
j
;
for
(
i
=
1
,
j
=
1
;
i
<=
20
;
j
*=
i
++
);
printf
(
“%lld”
,
j
);
}
敏捷開發上線1。0版本:
#include
int
main
()
{
//printf(“%d”, 1*2*3*4*5*6*7*8*9*10);
printf
(
“%lld”
,
(
long
long
)
1
*
2
*
3
*
4
*
5
*
6
*
7
*
8
*
9
*
10
*
11
*
12
*
13
*
14
*
15
*
16
*
17
*
18
*
19
*
20
);
}
面向物件專家版本:
#include
#include
#include
struct
IBaseInterface
{
virtual
~
IBaseInterface
()
=
0
;
};
inline
IBaseInterface
::~
IBaseInterface
()
=
default
;
struct
IDataProvider
:
virtual
public
IBaseInterface
{
virtual
int
first
()
=
0
;
virtual
int
last
()
=
0
;
virtual
int
next
(
int
v
)
=
0
;
};
struct
ICalculator
:
virtual
public
IBaseInterface
{
virtual
long
long
calc
(
IDataProvider
*
)
=
0
;
};
struct
IPrinter
:
virtual
public
IBaseInterface
{
virtual
void
(
const
std
::
string
&
)
=
0
;
};
struct
ISerializer
:
virtual
public
IBaseInterface
{
virtual
std
::
string
serialize
(
long
long
value
)
=
0
;
};
struct
IRunnable
:
virtual
public
IBaseInterface
{
virtual
void
run
()
=
0
;
};
class
Foo
:
virtual
public
IRunnable
{
std
::
shared_ptr
<
IDataProvider
>
m_dp
;
std
::
shared_ptr
<
ICalculator
>
m_c
;
std
::
shared_ptr
<
ISerializer
>
m_s
;
std
::
shared_ptr
<
IPrinter
>
m_p
;
public
:
Foo
(
std
::
shared_ptr
<
IDataProvider
>
dp
,
std
::
shared_ptr
<
ICalculator
>
c
,
std
::
shared_ptr
<
ISerializer
>
s
,
std
::
shared_ptr
<
IPrinter
>
p
)
:
m_dp
(
std
::
move
(
dp
)),
m_c
(
std
::
move
(
c
)),
m_s
(
std
::
move
(
s
)),
m_p
(
std
::
move
(
p
))
{}
void
run
()
override
{
return
m_p
->
(
m_s
->
serialize
(
m_c
->
calc
(
m_dp
。
get
())));
}
};
class
DefaultSerializer
:
virtual
public
ISerializer
{
public
:
std
::
string
serialize
(
long
long
value
)
override
{
return
std
::
to_string
(
value
);
}
};
class
StreamPrinter
:
virtual
public
IPrinter
{
std
::
ostream
&
m_os
;
public
:
explicit
StreamPrinter
(
std
::
ostream
&
os
)
:
m_os
(
os
)
{}
void
(
const
std
::
string
&
s
)
override
{
m_os
<<
s
<<
std
::
endl
;
}
};
class
MultiplyAccumulateCalculator
:
virtual
public
ICalculator
{
public
:
long
long
calc
(
IDataProvider
*
dp
)
override
{
int
i
=
dp
->
first
();
long
long
j
=
i
;
do
j
*=
(
i
=
dp
->
next
(
i
));
while
(
i
!=
dp
->
last
());
return
j
;
}
};
int
main
()
{
struct
MyDataProvider
:
virtual
public
IDataProvider
{
int
first
()
override
{
return
1
;
}
int
last
()
override
{
return
20
;
}
int
next
(
int
v
)
override
{
return
v
+
1
;
}
};
Foo
foo
(
std
::
make_shared
<
MyDataProvider
>
(),
std
::
make_shared
<
MultiplyAccumulateCalculator
>
(),
std
::
make_shared
<
DefaultSerializer
>
(),
std
::
make_shared
<
StreamPrinter
>
(
std
::
cout
));
foo
。
run
();
}
提前最佳化的並行版本:
#include
#include
double
foo
(
int
x
)
{
__m128
a
=
{
1。0f
,
2。0f
,
3。0f
,
4。0f
};
__m128
b
=
{
4。0f
,
4。0f
,
4。0f
,
4。0f
};
__m128
c
=
{
1。0f
,
1。0f
,
1。0f
,
1。0f
};
for
(
int
i
=
0
;
i
<
x
/
4
;
++
i
,
a
=
_mm_add_ps
(
a
,
b
))
c
=
_mm_mul_ps
(
c
,
a
);
for
(
int
i
=
x
%
4
;
i
<
4
;
++
i
)
a
[
i
]
=
1。0f
;
c
=
_mm_mul_ps
(
c
,
a
);
return
(
double
)
c
[
0
]
*
(
double
)
c
[
1
]
*
(
double
)
c
[
2
]
*
(
double
)
c
[
3
];
}
int
main
()
{
std
::
cout
<<
foo
(
20
)
<<
std
::
endl
;
}
“宏孩兒”超程式設計版:
#include
// 由於boost。preprocessor僅提供255以下的整數運算
// 所以使用sequence來 (十位個位)(千位百位)(十萬位萬位) 的方式來表示大整數。
// 不進位加法:(77)(66)(55) + (44)(33)(22) = (121)(99)(77)
#define PP_ADD_N_N_CARRY_OP(R, DATA, I, ELEM) (BOOST_PP_ADD(BOOST_PP_SEQ_ELEM(I, DATA), ELEM))
#define PP_ADD_N_N_CARRY(SEQ_A, SEQ_B) BOOST_PP_SEQ_FOR_EACH_I(PP_ADD_N_N_CARRY_OP, SEQ_A, SEQ_B)
// 進位加法:(121)(99)(77) = (21)(0)(78)
// 注意SEQ_A的長度要比SEQ_B長
#define PP_ADD_N_N_OP(S, STATE, ELEM_CARRY) \
BOOST_PP_SEQ_PUSH_FRONT( \
BOOST_PP_SEQ_REPLACE(STATE, 0, BOOST_PP_MOD(BOOST_PP_ADD(BOOST_PP_SEQ_HEAD(STATE), ELEM_CARRY), 100)), \
BOOST_PP_DIV(BOOST_PP_ADD(BOOST_PP_SEQ_HEAD(STATE), ELEM_CARRY), 100) \
)
#define PP_ADD_N_N(SEQ_A, SEQ_B) BOOST_PP_SEQ_REVERSE(BOOST_PP_SEQ_FOLD_LEFT(PP_ADD_N_N_OP, BOOST_PP_SEQ_NIL(0), PP_ADD_N_N_CARRY(SEQ_A, SEQ_B)))
// 沒什麼好說的,X*N = X+X+X+X+X+。。。+X
#define PP_MUL_N_1_EXP_OP(Z, I, DATA) (DATA)
#define PP_MUL_N_1_EXP(SEQ_N, N) BOOST_PP_REPEAT(N, PP_MUL_N_1_EXP_OP, SEQ_N)
#define PP_MUL_N_1_MYOP(S, STATE, ITEM) PP_ADD_N_N(STATE, ITEM)
#define PP_MUL_N_1_FWD(EXP) BOOST_PP_SEQ_FOLD_LEFT(PP_MUL_N_1_MYOP, BOOST_PP_SEQ_HEAD(EXP), BOOST_PP_SEQ_TAIL(EXP))
#define PP_MUL_N_1(SEQ_N, N) PP_MUL_N_1_FWD(PP_MUL_N_1_EXP(SEQ_N, N))
#define FACT5 PP_MUL_N_1(PP_MUL_N_1(PP_MUL_N_1(PP_MUL_N_1((1), 2), 3), 4), 5)
#define FACT10 PP_MUL_N_1(PP_MUL_N_1(PP_MUL_N_1(PP_MUL_N_1(PP_MUL_N_1(FACT5, 6), 7), 8), 9), 10)
#define FACT15 PP_MUL_N_1(PP_MUL_N_1(PP_MUL_N_1(PP_MUL_N_1(PP_MUL_N_1(FACT10, 11), 12), 13), 14), 15)
#define FACT20 PP_MUL_N_1(PP_MUL_N_1(PP_MUL_N_1(PP_MUL_N_1(PP_MUL_N_1(FACT15, 16), 17), 18), 19), 20)
#define FACT25 PP_MUL_N_1(PP_MUL_N_1(PP_MUL_N_1(PP_MUL_N_1(PP_MUL_N_1(FACT20, 21), 22), 23), 24), 25)
static_assert
(
false
,
BOOST_PP_STRINGIZE
(
FACT10
));
警告:目前只有Clang能算出FACT20,編譯緩慢是十分正常的,請耐心等待。預設計算10的階乘10!=3628800,期待輸出:
error: static_assert failed “(0) (88) (62) (3) (0) (0) (0) (0) (0) (0)”
真·模板超程式設計版本(大整數)
#include
#include
#include
using
BaseType_t
=
long
long
;
constexpr
BaseType_t
lgBase
=
9
;
// 注意10000*10000剛剛好小於int的取值範圍
constexpr
BaseType_t
Base
=
1000000000
;
// 注意10000*10000剛剛好小於int的取值範圍
// 大整數的表示
template
<
BaseType_t
。。。
I
>
struct
BigInteger
{
using
type
=
BigInteger
;
};
// 連線
template
<
class
T1
,
class
T2
>
struct
BI_Cat
;
template
<
BaseType_t
。。。
I1
,
BaseType_t
。。。
I2
>
struct
BI_Cat
<
BigInteger
<
I1
。。。
>
,
BigInteger
<
I2
。。。
>>
:
BigInteger
<
I1
。。。,
I2
。。。
>
{};
// 左移一個單元(即*Base)
template
<
class
T
>
struct
BI_SHL
;
template
<
BaseType_t
。。。
I
>
struct
BI_SHL
<
BigInteger
<
I
。。。
>>
:
BigInteger
<
I
。。。,
0
>
{};
// 去除開頭的0
template
<
class
T
>
struct
BI_Remove_Zeros
:
T
{};
template
<
BaseType_t
。。。
I
>
struct
BI_Remove_Zeros
<
BigInteger
<
0
,
I
。。。
>>
:
BI_Remove_Zeros
<
BigInteger
<
I
。。。
>>
{};
// 填充0到N個單元
template
<
int
X
,
class
IS
>
struct
BI_Fill_Impl
;
template
<
int
X
,
class
T
,
T
。。。
I
>
struct
BI_Fill_Impl
<
X
,
std
::
integer_sequence
<
T
,
I
。。。
>>
:
BigInteger
<
(
I
,
X
)。。。
>
{};
template
<
int
Size
>
struct
BI_Fill_Zeros
:
BI_Fill_Impl
<
0
,
std
::
make_index_sequence
<
Size
>>
{};
template
<
class
T
,
int
N
>
struct
BI_Resize
;
template
<
BaseType_t
。。。
I
,
int
N
>
struct
BI_Resize
<
BigInteger
<
I
。。。
>
,
N
>
:
BI_Cat
<
typename
BI_Fill_Zeros
<
N
-
sizeof
。。。(
I
)
>::
type
,
BigInteger
<
I
。。。
>>
{};
// 返回較大的數值
template
<
int
A
,
int
B
>
struct
int_min
:
std
::
integral_constant
<
int
,
(
A
<
B
?
B
:
A
)
>
{};
// 非進位加法:先把兩個數的位數改成一樣的然後依次相加
template
<
class
A
,
class
B
,
class
ShouldResize
>
struct
BI_AddNotCarry_Impl
;
template
<
BaseType_t
。。。
I1
,
BaseType_t
。。。
I2
>
struct
BI_AddNotCarry_Impl
<
BigInteger
<
I1
。。。
>
,
BigInteger
<
I2
。。。
>
,
std
::
true_type
>
:
BigInteger
<
(
I1
+
I2
)。。。
>
{};
template
<
BaseType_t
。。。
I1
,
BaseType_t
。。。
I2
>
struct
BI_AddNotCarry_Impl
<
BigInteger
<
I1
。。。
>
,
BigInteger
<
I2
。。。
>
,
std
::
false_type
>
:
BI_AddNotCarry_Impl
<
typename
BI_Resize
<
BigInteger
<
I1
。。。
>
,
int_min
<
sizeof
。。。(
I1
),
sizeof
。。。(
I2
)
>::
value
>::
type
,
typename
BI_Resize
<
BigInteger
<
I2
。。。
>
,
int_min
<
sizeof
。。。(
I1
),
sizeof
。。。(
I2
)
>::
value
>::
type
,
std
::
true_type
>
{};
template
<
class
A
,
class
B
>
struct
BI_AddNotCarry
;
template
<
BaseType_t
。。。
I1
,
BaseType_t
。。。
I2
>
struct
BI_AddNotCarry
<
BigInteger
<
I1
。。。
>
,
BigInteger
<
I2
。。。
>>
:
BI_AddNotCarry_Impl
<
BigInteger
<
I1
。。。
>
,
BigInteger
<
I2
。。。
>
,
std
::
bool_constant
<
sizeof
。。。(
I1
)
==
sizeof
。。。(
I2
)
>>
{};
// 判斷是否為0
template
<
class
Y
>
struct
BI_IsZero
;
template
<
BaseType_t
。。。
I
>
struct
BI_IsZero
<
BigInteger
<
I
。。。
>>
:
std
::
bool_constant
<
((
I
==
0
)
&&
。。。)
>
{};
// 自動進位
template
<
class
A
>
struct
BI_Carry
;
template
<
class
A
,
class
B
>
struct
BI_Add
:
BI_Carry
<
typename
BI_AddNotCarry
<
A
,
B
>::
type
>
{};
template
<
class
Mod
,
class
Div
,
class
ShouldCalc
=
typename
BI_IsZero
<
Div
>::
type
>
struct
BI_Carry_Impl
;
template
<
class
Mod
,
class
Div
>
struct
BI_Carry_Impl
<
Mod
,
Div
,
std
::
true_type
>
:
Mod
{};
template
<
class
Mod
,
class
Div
>
struct
BI_Carry_Impl
<
Mod
,
Div
,
std
::
false_type
>
:
BI_Add
<
Mod
,
typename
BI_SHL
<
Div
>::
type
>
{};
template
<
BaseType_t
。。。
I
>
struct
BI_Carry
<
BigInteger
<
I
。。。
>>
:
BI_Remove_Zeros
<
typename
BI_Carry_Impl
<
BigInteger
<
(
I
%
Base
)。。。
>
,
BigInteger
<
(
I
/
Base
)。。。
>>::
type
>
{};
// 乘以X並自動進位
template
<
class
A
,
int
X
>
struct
BI_MulX
;
template
<
BaseType_t
。。。
I1
,
int
X
>
struct
BI_MulX
<
BigInteger
<
I1
。。。
>
,
X
>
:
BI_Carry
<
BigInteger
<
(
I1
*
X
)。。。
>>
{};
// 計算階乘
template
<
int
X
>
struct
BI_Fact
:
BI_MulX
<
typename
BI_Fact
<
X
-
1
>::
type
,
X
>
{};
template
<>
struct
BI_Fact
<
0
>
:
BigInteger
<
1
>
{};
template
<
BaseType_t
。。。
I
>
std
::
ostream
&
operator
<<
(
std
::
ostream
&
out
,
BigInteger
<
I
。。。
>
)
{
return
((
out
<<
std
::
setfill
(
‘0’
)
<<
I
<<
std
::
setw
(
lgBase
)),
。。。);
}
int
main
()
{
std
::
cout
<<
typename
BI_Fact
<
20
>::
type
()
<<
std
::
endl
;
}
如果將BI_Fact<20>改為BI_Fact<1000>後,我們可愛的Clang直譯器花了3秒多的時間很偷稅地算出來了1000! =
402387260077093773543702433923003985719374864210714632543799910429938512398629020592044208486969404800479988610197196058631666872994808558901323829669944590997424504087073759918823627727188732519779505950995276120874975462497043601418278094646496291056393887437886487337119181045825783647849977012476632889835955735432513185323958463075557409114262417474349347553428646576611667797396668820291207379143853719588249808126867838374559731746136085379534524221586593201928090878297308431392844403281231558611036976801357304216168747609675871348312025478589320767169132448426236131412508780208000261683151027341827977704784635868170164365024153691398281264810213092761244896359928705114964975419909342221566832572080821333186116811553615836546984046708975602900950537616475847728421889679646244945160765353408198901385442487984959953319101723355556602139450399736280750137837615307127761926849034352625200015888535147331611702103968175921510907788019393178114194545257223865541461062892187960223838971476088506276862967146674697562911234082439208160153780889893964518263243671616762179168909779911903754031274622289988005195444414282012187361745992642956581746628302955570299024324153181617210465832036786906117260158783520751516284225540265170483304226143974286933061690897968482590125458327168226458066526769958652682272807075781391858178889652208164348344825993266043367660176999612831860788386150279465955131156552036093988180612138558600301435694527224206344631797460594682573103790084024432438465657245014402821885252470935190620929023136493273497565513958720559654228749774011413346962715422845862377387538230483865688976461927383814900140767310446640259899490222221765904339901886018566526485061799702356193897017860040811889729918311021171229845901641921068884387121855646124960798722908519296819372388642614839657382291123125024186649353143970137428531926649875337218940694281434118520158014123344828015051399694290153483077644569099073152433278288269864602789864321139083506217095002597389863554277196742822248757586765752344220207573630569498825087968928162753848863396909959826280956121450994871701244516461260379029309120889086942028510640182154399457156805941872748998094254742173582401063677404595741785160829230135358081840096996372524230560855903700624271243416909004153690105933983835777939410970027753472000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
無人值守自動迭代版本
#include
#include
#include
#include
int
main
()
{
std
::
vector
<
int
>
v
(
std
::
atoi
(
std
::
end
(
__DATE__
)
-
(
__LINE__
)
/
2
));
// 2020年,第六行
std
::
iota
(
v
。
begin
(),
v
。
end
(),
1
);
std
::
cout
<<
std
::
accumulate
(
v
。
begin
(),
v
。
end
(),
1ull
,
std
::
multiplies
<>
())
<<
std
::
endl
;
}
模板超程式設計版本(二進位制)
#include
#include
#include
using
Zero
=
std
::
integer_sequence
<
bool
>
;
using
One
=
std
::
integer_sequence
<
bool
,
true
>
;
template
<
class
T
>
struct
type_identity
:
std
::
enable_if
<
true
,
T
>
{};
// *2
template
<
class
T
>
struct
shl
;
template
<
bool
。。。
B
>
struct
shl
<
std
::
integer_sequence
<
bool
,
B
。。。
>>
:
type_identity
<
std
::
integer_sequence
<
bool
,
false
,
B
。。。
>>
{};
// *2+1
template
<
class
T
>
struct
shl_inc
;
template
<
bool
。。。
B
>
struct
shl_inc
<
std
::
integer_sequence
<
bool
,
B
。。。
>>
:
type_identity
<
std
::
integer_sequence
<
bool
,
true
,
B
。。。
>>
{};
// /2
template
<
class
T
>
struct
shr
;
template
<
bool
First
,
bool
。。。
Rest
>
struct
shr
<
std
::
integer_sequence
<
bool
,
First
,
Rest
。。。
>>
:
type_identity
<
std
::
integer_sequence
<
bool
,
Rest
。。。
>>
{};
// +1
template
<
class
T
>
struct
inc
;
template
<>
struct
inc
<
Zero
>
:
type_identity
<
One
>
{};
template
<
bool
。。。
B
>
struct
inc
<
std
::
integer_sequence
<
bool
,
true
,
B
。。。
>>
:
shl
<
typename
inc
<
std
::
integer_sequence
<
bool
,
B
。。。
>>::
type
>
{};
template
<
bool
。。。
B
>
struct
inc
<
std
::
integer_sequence
<
bool
,
false
,
B
。。。
>>
:
type_identity
<
std
::
integer_sequence
<
bool
,
true
,
B
。。。
>>
{};
// -1
template
<
class
T
>
struct
dec
;
template
<
bool
。。。
B
>
struct
dec
<
std
::
integer_sequence
<
bool
,
true
,
B
。。。
>>
:
type_identity
<
std
::
integer_sequence
<
bool
,
false
,
B
。。。
>>
{};
template
<
bool
。。。
B
>
struct
dec
<
std
::
integer_sequence
<
bool
,
false
,
B
。。。
>>
:
shl_inc
<
typename
dec
<
std
::
integer_sequence
<
bool
,
B
。。。
>>::
type
>
{};
// 取最低位
template
<
class
T
>
struct
lowest
:
std
::
false_type
{};
template
<
bool
First
,
bool
。。。
Rest
>
struct
lowest
<
std
::
integer_sequence
<
bool
,
First
,
Rest
。。。
>>
:
std
::
bool_constant
<
First
>
{};
// 擴充套件位數
template
<
class
T
>
struct
ext
;
template
<
bool
。。。
B
>
struct
ext
<
std
::
integer_sequence
<
bool
,
B
。。。
>>
:
type_identity
<
std
::
integer_sequence
<
bool
,
B
。。。,
false
>>
{};
// 比較兩個數是否相等
template
<
class
A
,
class
B
>
struct
test
;
template
<
bool
。。。
B1
,
bool
。。。
B2
>
struct
test
<
std
::
integer_sequence
<
bool
,
B1
。。。
>
,
std
::
integer_sequence
<
bool
,
B2
。。。
>>
:
std
::
bool_constant
<
(。。。
||
(
B1
||
B2
))
>
{};
// 計算位數大小
template
<
class
T
>
struct
len
;
template
<>
struct
len
<
Zero
>
:
std
::
integral_constant
<
std
::
size_t
,
0
>
{};
template
<
bool
。。。
B
>
struct
len
<
std
::
integer_sequence
<
bool
,
B
。。。
>>
:
std
::
integral_constant
<
std
::
size_t
,
sizeof
。。。(
B
)
>
{};
// 去除多餘的高位
template
<
class
T
,
bool
not_empty
=
test
<
T
,
T
>::
value
>
struct
shrink
:
type_identity
<
Zero
>
{};
template
<
bool
。。。
B
>
struct
shrink
<
std
::
integer_sequence
<
bool
,
false
,
B
。。。
>
,
true
>
:
shl
<
typename
shrink
<
std
::
integer_sequence
<
bool
,
B
。。。
>>::
type
>
{};
template
<
bool
。。。
B
>
struct
shrink
<
std
::
integer_sequence
<
bool
,
true
,
B
。。。
>
,
true
>
:
shl_inc
<
typename
shrink
<
std
::
integer_sequence
<
bool
,
B
。。。
>>::
type
>
{};
// 實現超前進位加法器
template
<
class
A
,
class
B
,
bool
loop
=
test
<
B
,
B
>::
value
>
struct
add_impl
:
type_identity
<
A
>
{};
template
<
bool
。。。
B1
,
bool
。。。
B2
>
struct
add_impl
<
std
::
integer_sequence
<
bool
,
B1
。。。
>
,
std
::
integer_sequence
<
bool
,
B2
。。。
>
,
true
>
:
add_impl
<
std
::
integer_sequence
<
bool
,
(
B1
^
B2
)。。。,
false
>
,
std
::
integer_sequence
<
bool
,
false
,
(
B1
&
B2
)。。。
>>
{};
// 加法時處理位數不同的情況
template
<
class
A
,
class
B
,
bool
=
(
len
<
A
>::
value
>
len
<
B
>::
value
)
>
struct
add_fill2
:
add_impl
<
A
,
B
>
{};
template
<
class
A
,
class
B
>
struct
add_fill2
<
A
,
B
,
true
>
:
add_fill2
<
A
,
typename
ext
<
B
>::
type
>
{};
template
<
class
A
,
class
B
,
bool
=
(
len
<
A
>::
value
<
len
<
B
>::
value
)
>
struct
add
:
add_fill2
<
A
,
B
>
{};
template
<
class
A
,
class
B
>
struct
add
<
A
,
B
,
true
>
:
add
<
typename
ext
<
A
>::
type
,
B
>
{};
// 實現乘法器
template
<
class
A
,
class
B
,
bool
add
=
lowest
<
B
>::
value
>
struct
mul
;
template
<
class
A
>
struct
mul
<
A
,
Zero
,
false
>
:
type_identity
<
Zero
>
{};
template
<
class
A
,
class
B
>
struct
mul
<
A
,
B
,
false
>
:
mul
<
typename
shl
<
A
>::
type
,
typename
shr
<
B
>::
type
>
{};
template
<
class
A
,
class
B
>
struct
mul
<
A
,
B
,
true
>
:
add
<
typename
mul
<
typename
shl
<
A
>::
type
,
typename
shr
<
B
>::
type
>::
type
,
A
>
{};
// 計算階乘
template
<
class
T
>
struct
greater_than_one
:
test
<
typename
dec
<
T
>::
type
,
typename
dec
<
T
>::
type
>
{};
template
<
class
T
>
struct
fact
:
shrink
<
typename
mul
<
T
,
typename
fact
<
typename
shrink
<
typename
dec
<
T
>::
type
>::
type
>::
type
>::
type
>
{};
template
<>
struct
fact
<
One
>
:
type_identity
<
One
>
{};
// 轉換為bitset輸出
template
<
bool
。。。
B
,
std
::
size_t
。。。
I
>
auto
ToBitSet
(
std
::
integer_sequence
<
bool
,
B
。。。
>
b
,
std
::
integer_sequence
<
std
::
size_t
,
I
。。。
>
)
{
std
::
bitset
<
sizeof
。。。(
B
)
>
ret
;
(。。。,
ret
。
set
(
I
,
B
));
return
ret
;
}
template
<
bool
。。。
B
>
auto
ToBitSet
(
std
::
integer_sequence
<
bool
,
B
。。。
>
b
)
{
return
ToBitSet
(
b
,
std
::
make_index_sequence
<
sizeof
。。。(
B
)
>
());
}
int
main
()
{
using
F20
=
fact
<
std
::
integer_sequence
<
bool
,
false
,
false
,
true
,
false
,
true
>>::
type
;
std
::
cout
<<
ToBitSet
(
F20
())。
to_ullong
()
<<
std
::
endl
;
//using F31 = fact
//std::cout << ToBitSet(F31())。to_string() << std::endl;
}
注:目前極限是計算到31的階乘,使用MSVC編譯31的階乘需30G記憶體。
更新:
2020-1-12 將“歷史學家”版本修改為void main(void)更具有歷史氣息
2020-1-16 將“快速”二分版本修改為「偽」遞迴呼叫
2020-1-18 並行版本
2020-2-22 宏超程式設計版本
2020-2-24 模板超程式設計版本(大整數)
2020-3-14 無人值守自動迭代版本
2021-10-12 模板超程式設計版本(二進位制)
一、可以寫成一個Y組合子:
// NOTE: Only compatible with C++14 or later versions。
#include
#include
int
main
(
int
argc
,
char
*
argv
[])
{
// y = λf。 (λx。 x x) (λx。 f (λn。 (x x) n))
const
auto
y
=
[]
(
const
auto
&
f
)
{
return
[
&
f
]
(
const
auto
&
x
)
{
return
x
(
x
);
}
([
&
f
]
(
const
auto
&
x
)
->
std
::
function
<
unsigned
long
long
(
unsigned
long
long
)
>
{
return
f
([
&
x
]
(
unsigned
long
long
n
)
{
return
x
(
x
)(
n
);
});
});
};
// almost_fac = λf。 λn。 if n > 0 then n × f (n - 1) else 1
const
auto
almost_fac
=
[]
(
auto
&&
f
)
{
return
[
f
]
(
unsigned
long
long
n
)
{
return
n
>
0
?
n
*
f
(
n
-
1
)
:
1
;
};
};
std
::
cout
<<
y
(
almost_fac
)(
20
)
<<
std
::
endl
;
return
0
;
}
二、可以跟風樓上守序善良一下:
// NOTE: Only tested on x86-64 Linux / macOS。
int
main
(
int
argc
,
char
*
argv
[])
{
asm
volatile
(
R
“(
movq
$
1
,
%
rax
movq
$
1
,
%
rcx
fac
:
mulq
%
rcx
incq
%
rcx
cmpq
$
20
,
%
rcx
jle
fac
movq
$
10
,
%
rcx
movq
%
rsp
,
%
rbx
movb
$
10
,
(
%
rsp
)
subq
$
8
,
%
rsp
digit
:
xorq
%
rdx
,
%
rdx
divq
%
rcx
addq
$
48
,
%
rdx
movb
%
dl
,
(
%
rsp
)
subq
$
8
,
%
rsp
testq
%
rax
,
%
rax
jnz
digit
movq
$
1
,
%
rdi
movq
%
rsp
,
%
rsi
:
addq
$
8
,
%
rsi
movq
$
1
,
%
rdx
)
”
#ifdef __APPLE__
“movq $0x2000004, %rax”
#else
“movq $1, %rax”
#endif
R
“(
syscall
movq
%
rsi
,
%
rsp
cmpq
%
rsi
,
%
rbx
jg
)
”
#ifdef __APPLE__
“movq $0x2000001, %rax”
#else
“movq $60, %rax”
#endif
R
“(
xorq
%
rdi
,
%
rdi
syscall
)
”);
}
三、上面的程式碼還可以更善良一些:
// NOTE: Only tested on x86-64 Linux / macOS。
constexpr
char
c
[]
=
“
\x48\x31\xc0\x48\x31\xc9\x48\xff\xc0\x48\xff\xc1\x48\xf7\xe1\x48\xff\xc1
”
“
\x48\x83\xf9\x14\x0f\x8e\xf0\xff\xff\xff\x48\x31\xc9\x48\x83\xc1\x0a\x48
”
“
\x89\xe3\xc6\x04\x24\x0a\x48\x83\xec\x08\x48\x31\xd2\x48\xf7\xf1\x48\x83
”
“
\xc2\x30\x88\x14\x24\x48\x83\xec\x08\x48\x85\xc0\x0f\x85\xe6\xff\xff\xff
”
“
\x48\x31\xff\x48\xff\xc7\x48\x89\xe6\x48\x83\xc6\x08\x48\x31\xd2\x48\xff
”
“
\xc2
”
#ifdef __APPLE__
“
\x48\xc7\xc0\xff\xff\xff\x01\x48\x83\xc0\x05\x0f\x05\x48\x89\xf4\x48\x39
”
“
\xf3\x90\x0f\x8f\xdc\xff\xff\xff\x48\xc7\xc0\xff\xff\xff\x01\x48\x83\xc0
”
“
\x02
”
#else
“
\x48\x31\xc0\x48\xff\xc0\x0f\x05\x48\x89\xf4\x48\x39\xf3\x0f\x8f\xe2\xff
”
“
\xff\xff\x48\x31\xc0\x48\x83\xc0\x3b\x48\xff\xc0
”
#endif
“
\x48\x31\xff\x0f\x05
”
;
int
main
(
int
argc
,
char
*
argv
[])
{
void
*
b
=
nullptr
;
#ifdef __clang__
*
(
&
b
+
4
)
=
(
void
*
)
&
c
;
#else
*
(
&
b
+
2
)
=
(
void
*
)
&
c
;
#endif
return
0
;
}
四、補一個模版,可惜不是很想寫遞迴:
// NOTE: Only compatible with C++17 or later versions。
#include
#include
#include
namespace
{
template
<
typename
T
>
struct
FactorialHelper
;
template
<
unsigned
long
long
。。。
Ns
>
struct
FactorialHelper
<
std
::
integer_sequence
<
unsigned
long
long
,
Ns
。。。
>>
:
public
std
::
integral_constant
<
unsigned
long
long
,
((
Ns
+
1
)
*
。。。)
>
{};
template
<
unsigned
long
long
N
>
using
Factorial
=
FactorialHelper
<
std
::
make_integer_sequence
<
unsigned
long
long
,
N
>>
;
}
// namespace
int
main
(
int
argc
,
char
*
argv
[])
{
std
::
cout
<<
Factorial
<
20
>::
value
<<
std
::
endl
;
return
0
;
}
五、模板也可以這麼寫:
// NOTE: Only compatible with C++17 or later versions。
#include
#include
#include
#include
namespace
{
constexpr
size_t
kMaxN
=
20
;
template
<
size_t
N
>
struct
Flag
{
friend
constexpr
int
AdlFlag
(
Flag
<
N
>
);
};
template
<
typename
T
>
struct
Writer
{
friend
constexpr
int
AdlFlag
(
T
)
{
return
0
;
}
};
template
<
typename
T
,
int
=
AdlFlag
(
T
{})
>
constexpr
size_t
IsAdlFlagDefined
(
int
)
{
return
1
;
}
template
<
typename
T
>
constexpr
size_t
IsAdlFlagDefined
(。。。)
{
return
0
;
}
template
<
size_t
Z
,
size_t
。。。
Sizes
,
size_t
R
=
((
IsAdlFlagDefined
<
Flag
<
Z
+
Sizes
>>
(
0
))
+
。。。)
>
constexpr
size_t
CountDefinedAdlFlags
(
std
::
integer_sequence
<
size_t
,
Sizes
。。。
>
)
{
return
R
;
}
template
<
size_t
MaxN
=
kMaxN
,
size_t
Z
=
0
,
typename
T
=
std
::
make_integer_sequence
<
size_t
,
MaxN
-
1
>
,
size_t
CounterMinus1
=
CountDefinedAdlFlags
<
Z
>
(
T
{}),
size_t
=
sizeof
(
Writer
<
Flag
<
CounterMinus1
>>
)
>
using
Counter
=
std
::
integral_constant
<
unsigned
long
long
,
CounterMinus1
+
1
>
;
}
// namespace
int
main
(
int
argc
,
char
*
argv
[])
{
// NOTE: Each ‘Counter<>::value’ is evaluated separately during the compile
// time thus the following lines cannot be shortened by loops or recursions。
std
::
cout
<<
Counter
<>::
value
*
Counter
<>::
value
*
Counter
<>::
value
*
Counter
<>::
value
*
Counter
<>::
value
*
Counter
<>::
value
*
Counter
<>::
value
*
Counter
<>::
value
*
Counter
<>::
value
*
Counter
<>::
value
*
Counter
<>::
value
*
Counter
<>::
value
*
Counter
<>::
value
*
Counter
<>::
value
*
Counter
<>::
value
*
Counter
<>::
value
*
Counter
<>::
value
*
Counter
<>::
value
*
Counter
<>::
value
*
Counter
<>::
value
<<
std
::
endl
;
return
0
;
}
六、最後,別光折騰CPU,GPU也別讓它閒著:
// NOTE: Only tested on x86-64 macOS。
#include
#include
#ifdef __APPLE__
#include
#else
#include
#endif
#include
#include
namespace
{
constexpr
unsigned
long
long
kN
=
20
;
constexpr
cl_uint
kMaxNumPlatforms
=
1
;
constexpr
cl_uint
kMaxNumDevices
=
16
;
constexpr
char
kKernelSource
[]
=
R
“(
__kernel
void
factorial
(
__global
long
unsigned
int
*
buffer
,
const
long
unsigned
int
n
)
{
const
long
unsigned
int
i
=
get_global_id
(
0
);
buffer
[
i
]
=
i
*
(
i
<
n
)
+
1
;
for
(
long
unsigned
int
stride
=
1
;
stride
<
n
;
stride
<<=
1
)
{
buffer
[
i
]
*=
buffer
[
i
+
stride
];
}
}
)
”;
void
check
(
const
bool
condition
,
const
char
*
err_msg
)
{
if
(
!
condition
)
{
std
::
cerr
<<
err_msg
<<
std
::
endl
;
exit
(
EXIT_FAILURE
);
}
}
void
check
(
const
cl_int
err
,
const
char
*
err_msg
)
{
if
(
err
!=
CL_SUCCESS
)
{
std
::
ostringstream
merged_err_msg
;
merged_err_msg
<<
err_msg
<<
“ With error: ”
<<
err
;
check
(
/* condition = */
false
,
merged_err_msg
。
str
()。
c_str
());
}
}
}
// namespace
int
main
(
int
argc
,
char
*
argv
[])
{
cl_platform_id
platforms
[
kMaxNumPlatforms
];
cl_uint
num_platforms
;
cl_int
err
=
clGetPlatformIDs
(
kMaxNumPlatforms
,
platforms
,
&
num_platforms
);
check
(
err
==
CL_SUCCESS
&&
num_platforms
>
0
,
“Failed to find platforms。”
);
cl_device_id
device_ids
[
kMaxNumDevices
];
cl_uint
num_devices
;
err
=
clGetDeviceIDs
(
platforms
[
0
],
CL_DEVICE_TYPE_GPU
,
kMaxNumDevices
,
device_ids
,
&
num_devices
);
check
(
err
,
“Failed to find devices。”
);
const
size_t
which_device
=
num_devices
-
1
;
const
cl_device_id
device_id
=
device_ids
[
which_device
];
const
cl_context
context
=
clCreateContext
(
/* properties = */
nullptr
,
/* num_devices = */
1
,
&
device_id
,
/* devices = */
nullptr
,
/* pfn_notify = */
nullptr
,
&
err
);
check
(
err
,
“Failed to create context。”
);
const
cl_command_queue
commands
=
clCreateCommandQueue
(
context
,
device_ids
[
which_device
],
/* cl_command_queue_properties = */
0
,
&
err
);
check
(
err
,
“Failed to create command queue。”
);
const
char
*
src
=
kKernelSource
;
const
cl_program
program
=
clCreateProgramWithSource
(
context
,
/* count= */
1
,
static_cast
<
const
char
**>
(
&
src
),
/* lengths = */
0
,
&
err
);
check
(
err
,
“Failed to create program。”
);
err
=
clBuildProgram
(
program
,
/* num_devices = */
0
,
/* device_list = */
nullptr
,
/* options = */
nullptr
,
/* pfn_notify = */
nullptr
,
/* user_data = */
nullptr
);
check
(
err
,
“Failed to build program。”
);
const
cl_kernel
kernel
=
clCreateKernel
(
program
,
“factorial”
,
&
err
);
check
(
err
,
“Failed to create kernel。”
);
const
size_t
log2n
=
static_cast
<
size_t
>
(
std
::
ceil
(
std
::
log2
(
kN
)));
const
size_t
num_threads
=
1
<<
log2n
;
const
size_t
buffer_size
=
num_threads
*
sizeof
(
unsigned
long
long
);
const
cl_mem
buffer
=
clCreateBuffer
(
context
,
CL_MEM_READ_WRITE
,
buffer_size
,
/* host_ptr = */
nullptr
,
&
err
);
check
(
err
,
“Failed to create buffer。”
);
err
=
clSetKernelArg
(
kernel
,
/* arg_index = */
0
,
sizeof
(
buffer
),
&
buffer
);
check
(
err
,
“Failed to set kernel arg ‘buffer’。”
);
err
=
clSetKernelArg
(
kernel
,
/* arg_index = */
1
,
sizeof
(
kN
),
&
kN
);
check
(
err
,
“Failed to set kernel arg ‘n’。”
);
size_t
local
;
err
=
clGetKernelWorkGroupInfo
(
kernel
,
device_id
,
CL_KERNEL_WORK_GROUP_SIZE
,
sizeof
(
local
),
&
local
,
/* param_value_size_ret = */
nullptr
);
check
(
err
,
“Failed to get kernel work group info。”
);
const
size_t
global
=
local
*
static_cast
<
size_t
>
(
std
::
ceil
(
num_threads
*
1。0
/
local
));
err
=
clEnqueueNDRangeKernel
(
commands
,
kernel
,
/* work_dim = */
1
,
/* global_work_offset = */
nullptr
,
&
global
,
&
local
,
/* num_events_in_wait_list = */
0
,
/* event_wait_list = */
nullptr
,
/* event = */
nullptr
);
check
(
err
,
“Failed to execute kernel。”
);
check
(
clFinish
(
commands
),
“Failed to finish commands。”
);
unsigned
long
long
result
;
err
=
clEnqueueReadBuffer
(
commands
,
buffer
,
CL_TRUE
,
/* offset = */
0
,
sizeof
(
unsigned
long
long
),
&
result
,
/* num_events_in_wait_list = */
0
,
/* event_wait_list = */
nullptr
,
/* event = */
nullptr
);
check
(
err
,
“Failed to read buffer。”
);
std
::
cout
<<
result
<<
std
::
endl
;
check
(
clReleaseMemObject
(
buffer
),
“Failed to release buffer。”
);
check
(
clReleaseProgram
(
program
),
“Failed to release program。”
);
check
(
clReleaseKernel
(
kernel
),
“Failed to release kernel。”
);
check
(
clReleaseCommandQueue
(
commands
),
“Failed to release command queue。”
);
check
(
clReleaseContext
(
context
),
“Failed to release context。”
);
return
0
;
}
寫這段程式碼的時候還以為不能用CUDA,因為它是C++的超集。感謝 @D Flip Flop 大神指出可以用CUDA Thrust,他的寫法比我手寫OpenCL簡潔太多了:
如何優雅地利用c++程式設計從1乘到20?
伽瑪函式唄。應該是最優雅的了。
#include
return
std
::
tgamma
(
20
+
1
)
;
能用標準庫解決的就標準庫,我覺得這就是優雅。