解决 arm64 Linux kernel 使用 O0 编译时报错的问题

最近在学习 Linux driver 时, 我在 qemu 上使用 aarch64 架构来跑 Image. 想通过单步调试看看 driver 的某些执行路径, 却发现给 driver 加上 O0 编译选项后会出现很多编译错误.

比如在 Linux-6.10 中, 我们在如下修改状态下, 使用命令 make LLVM=1 ARCH=arm64 defconfig && make LLVM=1 ARCH=arm64 Image -j12 编译的时候

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index 2c7bf4da0..857020b8c 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -116,7 +116,7 @@ CONFIG_ACPI_APEI_MEMORY_FAILURE=y
CONFIG_ACPI_APEI_EINJ=y
CONFIG_VIRTUALIZATION=y
CONFIG_KVM=y
-CONFIG_JUMP_LABEL=y
+# CONFIG_JUMP_LABEL is not set
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
CONFIG_IOSCHED_BFQ=y
diff --git a/drivers/Makefile b/drivers/Makefile
index fe9ceb0d2..f60455123 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -193,3 +193,5 @@ obj-$(CONFIG_CDX_BUS) += cdx/
obj-$(CONFIG_DPLL) += dpll/

obj-$(CONFIG_S390) += s390/
+
+subdir-ccflags-y += -O0

会报出很多的 error: invalid operand for inline asm constraint 'i' 错误. 下面我们就来探究下这个问题.

很多出错都指向了下面这个函数:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
/*
 * Test CPU capability @cpucap through a patchable branch.
 *
 * Returns false immediately when cpucap_is_possible() rejects @cpucap.
 * Otherwise the asm goto either falls through (returns true) or branches
 * to the l_no label (returns false).
 *
 * @cpucap is bound with the "i" (immediate) constraint, so it has to be a
 * compile-time constant at the point where the asm statement is emitted.
 *
 * NOTE(review): presumably alt_cb_patch_nops replaces the branch with a NOP
 * once the capability is detected -- confirm against the ALTERNATIVE_CB
 * definition, which is not shown here.
 */
static __always_inline bool
alternative_has_cap_likely(const unsigned long cpucap)
{
/* Bail out early for capabilities that cpucap_is_possible() rules out. */
if (!cpucap_is_possible(cpucap))
return false;

/* No outputs, one immediate input, no clobbers, one jump target (l_no). */
asm goto(
ALTERNATIVE_CB("b %l[l_no]", %[cpucap], alt_cb_patch_nops)
:
: [cpucap] "i" (cpucap)
:
: l_no);

/* Fall-through path: the branch to l_no was not taken. */
return true;
l_no:
return false;
}

其传入了一个 cpucap, 在调用这个内联函数的时候, 传入给 cpucap 的都是一个编译时常量, 比如

1
2
3
#define ARM64_ALWAYS_BOOT 0

alternative_has_cap_likely(ARM64_ALWAYS_BOOT);

这样在调用的地方, 将这个内联函数展开, 此时 cpucap 就是一个编译时的常量. 因此在内嵌汇编 asm 块中可以将这个 cpucap 以 “i” 类型输入参数传递.

但是当我们关闭了优化(比如用 O0)编译的时候, 情况就不一样了: 虽然 __always_inline 函数仍然会被强制内联, 但是编译器在 O0 下不会做常量传播, 参数 cpucap 会先被存到栈上再读出来, 对 asm 块来说它就是一个变量了. 变量是不能作为 “i” 类型输入参数传递给 asm 块的, 因此就会报错.

要解决这个问题, 我们需要修改下这种类型的内联函数的实现, 比如将这种内联函数修改成宏: 宏在预处理阶段做文本替换, 传入的常量在 asm 块中仍然是字面常量, 不依赖编译器优化. 不过这个函数内部存在跳转, 而我们修改的宏内部最好不要有这种跳转, 以避免宏在同一个函数中多次展开时跳转标签发生冲突. 可以按照如下的方式来修改:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
/*
 * Macro form of alternative_has_cap_likely(), written as a GNU statement
 * expression whose value is the final `ret`.
 *
 * `ret` starts out as true. When cpucap_is_possible(cpucap) holds, the asm
 * statement receives `ret` as a read-write register operand ("+r") and its
 * "mov %w[ret], #0" instruction can overwrite it with 0. When it does not
 * hold, `ret` is set to false directly. No jump label is used.
 *
 * Because this is a macro, `cpucap` is substituted textually, so a literal
 * argument reaches the "i" (immediate) constraint as a literal.
 *
 * NOTE(review): `cpucap` is expanded twice and the local is named `ret`, so
 * an argument with side effects, or one that mentions a caller variable
 * called `ret`, would misbehave -- callers are expected to pass constants.
 *
 * NOTE(review): presumably alt_cb_patch_nops turns the mov into a NOP when
 * the capability is present -- confirm against the ALTERNATIVE_CB
 * definition, which is not shown here.
 */
#define alternative_has_cap_likely(cpucap)                                \
({ \
int ret = true; \
if (cpucap_is_possible(cpucap)) { \
asm(ALTERNATIVE_CB("mov %w[ret], #0", %[_cpucap], \
alt_cb_patch_nops) \
: [ret] "+r"(ret) \
: [_cpucap] "i"(cpucap) \
:); \
} else { \
ret = false; \
} \
ret; \
})

修改之后, 我们去掉了对 asm goto 跳转的依赖, 但是实现了同样的功能. 由于此时我们将内联函数改为了宏, cpucap 在预处理阶段就被替换成了字面常量, 也就避开了 O0 编译时它无法作为 “i” 类型常量传入 asm 块的问题.

最终的修改 patch 如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index 2c7bf4da0..4b430913c 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -116,7 +116,6 @@ CONFIG_ACPI_APEI_MEMORY_FAILURE=y
CONFIG_ACPI_APEI_EINJ=y
CONFIG_VIRTUALIZATION=y
CONFIG_KVM=y
-CONFIG_JUMP_LABEL=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
CONFIG_IOSCHED_BFQ=y
@@ -563,6 +562,7 @@ CONFIG_SPI_SUN6I=y
CONFIG_SPI_TEGRA210_QUAD=m
CONFIG_SPI_TEGRA114=m
CONFIG_SPI_SPIDEV=m
+CONFIG_SPI_SLAVE=y
CONFIG_SPMI=y
CONFIG_SPMI_MTK_PMIF=m
CONFIG_PINCTRL_DA9062=m
diff --git a/arch/arm64/include/asm/alternative-macros.h b/arch/arm64/include/asm/alternative-macros.h
index d328f549b..17892cb57 100644
--- a/arch/arm64/include/asm/alternative-macros.h
+++ b/arch/arm64/include/asm/alternative-macros.h
@@ -223,41 +223,32 @@ alternative_endif

#include <linux/types.h>

-static __always_inline bool
-alternative_has_cap_likely(const unsigned long cpucap)
-{
- if (!cpucap_is_possible(cpucap))
- return false;
-
- asm goto(
- ALTERNATIVE_CB("b %l[l_no]", %[cpucap], alt_cb_patch_nops)
- :
- : [cpucap] "i" (cpucap)
- :
- : l_no);
-
- return true;
-l_no:
- return false;
-}
-
-static __always_inline bool
-alternative_has_cap_unlikely(const unsigned long cpucap)
-{
- if (!cpucap_is_possible(cpucap))
- return false;
-
- asm goto(
- ALTERNATIVE("nop", "b %l[l_yes]", %[cpucap])
- :
- : [cpucap] "i" (cpucap)
- :
- : l_yes);
-
- return false;
-l_yes:
- return true;
-}
+#define alternative_has_cap_likely(cpucap) \
+ ({ \
+ int ret = true; \
+ if (cpucap_is_possible(cpucap)) { \
+ asm(ALTERNATIVE_CB("mov %w[ret], #0", %[_cpucap], \
+ alt_cb_patch_nops) \
+ : [ret] "+r"(ret) \
+ : [_cpucap] "i"(cpucap) \
+ :); \
+ } else { \
+ ret = false; \
+ } \
+ ret; \
+ })
+
+#define alternative_has_cap_unlikely(cpucap) \
+ ({ \
+ int ret = false; \
+ if (cpucap_is_possible(cpucap)) { \
+ asm(ALTERNATIVE("nop", "mov %w[ret], #1", %[_cpucap]) \
+ : [ret] "+r"(ret) \
+ : [_cpucap] "i"(cpucap) \
+ :); \
+ } \
+ ret; \
+ })

#endif /* __ASSEMBLY__ */

diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 8b904a757..aa26a1152 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -475,13 +475,12 @@ static __always_inline bool cpus_have_cap(unsigned int num)
*
* @num must be a compile-time constant.
*/
-static __always_inline bool cpus_have_final_boot_cap(int num)
-{
- if (boot_capabilities_finalized())
- return alternative_has_cap_unlikely(num);
- else
- BUG();
-}
+#define cpus_have_final_boot_cap(num) \
+ ({ \
+ if (!boot_capabilities_finalized()) \
+ BUG(); \
+ alternative_has_cap_unlikely(num); \
+ })

/*
* Test for a capability without a runtime check.
@@ -492,13 +491,12 @@ static __always_inline bool cpus_have_final_boot_cap(int num)
*
* @num must be a compile-time constant.
*/
-static __always_inline bool cpus_have_final_cap(int num)
-{
- if (system_capabilities_finalized())
- return alternative_has_cap_unlikely(num);
- else
- BUG();
-}
+#define cpus_have_final_cap(num) \
+ ({ \
+ if (!system_capabilities_finalized()) \
+ BUG(); \
+ alternative_has_cap_unlikely(num); \
+ })

static inline int __attribute_const__
cpuid_feature_extract_signed_field_width(u64 features, int field, int width)
diff --git a/drivers/Makefile b/drivers/Makefile
index fe9ceb0d2..f60455123 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -193,3 +193,5 @@ obj-$(CONFIG_CDX_BUS) += cdx/
obj-$(CONFIG_DPLL) += dpll/

obj-$(CONFIG_S390) += s390/
+
+subdir-ccflags-y += -O0
diff --git a/drivers/firmware/Makefile b/drivers/firmware/Makefile
index 7a8d486e7..342f9dadc 100644
--- a/drivers/firmware/Makefile
+++ b/drivers/firmware/Makefile
@@ -36,3 +36,5 @@ obj-y += qcom/
obj-y += smccc/
obj-y += tegra/
obj-y += xilinx/
+
+subdir-ccflags-y += -Og
diff --git a/drivers/net/ethernet/Makefile b/drivers/net/ethernet/Makefile
index 0d872d4ef..baff0f30c 100644
--- a/drivers/net/ethernet/Makefile
+++ b/drivers/net/ethernet/Makefile
@@ -104,3 +104,5 @@ obj-$(CONFIG_NET_VENDOR_XILINX) += xilinx/
obj-$(CONFIG_NET_VENDOR_XIRCOM) += xircom/
obj-$(CONFIG_NET_VENDOR_SYNOPSYS) += synopsys/
obj-$(CONFIG_NET_VENDOR_PENSANDO) += pensando/
+
+subdir-ccflags-y += -Og
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 79edd5b5e..dba21fa6c 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -509,7 +509,7 @@ static inline bool lockdep_tcf_proto_is_locked(struct tcf_proto *tp)

static inline void qdisc_cb_private_validate(const struct sk_buff *skb, int sz)
{
- struct qdisc_skb_cb *qcb;
+ struct qdisc_skb_cb *qcb __attribute__((unused));

BUILD_BUG_ON(sizeof(skb->cb) < sizeof(*qcb));
BUILD_BUG_ON(sizeof(qcb->data) < sz);