/* replace a given token in all the valid symbols. Use the sampled symbols * to update the counts */ staticvoidcompress_symbols(constunsignedchar *str, int idx) { unsignedint i, len, size; unsignedchar *p1, *p2;
for (i = 0; i < table_cnt; i++) {
len = table[i]->len; p1 = table[i]->sym;
/* find the token on the symbol */ p2 = find_token(p1, len, str); if (!p2) continue;
/* decrease the counts for this symbol's tokens */ forget_symbol(table[i]->sym, len);
if (!base_relative) output_label("kallsyms_addresses"); else output_label("kallsyms_offsets");
for (i = 0; i < table_cnt; i++) { if (base_relative) { /* * Use the offset relative to the lowest value * encountered of all relative symbols, and emit * non-relocatable fixed offsets that will be fixed * up at runtime. */
/* table of offset markers, that give the offset in the compressed stream * every 256 symbols */ markers = malloc(sizeof(unsignedint) * ((table_cnt + 255) / 256)); if (!markers) { fprintf(stderr, "kallsyms failure: " "unable to allocate required memory\n"); exit(EXIT_FAILURE); }
output_label("kallsyms_names"); off = 0; for (i = 0; i < table_cnt; i++) { if ((i & 0xFF) == 0) markers[i >> 8] = off; table[i]->seq = i;
/* There cannot be any symbol of length zero. */ if (table[i]->len == 0) { fprintf(stderr, "kallsyms failure: " "unexpected zero symbol length\n"); exit(EXIT_FAILURE); }
/* Only lengths that fit in up-to-two-byte ULEB128 are supported. */ if (table[i]->len > 0x3FFF) { fprintf(stderr, "kallsyms failure: " "unexpected huge symbol length\n"); exit(EXIT_FAILURE); }
/* Encode length with ULEB128. */ if (table[i]->len <= 0x7F) { /* Most symbols use a single byte for the length. */ printf("\t.byte 0x%02x", table[i]->len); off += table[i]->len + 1; } else { /* "Big" symbols use two bytes. */ printf("\t.byte 0x%02x, 0x%02x", (table[i]->len & 0x7F) | 0x80, (table[i]->len >> 7) & 0x7F); off += table[i]->len + 2; } for (k = 0; k < table[i]->len; k++) printf(", 0x%02x", table[i]->sym[k]); printf("\n"); } printf("\n");
output_label("kallsyms_markers"); for (i = 0; i < ((table_cnt + 255) >> 8); i++) printf("\t.long\t%u\n", markers[i]); printf("\n");
free(markers);
sort_symbols_by_name(); output_label("kallsyms_seqs_of_names"); for (i = 0; i < table_cnt; i++) printf("\t.byte 0x%02x, 0x%02x, 0x%02x\n", (unsignedchar)(table[i]->seq >> 16), (unsignedchar)(table[i]->seq >> 8), (unsignedchar)(table[i]->seq >> 0)); printf("\n");
output_label("kallsyms_token_table"); off = 0; for (i = 0; i < 256; i++) { best_idx[i] = off; expand_symbol(best_table[i], best_table_len[i], buf); printf("\t.asciz\t\"%s\"\n", buf); off += strlen(buf) + 1; } printf("\n");
output_label("kallsyms_token_index"); for (i = 0; i < 256; i++) printf("\t.short\t%d\n", best_idx[i]); printf("\n"); }
if bit_size: if bit_size notin (64, 32): exit( '[!] Please specify a register bit size of either 32 or 64 bits' ) else: self.is_64_bits = bit_size == 64
except KallsymsNotFoundException as first_error: # Maybe an OpenWRT kernel with an uncompressed kallsyms try: self.find_kallsyms_names_uncompressed() self.find_kallsyms_markers_uncompressed() self.uncompressed_kallsyms = True
deffind_kallsyms_token_table(self): """ kallsyms_token_table is an array of 256 variable length null-terminated string fragments. Positions which correspond to an ASCII character which is used in at least one symbol contain the corresponding character (1), other positions contain a string fragment chosen by the compression algorithm (2). Hence, characters [0-9] and [a-z] are always present at their respective positions, but ":" (which comes after "9") never is. (1) See "insert_real_symbols_in_table" of "scripts/kallsyms.c" (2) See "optimize_result" of "scripts/kallsyms.c" """
position = 0
candidates_offsets = [] # offsets at which sequence_to_find was found candidates_offsets_followed_with_ascii = [] # variant with an higher certainty
sequence_to_find = b''.join( b'%c\0' % i for i inrange(ord('0'), ord('9') + 1) )
def find_kallsyms_markers(self):
    """
    Locate the kallsyms_markers table inside the kernel image.

    kallsyms_markers contains one offset into kallsyms_names for every
    256th entry of it. Offsets are stored either as ".long" (a GNU AS type
    that corresponds for example to 4 bytes on x86_64) since kernel v4.20,
    or with the register width of the system (the C "long" type) on older
    kernels. The element size that matched is remembered for later.

    The first entry is always 0, the table is sorted, and it is aligned.

    Reads ``self.kernel_img``, ``self.is_big_endian`` and
    ``self.kallsyms_token_table__offset``. On success, sets
    ``self.kallsyms_markers__offset`` (file offset of the table) and
    ``self.offset_table_element_size`` (8, 4 or 2).

    Raises:
        ValueError: if no plausible table is found for any element size.
    """
    kernel_img = self.kernel_img
    endianness_marker = '>' if self.is_big_endian else '<'

    # Try possible sizes for the table element (long type)
    for table_element_size in (8, 4, 2):
        position = self.kallsyms_token_table__offset
        long_size_marker = {2: 'H', 4: 'I', 8: 'Q'}[table_element_size]
        entries_format = endianness_marker + '4' + long_size_marker
        zero_element = b'\x00' * table_element_size

        # Search backwards for the start of kallsyms_markers, given that
        # its first element is 0 and that it is sorted. At most 32
        # candidate positions are examined per element size.
        for _ in range(32):
            position = kernel_img.rfind(zero_element, 0, position)
            if position == -1:
                # No zero run is left before the previous candidate. Without
                # this check, -1 would be aligned down to a negative offset
                # and unpack_from() would raise struct.error instead of
                # letting the next element size be tried.
                break

            # Elements 0 and 1 may look like "00 00 00 00, 00 00 xx xx", in
            # which case the match does not start on element 0: align down.
            position -= position % table_element_size

            # Only the first 4 elements are checked, i.e. 4 * 256 = 1024
            # symbols; a real kallsyms table is normally far larger.
            if position + 4 * table_element_size > len(kernel_img):
                # Not enough bytes left for 4 entries: keep looking earlier.
                continue
            entries = unpack_from(entries_format, kernel_img, position)

            # Because of the alignment above, check again that the
            # candidate element 0 really is zero.
            if entries[0] != 0:
                continue

            for i in range(1, len(entries)):
                # kallsyms_names entries are at least 2 bytes and at most
                # 0x3FFF bytes long (the ULEB128 two-byte limit). Two
                # consecutive markers are 256 symbols apart, so they must
                # differ by more than 0x200. The strict bound would be
                # 0x4000 * 0x100 = 0x400000; the tighter 0x40000 used here
                # relies on symbols not being that long in practice.
                # Accepted range for entries[i] - entries[i - 1]:
                # (0x200, 0x40000], which also implies strictly increasing.
                if (
                    entries[i - 1] + 0x200 >= entries[i]
                    or entries[i - 1] + 0x40000 < entries[i]
                ):
                    break
            else:
                logging.info(
                    '[+] Found kallsyms_markers at file offset 0x%08x' % position
                )
                self.kallsyms_markers__offset = position
                self.offset_table_element_size = table_element_size
                return

    raise ValueError('Could not find kallsyms_markers')
deffind_kallsyms_names(self): position = self.kallsyms_markers__offset
# Approximate the position of kallsyms_names based on the # last entry of "kallsyms_markers" - we'll determine the # precise position in the next method
for i inrange(1, len(kallsyms_markers_entries)): curr = kallsyms_markers_entries[i] last = kallsyms_markers_entries[i - 1] if last + 0x200 >= curr or last + 0x40000 < curr: kallsyms_markers_entries = kallsyms_markers_entries[:i] break
# Symbol types are the same as exposed by "man nm"
class KallsymsSymbolType(Enum):
    """
    One-letter symbol type codes found in kallsyms entries.

    Each member's value is the letter that nm prints for that kind of
    symbol. Member order is significant for code that iterates the enum.
    """

    # Seen in actual kernels
    ABSOLUTE = 'A'                  # absolute value
    BSS = 'B'                       # BSS data section
    DATA = 'D'                      # initialized data section
    RODATA = 'R'                    # read-only data section
    TEXT = 'T'                      # text (code) section
    WEAK_OBJECT_WITH_DEFAULT = 'V'  # weak object
    WEAK_SYMBOL_WITH_DEFAULT = 'W'  # weak symbol
token_table = self.get_token_table() possible_symbol_types = [i.value for i in KallsymsSymbolType]
dp = []
while needle == -1: position = self.kallsyms_names__offset
# Check whether this looks like the correct symbol # table, first depending on the beginning of the # first symbol (as this is where an uncertain gap # of 4 padding bytes may be present depending on # versions or builds), then through the whole # table. Raise an issue further in the code (in # another function) if an exotic kind of symbol is # found somewhere else than in the first entry.
# 找到一个可能是 kallsyms_names 开头的位置 # 要满足第一个字符是 type 的可能字符 # 这里似乎只假定这个符号长度是 <= 0x7f if ( not ( first_token_of_first_name[0].lower() in'uvw' and first_token_of_first_name[0] in possible_symbol_types ) and first_token_of_first_name[0].upper() notin possible_symbol_types ): # 不满足则往前进 4 ,因为对齐是 4 self.kallsyms_names__offset -= 4 ifself.kallsyms_names__offset < 0: raise ValueError('Could not find kallsyms_names') continue
# Each entry in the symbol table starts with a u8 size followed by the contents. # The table ends with an entry of size 0, and must lie before kallsyms_markers. # This for loop uses a bottom-up DP approach to calculate the numbers of symbols without recalculations. # dp[i] is the length of the symbol table given a starting position of "kallsyms_markers - i" # If the table position is invalid, i.e. it reaches out of bounds, the length is marked as -1. # The loop ends with the number of symbols for the current position in the last entry of dp.
# dp[i] 表示从 kallsyms_markers__offset-i 开始,如果可能为一个符号,则是倒数第几个符号,否则为 -1 # kallsyms_names 结束到 kallsyms_markers 开始中间可能有 0 作为 padding , # 每一个 0 都可以看作一个 uleb128 长度前缀的符号,也就是可以看作倒数第 0 个符号 # 是否可能作为符号,取决于当前位置作为符号,计算长度之后,没有超过 kallsyms_markers__offset # 如果没有超过,则 next_i 就是下一个符号的位置,dp[next_i] 是下一个符号的倒数序号 # 如果倒数序号不为 -1 就可以将当前位置看作是一个符号,其倒数序号为 dp[next_i] + 1 # 从 0 开始是因为 kallsyms_markers 这个位置必然是 0 ,如果 kallsyms_names 长度刚好对齐,那么至少要有一个 # 0 来作为起始位置 # 这个 dp 数组在循环中是复用的 for i inrange( len(dp), self.kallsyms_markers__offset - position + 1 ): curr = self.kernel_img[self.kallsyms_markers__offset - i] if curr & 0x80: # "Big" symbol symbol_size = ( curr & 0x7F | ( self.kernel_img[ self.kallsyms_markers__offset - i + 1 ] << 7 ) ) + 2 else: symbol_size = curr + 1 next_i = i - symbol_size if curr == 0: # Last entry of the symbol table dp.append(0if i <= 256else -1) elif ( next_i < 0or dp[next_i] == -1 ): # If table would exceed kallsyms_markers, mark as invalid dp.append(-1) else: dp.append(dp[next_i] + 1) num_symbols = dp[-1]
if num_symbols < 256: self.kallsyms_names__offset -= 4 ifself.kallsyms_names__offset < 0: raise ValueError('Could not find kallsyms_names') continue
if ( needle == -1 ): # There may be no padding between kallsyms_names and kallsyms_num_syms, if the alignment is already correct: in this case: try other offsets for "kallsyms_names" self.kallsyms_names__offset -= 4 ifself.kallsyms_names__offset < 0: raise ValueError('Could not find kallsyms_names')