#!/usr/bin/perl
# vim:sw=4:ts=4:et:
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

# $Id: fix-linux-stack.pl,v 1.16 2008/05/05 21:51:11 dbaron%dbaron.org Exp $
#
# This script uses addr2line (part of binutils) to process the output of
# nsTraceRefcnt's Linux stack walking code.  This is useful for two
# things:
#  (1) Getting line number information out of
#      |nsTraceRefcnt::WalkTheStack|'s output in debug builds.
#  (2) Getting function names out of |nsTraceRefcnt::WalkTheStack|'s
#      output on optimized builds (where it mostly prints UNKNOWN
#      because only a handful of symbols are exported from component
#      libraries).
#
# Use the script by piping output containing stacks (such as raw stacks
# or make-tree.pl balance trees) through this script.

use strict;
use IPC::Open2;
use File::Basename;

# Root of the tree that holds separate debug-info files; it is prepended
# to a binary's directory when searching for its debug file.
# XXX Hard-coded to gdb defaults (works on Fedora).
my $global_debug_dir = '/usr/lib/debug';

# We record several things for each file encountered.
#
# - {pipe_read}, {pipe_write}: these constitute a bidirectional pipe to an
#   addr2line process that gives symbol information for a file.
#
# - {cache}: this table holds the results of lookups that we've done
#   previously for (pre-adjustment) addresses, which lets us avoid redundant
#   calls to addr2line.
#
# - {address_adjustment}: addr2line wants offsets relative to the base address
#   for shared libraries, but it wants addresses including the base address
#   offset for executables.  This holds the appropriate address adjustment to
#   add to an offset within file.  See bug 230336.
#
my %file_infos;

# Compute the address adjustment for $file and store it in
# $file_info->{address_adjustment}.
#
#   $file      - path of the ELF binary named in the stack line.
#   $file_info - hash reference holding the per-file record; only the
#                {address_adjustment} key is written here.
#
# The adjustment is 0 unless `readelf -h` reports the ELF type as EXEC, in
# which case it is the .text section's address minus its file offset as
# reported by `readelf -S`.  If readelf cannot be run, a warning is printed
# and the adjustment stays 0.
sub set_address_adjustment($$) {
    my ($file, $file_info) = @_;

    # find out if it's an executable (as opposed to a shared library)
    my $elftype;
    if (open(my $elfhdr, '-|', 'readelf', '-h', $file)) {
        while (my $hdr_line = <$elfhdr>) {
            if ($hdr_line =~ /^\s*Type:\s+(\S+)/) {
                $elftype = $1;
                last;
            }
        }
        # The exit status is deliberately ignored: we may have stopped
        # reading early, and a failed readelf simply leaves $elftype unset.
        close($elfhdr);
    } else {
        print STDERR "Warning: could not run readelf on $file: $!\n";
    }

    # If it's an executable, make adjustment the base address.
    # Otherwise, leave it zero.
    my $adjustment = 0;
    if (defined $elftype && $elftype eq 'EXEC') {
        if (open(my $elfsecs, '-|', 'readelf', '-S', $file)) {
            while (my $sec_line = <$elfsecs>) {
                if ($sec_line =~ /^\s*\[\s*\d+\]\s+\.text\s+\w+\s+(\w+)\s+(\w+)\s+/) {
                    # Subtract the .text section's offset within the
                    # file from its base address.
                    $adjustment = hex($1) - hex($2);
                    last;
                }
            }
            close($elfsecs);
        } else {
            print STDERR "Warning: could not run readelf on $file: $!\n";
        }
    }

    $file_info->{address_adjustment} = $adjustment;
}

# Files sometimes contain a link to a separate object file that contains
# the debug sections of the binary, removed so that a smaller file can
# be shipped, but kept separately so that it can be obtained by those
# who want it.
# See http://sources.redhat.com/gdb/current/onlinedocs/gdb_16.html#SEC154
# for documentation of debugging information in separate files.
# On Fedora distributions, these files can be obtained by installing
# *-debuginfo RPM packages.
# Locate the separate debug-info file named by $file's .gnu_debuglink
# section.
#
#   $file - path of the ELF binary to inspect.
#
# Returns the path of the first existing candidate debug file, or '' when
# the binary has no .gnu_debuglink section, the section cannot be decoded,
# or none of the candidate locations exists.  Decoding problems are
# reported on STDERR as warnings.
sub separate_debug_file_for($) {
    my ($file) = @_;
    # We can read the .gnu_debuglink section using either of:
    #   objdump -s --section=.gnu_debuglink $file
    #   readelf -x .gnu_debuglink $file
    # Since readelf prints things backwards on little-endian platforms
    # for some versions only (backwards on Fedora Core 6, forwards on
    # Fedora 7), use objdump.

    # See if there's a .gnu_debuglink section
    my $have_debuglink = 0;
    if (open(my $elfsecs, '-|', 'readelf', '-S', $file)) {
        while (my $sec_line = <$elfsecs>) {
            if ($sec_line =~ /^\s*\[\s*\d+\]\s+\.gnu_debuglink\s+\w+\s+\w+\s+\w+\s+/) {
                $have_debuglink = 1;
                last;
            }
        }
        close($elfsecs);
    } else {
        print STDERR "Warning: could not run readelf on $file: $!\n";
    }
    return '' unless ($have_debuglink);

    # Determine the endianness of the shared library.
    my $endian = '';
    if (open(my $elfhdr, '-|', 'readelf', '-h', $file)) {
        while (my $hdr_line = <$elfhdr>) {
            if ($hdr_line =~ /^\s*Data:\s+.*(little|big) endian.*$/) {
                $endian = $1;
                last;
            }
        }
        close($elfhdr);
    }
    if ($endian ne 'little' && $endian ne 'big') {
        print STDERR "Warning: could not determine endianness of $file.\n";
        return '';
    }

    # Read the debuglink section as an array of words, in hexadecimal.
    my @words;
    if (open(my $debuglink, '-|', 'objdump', '-s', '--section=.gnu_debuglink', $file)) {
        while (my $dump_line = <$debuglink>) {
            if ($dump_line =~ /^ [0-9a-f]* ([0-9a-f ]{8}) ([0-9a-f ]{8}) ([0-9a-f ]{8}) ([0-9a-f ]{8}).*/) {
                push @words, $1, $2, $3, $4;
            }
        }
        close($debuglink);
    } else {
        print STDERR "Warning: could not run objdump on $file: $!\n";
        return '';
    }

    # A short final dump row is padded with all-blank columns; drop them so
    # that only real data words remain.
    pop @words while (@words && $words[-1] =~ /^ +$/);

    # We need at least one word of file name plus the checksum word.
    if (@words < 2) {
        print STDERR "Warning: .gnu_debuglink section in $file too short.\n";
        return '';
    }

    # Split every word into its four bytes (as two-digit hex strings).
    my @chars;
    foreach my $word (@words) {
        if ($word =~ /^([0-9a-f]{2})([0-9a-f]{2})([0-9a-f]{2})([0-9a-f]{2})$/) {
            push @chars, $1, $2, $3, $4;
        } else {
            print STDERR "Warning: malformed objdump output for $file.\n";
            return '';
        }
    }

    # The last four bytes are the checksum; remove them from @chars.
    my @hash_bytes = map { hex($_) } splice(@chars, -4);

    # Assembled for the (commented-out) debugging printf below; it is not
    # yet verified against the debug file.
    my $hash;
    if ($endian eq 'little') {
        $hash = ($hash_bytes[3] << 24) | ($hash_bytes[2] << 16) | ($hash_bytes[1] << 8) | $hash_bytes[0];
    } else {
        $hash = ($hash_bytes[0] << 24) | ($hash_bytes[1] << 16) | ($hash_bytes[2] << 8) | $hash_bytes[3];
    }

    # The string ends with a null-terminator and then 0 to three bytes
    # of padding to fill the current 32-bit unit.  (This padding is
    # usually null bytes, but I've seen null-null-H, on Ubuntu x86_64.)
    my $terminator = 1;
    while ($chars[$terminator] ne '00') {
        if ($terminator == $#chars) {
            print STDERR "Warning: missing null terminator in " .
                         ".gnu_debuglink section of $file.\n";
            return '';
        }
        ++$terminator;
    }
    if ($#chars - $terminator > 3) {
        print STDERR "Warning: Excess padding in .gnu_debuglink section " .
                     "of $file.\n";
        return '';
    }
    # Truncate to the bytes before the terminator: the file name itself.
    $#chars = $terminator - 1;

    my $basename = join('', map { chr(hex($_)) } @chars);

    # Now $basename and $hash represent the information in the
    # .gnu_debuglink section.
    #printf STDERR "%x: %s\n", $hash, $basename;

    # dirname() returns the directory without a trailing slash, so the
    # separators have to be supplied explicitly.
    my $dir = dirname($file);
    my @possible_results = (
        $dir . '/' . $basename,
        $dir . '/.debug/' . $basename,
        $global_debug_dir . $dir . '/' . $basename
    );
    foreach my $result (@possible_results) {
        if (-f $result) {
            # XXX We should check the hash.
            return $result;
        }
    }

    return '';
}

# Return the per-file record for $file, creating it on first use.
#
#   $file - path of the binary named in the stack line.
#
# On first use this starts an addr2line process (on the separate debug
# file when one is found, otherwise on $file itself), stores both ends of
# its pipe in {pipe_read} and {pipe_write}, computes {address_adjustment}
# for $file, and remembers the record in %file_infos.
sub get_file_info($) {
    my ($file) = @_;
    my $file_info = $file_infos{$file};
    unless (defined $file_info) {
        my $debug_file = separate_debug_file_for($file);
        $debug_file = $file if ($debug_file eq '');

        $file_info = {};
        open2($file_info->{pipe_read}, $file_info->{pipe_write},
              '/usr/bin/addr2line', '-C', '-f', '-e', $debug_file);

        # The adjustment is a property of the binary itself, not of the
        # debug file, so it is computed from $file.
        set_address_adjustment($file, $file_info);

        $file_infos{$file} = $file_info;
    }
    return $file_info;
}

# Ignore SIGPIPE as a workaround for addr2line crashes in some situations.
$SIG{PIPE} = 'IGNORE';

# make STDOUT unbuffered
select STDOUT;
$| = 1;

# Filter the input line by line: stack-frame lines that name an existing
# file are rewritten with addr2line's answer; all other lines are echoed
# unchanged.
while (defined(my $line = <>)) {
    # Lines that do not look like "symbol [file +0xADDR]" pass through.
    unless ($line =~ /^([ \|0-9-]*)(.*) ?\[([^ ]*) \+(0x[0-9A-F]{1,8})\](.*)$/) {
        print $line;
        next;
    }

    # $prefix preserves balance trees, $suffix preserves counts.
    my ($prefix, $raw_symbol, $object, $offset, $suffix) =
        ($1, $2, $3, hex($4), $5);

    unless (-f $object) {
        print STDERR "Warning: File \"$object\" does not exist.\n";
        print $line;
        next;
    }

    my $info = get_file_info($object);
    my $resolved = $info->{cache}->{$offset};
    unless (defined $resolved) {
        my $to_addr2line   = $info->{pipe_write};
        my $from_addr2line = $info->{pipe_read};

        # addr2line answers each address with two lines: the function
        # name, then file:line.
        printf {$to_addr2line} "0x%X\n", $offset + $info->{address_adjustment};
        chomp(my $symbol = <$from_addr2line>);
        chomp(my $location = <$from_addr2line>);

        # Fall back to what the input line already had when addr2line
        # has no answer.
        $symbol = $raw_symbol if (!$symbol || $symbol eq '??');
        $location = $object if (!$location || $location eq '??:0');

        $resolved = "$symbol ($location)";
        $info->{cache}->{$offset} = $resolved;
    }

    print "$prefix$resolved$suffix\n";
}