diff options
Diffstat (limited to 'admin/nmcheck')
-rwxr-xr-x | admin/nmcheck | 371 |
1 files changed, 371 insertions, 0 deletions
#!/usr/bin/perl -w

# Check namespace cleanness of a library.
# Allowed symbols are passed as arguments.
# They may have trailing * = wildcard.
# Wildcards may be also specified as *::* (e.g. K*::* for all KDE classes)
# Symbols are listed as full function unmangled names without arguments,
# e.g. 'foo bar* nspace::*' allows foo(), foo(int), bar(), barbar()
# and all symbols in namespace/class nspace.
# If an argument has a dot in it, it's a filename of a file containing
# allowed symbols, one per line (empty lines and lines starting with #
# are ignored).
#
# Exit status is 0 if every exported symbol is allowed, 1 otherwise
# (including the case where nm itself fails).

use strict;

my $thisProg = "$0";     # This program's name

my $library = "";
my $allowed_symbols = "";
my $debug = 0;
my $allowed_weak = "";
my $weak_specified = 0;

while( @ARGV )
{
    my $arg = shift @ARGV;
    if( $arg =~ /^--verbose$|^-v$/ )
    {
        $debug = 1;
    }
    elsif( $arg =~ /^--help$|^-h$/ )
    {
        print STDOUT "Usage $thisProg [OPTION] ... library [allowed symbols] ...\n",
                "\n",
                "Check if the given library has only allowed public symbols.\n",
                "\n",
                "      --allowweak=[symbol] allow only these weak symbols\n",
                "  -v, --verbose            verbosely list files processed\n",
                "  -h, --help               print this help, then exit\n";
        exit 0;
    }
    elsif( $arg =~ /^--allowweak=(.*)$/ )
    {
        $allowed_weak .= " " . $1;
        $weak_specified = 1;
    }
    elsif( $arg =~ /^--allowweak$/ ) # simply list all weak
    {
        $allowed_weak .= " ";
        $weak_specified = 1;
    }
    elsif( $arg =~ /^-/ )   # any other argument starting with a dash
    {
        die "Invalid argument!\n";
    }
    elsif( $library eq "" )
    {
        # the first non-option argument is the library to check
        $library = $arg;
    }
    else
    {
        $allowed_symbols .= " " . $arg;
    }
}

# Without a library, nm would silently inspect its default file (a.out).
if( $library eq "" )
{
    die "Usage $thisProg [OPTION] ... library [allowed symbols] ...\n";
}

if( ! $weak_specified )
{
    # No --allowweak given: allow all weak symbols.
    # Weak symbols are instances of templates and similar stuff, which
    # unfortunately includes also things from other libraries, so checking
    # them cannot be on by default.
    $allowed_weak = "*";
}

print STDERR "library:" . $library . "\n" if $debug;
print STDERR "allowed_symbols:" . $allowed_symbols . "\n" if $debug;
print STDERR "allowed_weak:" . $allowed_weak . "\n" if $debug;

my $default_symbols = "_fini _init";  # system symbols
# on my system, every .so has :
# A _DYNAMIC
# A _GLOBAL_OFFSET_TABLE_
# A __bss_start
# A _edata
# A _end
# T _fini
# T _init
# no need to list A symbols in $default_symbols

print STDERR "default_symbols: " . $default_symbols . "\n" if $debug;

print STDOUT "Namespace cleanness check for " . $library . " :\n";

my $lib_file = "";
if( $library =~ /\.la$/ )
{
    # get the real library file from .la
    open( my $la_fh, '<', $library ) or die "Couldn't open $library: $!\n";
    while( my $line = <$la_fh> )
    {
        # Take the first name of the quoted, space separated list.
        # The closing quote is excluded so that a list with a single
        # entry (library_names='libfoo.so') doesn't yield "libfoo.so'".
        if( $line =~ /library_names='([^ ']*)/ )
        {
            $lib_file = $1;
        }
    }
    close( $la_fh );
    if( ! $lib_file )
    {
        print STDERR "Library file not found in .la file!\n";
        exit 1;
    }
    my $libpath = $library;
    $libpath =~ s%[^/]*$%%;     # keep only the directory part
    if( -e ( $libpath . ".libs/" . $lib_file ) )
    {
        # not installed yet, libtool keeps the real library in .libs/
        $lib_file = $libpath . ".libs/" . $lib_file;
    }
    else
    {
        $lib_file = $libpath . $lib_file;
    }
}
else
{
    $lib_file = $library;
}

print STDERR "libfile: ". $lib_file . "\n" if $debug;

$allowed_symbols .= " " . $default_symbols;

# The prototype makes the array/hash arguments be passed by reference.
sub process_symbols($\@\%\@);

my @wildcards = ();
my %exacts = ();
my @regwildcards = ();
process_symbols( $allowed_symbols, @wildcards, %exacts, @regwildcards );
my @weak_wildcards = ();
my %weak_exacts = ();
my @weak_regwildcards = ();
process_symbols( $allowed_weak, @weak_wildcards, %weak_exacts, @weak_regwildcards );

# The list form of the pipe open bypasses the shell, so the library path
# is passed to nm verbatim whatever characters it contains.
my @nm_command = ( "nm", "-BDCg", $lib_file );

# TODO how portable is this nmcheck stuff?

print STDERR "nm command:" . join( " ", @nm_command ) . "\n" if $debug;

open( my $nm_fh, '-|', @nm_command ) or die "nm command failed\n";

my $exit_code = 0;

while( my $line = <$nm_fh> )
{
    # strip not exported symbols, which don't have address (=first column)
    next if $line =~ /^ /;

    my $type;
    my $symbol;
    if( $line =~ /^[^ ]* (.) (.*)$/ )
    {
        $type = $1;
        $symbol = $2;
    }
    else
    {
        die "Invalid line: " . $line . "\n";
    }

    print STDERR "Type: " . $type . " , symbol: " . $symbol . "\n" if $debug;
    if( $type eq "A" )
    { # these should be system symbols, so ignore them
        next;
    }

    my $orig_symbol = $symbol;

    if( $symbol =~ /\(anonymous namespace\)/ )
    { # TODO tell to prefer named namespaces? (shorter symbols)
        next;
    }

    $symbol = strip_symbol( $symbol );

    # print STDERR "Processed symbol: " . $symbol . "\n" if $debug;

    my $found = symbol_allowed( $symbol, \%exacts, \@wildcards, \@regwildcards );
    if( ( ! $found ) && ( $type eq "W" || $type eq "V" ))
    {
        # weak symbols get a second chance against the weak lists
        $found = symbol_allowed( $symbol, \%weak_exacts, \@weak_wildcards,
            \@weak_regwildcards );
    }

    if( ! $found )
    {
        print STDERR "Public symbol " . $orig_symbol . " is not allowed!\n";
        $exit_code = 1;
    }
}

# close() on a pipe returns false when the command exited with non-zero
# status. Without this check a library that nm cannot read would produce
# no symbols at all and be reported as clean.
if( ! close( $nm_fh ) )
{
    print STDERR "nm failed on " . $lib_file . "\n";
    $exit_code = 1;
}

print STDOUT $exit_code == 0 ? "OK\n" : "FAILED\n";

exit $exit_code;

# Reduces a demangled symbol as printed by nm to the bare qualified name
# that is compared against the allowed symbols: prefixes like "vtable for",
# template arguments, function arguments and return types are removed.
# Takes the demangled symbol, returns the stripped name.
sub strip_symbol
{
    my ( $symbol ) = @_;

    # strip prefixes
    # the :: appending is to make "CLASS::*" work also for "vtable for CLASS"
    $symbol =~ s/^typeinfo for (.*)$/$1::/;
    $symbol =~ s/^typeinfo fn for (.*)$/$1::/;
    $symbol =~ s/^typeinfo name for (.*)$/$1::/;
    $symbol =~ s/^vtable for (.*)$/$1::/;
    $symbol =~ s/^guard variable for (.*)$/$1::/;
    $symbol =~ s/^reference temporary for (.*)$/$1::/;
    $symbol =~ s/^VTT for (.*)$/$1::/;
    $symbol =~ s/^virtual thunk \[[^\]]*\] to (.*)$/$1::/;
    $symbol =~ s/^non-virtual thunk \[[^\]]*\] to (.*)$/$1::/;
    $symbol =~ s/^covariant return thunk \[[^\]]*\] to (.*)$/$1::/;
    $symbol =~ s/^construction vtable thunk for (.*)$/$1::/;
    $symbol =~ s/^construction vtable for .*-in-(.*) [0-9]*$/$1::/;

    # templates seem to have also return types mangled in their name, and nm prints it too
    # they have also template arguments in the symbol
    # get rid of both of those
    while( $symbol =~ /<.*>/ )
    {
        $symbol =~ s/<[^<>]*>//; # strip innermost <>
    }
    if( $symbol !~ /operator\(\)/ )
    {
        $symbol =~ s/ ?\(.*\).*$//;  # strip () and all after it
    }
    else
    {
        $symbol =~ s/(^|:| )operator\(\) ?\(.*\).*$//;  # strip () and all after it
    }
    $symbol =~ s/\[.*\] *$//;   # strip [in-charge] etc.
    if( $symbol =~ /(^|:| )operator / )
    {
        $symbol =~ s/.* ([^\s]*)operator /$1/; # strip everything before 'X::operator blah'
    }
    else
    {
        $symbol =~ s/.* ([^\s]+) *$/$1/;  # get last word (strip return type)
    }

    return $symbol;
}

# Tells whether a stripped symbol is covered by one set of allowed symbols.
#   $symbol           - name as returned by strip_symbol()
#   $exacts_ref       - ref to hash whose keys are exactly allowed names
#   $wildcards_ref    - ref to array of allowed name prefixes
#   $regwildcards_ref - ref to array of regexp sources matched against the
#                       whole name
# Returns 1 if the symbol is allowed, 0 otherwise.
sub symbol_allowed
{
    my ( $symbol, $exacts_ref, $wildcards_ref, $regwildcards_ref ) = @_;

    return 1 if $exacts_ref->{ $symbol };
    for my $wild ( @{ $wildcards_ref } )
    {
        return 1 if index( $symbol, $wild ) == 0;
    }
    for my $wild ( @{ $regwildcards_ref } )
    {
        return 1 if $symbol =~ /^$wild$/;
    }
    return 0;
}

# Sorts a whitespace separated list of allowed symbols into three sets.
#   $_[0] - the list; the single word NONE means an empty list, an entry
#           with a dot in it names a file with one symbol per line
#   $_[1] - array (by reference) receiving prefixes of "foo*" wildcards
#   $_[2] - hash (by reference) receiving exact names as keys
#   $_[3] - array (by reference) receiving regexp sources made from
#           wildcards containing "*::" or "::*"
# Dies if a symbols file cannot be opened, or if a listed symbol is
# reserved (double underscore, underscore + upper-case letter) or is main.
sub process_symbols($\@\%\@)
{
    my ( $allowed_symbols, $wildcards_ref, $exacts_ref, $regwildcards_ref ) = @_;

    $allowed_symbols =~ s/^ *//;  # strip whitespace
    $allowed_symbols =~ s/ *$//;

    if( $allowed_symbols eq "NONE" )
    {
        $allowed_symbols = "";
    }

    # first pass: replace file names by the symbols listed in the files
    my @symbols = ();
    for my $symbol ( split( ' ', $allowed_symbols ) )
    {
        if( $symbol =~ /\./ )  # dot in name -> file
        {
            open( my $sym_fh, '<', $symbol )
                or die "Cannot open file " . $symbol . ": $!\n";
            while( my $line = <$sym_fh> )
            {
                $line =~ s/^\s*//;  # strip whitespace
                $line =~ s/\s*$//;
                if( $line !~ /^$/        # empty line
                    && $line !~ /^\s*#/ ) # comment line starting with #
                {
                    push @symbols, $line;
                }
            }
            close( $sym_fh );
        }
        else
        {
            push @symbols, $symbol;
        }
    }

    # second pass: classify every symbol
    for my $symbol ( @symbols )
    {
        if( $symbol =~ /__/
            || $symbol =~ /^_[A-Z]/ )
        { # ISO C++ 2.10.2
            die "Symbols containing a double underscore or beginning with an underscore and an upper-case letter are reserved!\n";
        }
        elsif( $symbol eq "main"
            || $symbol eq "main*" )
        {
            die "Symbol main is not allowed!\n";
        }
        if( $symbol !~ /operator\*$/
            && $symbol =~ /^([^\*]*)\*$/ ) # trailing * without any * before it
        {
            print STDERR "wildcard:" . $symbol . "\n" if $debug;
            push @{ $wildcards_ref }, $1;
        }
        elsif( $symbol =~ /\*$/
            && ( $symbol =~ /\*::/ || $symbol =~ /::\*/ )
            && $symbol !~ /^\*/
            && $symbol !~ /operator\*$/ )
        {
            print STDERR "regwildcard:" . $symbol . "\n" if $debug;
            # Change * to .* (regexp). Everything between the stars is
            # quoted so that it can only match literally.
            my $pattern = join( '.*', map { quotemeta( $_ ) } split( /\*/, $symbol, -1 ) );
            push @{ $regwildcards_ref }, $pattern;
        }
        else
        {
            print STDERR "exact:" . $symbol . "\n" if $debug;
            $exacts_ref->{ $symbol } = 1;
        }
    }
}