#
# Basic libumem support in dbx
# written by Chris Quenelle
#
# ----------------------------------------------------------
# libumem integration for dbx
# ----------------------------------------------------------
#
#
# You can use these ksh functions in combination with libumem
# to get basic memory checking functionality.
#
# The module is contained entirely in a dbx ksh script that
# you can source in your .dbxrc file.
#
# What follows is a simplified sample session that shows
# how to use this module.
#
# For technical notes on the implementation of this module,
# see further down in this file.
#
# ----------------------------------------------------------
# Simple Demo
# ----------------------------------------------------------
#
#   (dbx) source umem.dbx
#   (dbx) alias u=umem
#
#   (dbx) u start
#   Enabling libumem debugging
#
#   (dbx) cc -g t.c
#   (dbx) debug a.out
#   (dbx) list 1,$
#       1   #include <stdlib.h>
#       2   int main()
#       3   {
#       4       char * p;
#       5       p = malloc(1);
#       6       p = malloc(1);
#       7       p = malloc(1);
#       8       free(p);
#       9       p++;
#      10       // this free will cause an error in libumem
#      11       // if checking is on, because it's a bad free
#      12       free(p);
#      13   }
#      14
#
#   (dbx) run
#   signal ABRT (Abort) in __lwp_kill at 0xff2bd5ec
#   0xff2bd5ec: __lwp_kill+0x0008: bcc,a,pt %icc,__lwp_kill+0x18 ! 0xff2bd5fc
#   Current function is main
#      12       free(p);
#
#   (dbx) print p
#   p = 0x5bfa9 "\xad\xbe\xef\xde\xad\xbe\xef\xfe\xed\xfa\xce\xfe\xed\xfa\xce"
#
#   (dbx) u findblock 0x5bfa9
#
#   Building umem_syms helper library.
#   Address 0x5bfa9 is inside the umem block at 0x5bfa0.
#   This corresponds to the malloc block at 0x5bfa8.
#
#   # So we can see that the pointer we tried to free points into
#   # the middle of a block.
#
#   # Let's ask for a history of the umem block containing p
#
#   (dbx) u bhist p
#
#   =================================================================
#     Log Rec Addr    Block Addr    Thrd    Timestamp
#     ------------    ----------    ----    ---------
#     0x320c8         0x5bfa0       1       0xbda1d488b8260
#   0x107d0 : in `a.out`_start /* No debugging info */
#   0x10c1c : in `a.out`t.c`main at "t.c":7
#   0xff36aeb4 : in `libumem.so.1`malloc /* No debugging info */
#   0xff36e2d0 : in `libumem.so.1`_umem_alloc /* No debugging info */
#   0xff36de8c : in `libumem.so.1`_umem_cache_alloc /* No debugging info */
#   =================================================================
#     Log Rec Addr    Block Addr    Thrd    Timestamp
#     ------------    ----------    ----    ---------
#     0x3212c         0x5bfa0       1       0xbda1d488ba010
#   0x107d0 : in `a.out`_start /* No debugging info */
#   0x107d0 : in `a.out`_start /* No debugging info */
#   0x10c2c : in `a.out`t.c`main at "t.c":8
#   0xff36b214 : in `libumem.so.1`malloc.c`process_free /* No debugging info */
#   0xff36dfec : in `libumem.so.1`_umem_cache_free /* No debugging info */
#   =================================================================
#
#   # Don't ask me why _start shows up twice in libumem stack capture.
#   # It's probably a stray tail-call optimization someplace.
#
#   # If you want to see the recent history of allocations/frees, do this:
#
#   (dbx) u log
#     Log Rec Addr    Block Addr    Thrd    Timestamp
#     ------------    ----------    ----    ---------
#     0x32000         0x5bfe0       1       0xbda1d488b15c8
#     0x32064         0x5bfc0       1       0xbda1d488b6fa0
#     0x320c8         0x5bfa0       1       0xbda1d488b8260
#     0x3212c         0x5bfa0       1       0xbda1d488ba010
#
#
# ----------------------------------------------------------
# Technical Notes
# ----------------------------------------------------------
#
# The code in this directory represents a basic
# dbx plugin module to support using Solaris libumem
# to monitor and debug memory errors.  So far it
# doesn't approach the level of functionality that's
# built in to mdb, but it's a start.
# The functionality
# in this module can be extended without knowing any
# internals of dbx.  However, you do have to have a decent
# understanding of the internal data structures in libumem.
#
# Here are some other resources that would be helpful
# in understanding or extending this module:
#
#   Source code for the mdb libumem module:
#     http://cvs.opensolaris.org/source/xref/usr/src/cmd/mdb/common/modules/libumem/
#
#   Source code for libumem:
#     http://cvs.opensolaris.org/source/xref/usr/src/lib/libumem/
#
#   A Technical Article on how to use mdb with libumem:
#     http://access1.sun.com/techarticles/libumem.html
#
# If anyone knows of a diagram of libumem data structures, or other
# design documents for libumem, please let me know.  I'll add a reference
# to it in this file.  It would have helped me a lot in writing this
# module.
#
# The module is contained entirely in a dbx ksh script that
# you can source in your .dbxrc file.  For example:
#
#   .dbxrc:
#     source ~/umem.dbx
#     alias u=umem
#
# In order to give dbx access to the variables inside libumem, this
# module will automatically compile a helper library with -g and
# load it into dbx.  So to use this module, you need to have a
# C compiler on your search path.
#
# Stability:
#   Because the data structures internal to libumem are subject
#   to change, you probably don't want this module to be built
#   in to dbx.  Hopefully someone (possibly me) will update this
#   module when the libumem data structures change.
# If you get tired of typing 'umem', do this:
#   alias u=umem
#
# ksh tips:
#   use "typeset varname" to declare your local variables to
#   avoid polluting the global namespace
#
# dbx has some quirks that make ksh programming challenging
#
#   language mode:
#     dbxenv language_mode is normally set to "autodetect"
#     this can cause dbx to warn you at odd times like:
#        dbx: warning: unknown language, 'c' assumed
#     This can be worked around by redirecting error output to /dev/null
#     You could also work around it by setting/restoring dbxenv language_mode
#     around the expression evaluation in um_eval
#
#   bad printf format
#     sometimes dbx gets confused about checking that "print -f"
#     options match the types of arguments.  Often it does the
#     right thing anyway, so this is another reason to ignore errors
#     when evaluating expressions.
#

# Entry point: "umem <subcmd> [args...]" dispatches to the function
# named umem_<subcmd>.  With no subcommand at all, the help text is
# shown instead of trying to run a non-existent "umem_" function.
umem()
{
    typeset subcmd
    if [ $# -eq 0 ]
    then
        umem_help
        return 0
    fi
    subcmd=$1
    shift
    eval "umem_$subcmd" $*
}

# Print the list of available subcommands.
umem_help()
{
    eval "cat << EOHELP
libumem helper routines:
  umem start     - turn on libumem using LD_PRELOAD, and UMEM_xxx
  umem stop      - reset LD_PRELOAD
  umem log       - dump out the libumem transaction log (unsorted)
  umem ubhist    - show all allocs/frees for the given umem block
  umem bhist     - call findblock wth addr, then ubhist with result
  umem findblock - find the umem block containing the given address
                   human readable output
  umem ubaddr    - find the umem block containing the given address
                   only address is printed
  umem help      - this message
EOHELP"
}

# Dump one line per record of the libumem transaction log.
umem_log()
{
    # um_check / um_has_tlog signal "cannot continue" with status 99;
    # any other status (including a non-zero one) means carry on.
    um_check;
    if [ $? = 99 ] ; then return 0; fi;
    um_has_tlog;
    if [ $? = 99 ] ; then return 0; fi;
    um_log_dump_head
    um_scan um_logit_dump
}

# Evaluate the dbx expression in $1 and echo the result.
# Errors from dbx are left visible to the user.
um_eval_with_errors()
{
    typeset rv
    # echo "... $1" 1>&2
    rv=$[$1]
    # echo "... $rv" 1>&2
    echo "$rv"
}

# Evaluate the dbx expression in $1 and echo the result.
# Error output from the evaluation is discarded (see the notes on
# language mode and printf formats above).
um_eval()
{
    typeset rv
    # echo "... $1" 1>&2
    2>/dev/null rv=$[$1]
    # echo "... $rv" 1>&2
    echo "$rv"
}

# The symbols might need to be fully qualified like:
#   `libumem.so.1`max_ncpus
# also, the "umem_max_ncpus" on s10 might be "max_ncpus" on s9
# dbx running on S10 seems to take bare symbols, but running
# on s9, you need to qualify them.  So we always qualify them.
#
# Echo the address of the symbol named by $1, or "0" when dbx's
# "whereis" prints nothing for it.
um_symaddr()
{
    typeset out umsym rv
    # produces:  variable: `libumem.so.1`umem.c`umem_transaction_log
    out=$(whereis $1)
    # echo "$out" 1>&2
    if [ "$out" = "" ]; then
        echo "0"
        return;
    fi
    # puts words in $1 $2 etc
    set $out
    # picks the fully scoped variable name
    umsym=$2
    # we COULD use a 'case' statement here to special case
    # different names of symbols on different versions of solaris.
    # For now we don't need to do that.
    rv=$(um_eval " & $umsym" )
    echo $rv
}

# Print the column headings used by um_logit_dump.
um_log_dump_head()
{
    #    "        0x100118000         0x10014dfc0    1     0xbd695ea66e678
    echo "       Log Rec Addr          Block Addr Thrd           Timestamp"
    echo "       ------------          ---------- ----           ---------"
}

# um_scan callback: print one summary line for the log record at $1.
um_logit_dump()
{
    typeset ptr bcaddr bcthread bctimestamp rec
    ptr="$1"
    rec="((umem_bufctl_audit_t *)$ptr)"
    bcaddr=$(um_eval "$rec->bc_addr")
    bcthread=$(um_eval "-fd $rec->bc_thread")
    bctimestamp=$(um_eval "-fllx $rec->bc_timestamp")
    printf "%19s %19s %4d %19s\n" $ptr $bcaddr $bcthread $bctimestamp
}

# Show the history of the umem block that contains the address $1.
umem_bhist()
{
    typeset addr
    addr=$(umem_ubaddr "$1")
    umem_ubhist "$addr"
}

# Show every log record (with its stack) whose block address is $1.
umem_ubhist()
{
    um_check;
    if [ $? = 99 ] ; then return 0; fi;
    um_has_tlog;
    if [ $? = 99 ] ; then return 0; fi;
    um_scan um_logit_ubhist $1
    echo =================================================================
}

# um_scan callback: $1 is the log record, $2 the block address being
# searched for.  Matching records are printed together with their stack.
um_logit_ubhist()
{
    typeset ptr blockaddr thisblock
    ptr=$1
    blockaddr=$2
    thisblock=$(um_eval "-flx ((umem_bufctl_audit_t *)$ptr)->bc_addr")
    if [ "$thisblock" = "$blockaddr" ]
    then
        echo =================================================================
        um_log_dump_head
        um_logit_dump $ptr
        um_auditstack $ptr
    fi
}

# Print only the address of the umem block containing the address $1.
umem_ubaddr()
{
    typeset ptr
    um_check;
    if [ $? = 99 ] ; then return 0; fi;
    um_has_tlog;
    if [ $? = 99 ] ; then return 0; fi;
    # Remove type information so this turns into a plain hex pointer value
    # and not eg:  0x5ffa9 "\xad\xbe\xef"
    ptr=$(um_eval_with_errors "(void*)$1")
    um_scan um_logit_ubaddr $ptr
}

# Print a human readable description of the umem block containing $1.
umem_findblock()
{
    typeset ptr
    um_check;
    if [ $? = 99 ] ; then return 0; fi;
    um_has_tlog;
    if [ $? = 99 ] ; then return 0; fi;
    # Remove type information so this turns into a plain hex pointer value
    # and not eg:  0x5ffa9 "\xad\xbe\xef"
    ptr=$(um_eval_with_errors "(void*)$1")
    um_scan um_logit_findblock $ptr
}

# um_scan callback: $1 is the log record, $2 the address to look for.
# When $2 falls inside the record's block, echo the block address and
# ask um_scan to stop by setting the global umem_stop_scan.
um_logit_ubaddr()
{
    typeset cache size log searchaddr ptr addr cond
    ptr=$1
    searchaddr=$2
    log="((umem_bufctl_audit_t *)$ptr)"
    cache=$(um_eval "-flx $log->bc_cache")
    size=$(um_eval "-flx ((struct umem_cache *)$cache)->cache_chunksize")
    addr=$(um_eval "-flx $log->bc_addr")
    cond=$(um_eval "$searchaddr >= $addr && $searchaddr < ($addr + $size)")
    if [ "$cond" = "1" ]
    then
        echo $addr
        umem_stop_scan=1
    fi
}

# um_scan callback: same search as um_logit_ubaddr, but the result is
# reported as a sentence, including the corresponding malloc address.
um_logit_findblock()
{
    typeset cache size log searchaddr ptr useraddr addr cond
    ptr=$1
    searchaddr=$2
    log="((umem_bufctl_audit_t *)$ptr)"
    cache=$(um_eval "-flx $log->bc_cache")
    size=$(um_eval "-flx ((struct umem_cache *)$cache)->cache_chunksize")
    addr=$(um_eval "-flx $log->bc_addr")
    cond=$(um_eval "$searchaddr >= $addr && $searchaddr < ($addr + $size)")
    if [ "$cond" = "1" ]
    then
        # NOTE(review): the fixed offset of 8 matches the 32-bit demo in
        # the header; confirm it is also right for 64-bit programs.
        useraddr=$(um_eval "-flx $addr + 8")
        echo Address $searchaddr is inside the umem block at $addr.
        echo "  This corresponds to the malloc block at $useraddr."
        umem_stop_scan=1
    fi
}

# Return 99 (after printing a message) when libumem has no transaction
# log to look at, 0 otherwise.
um_has_tlog()
{
    typeset tladdr
    tladdr=$(um_symaddr umem_transaction_log)
    # um_symaddr echoes "0" (or nothing, if the evaluation failed) when
    # the symbol cannot be resolved; there is nothing to dereference then.
    if [ "$tladdr" = "" -o "$tladdr" = "0" ]
    then
        echo "libumem not initialized (no transaction log)" 1>&2
        return 99
    fi
    if [ "$(um_eval "*(void**)$tladdr" )" = "0" ]
    then
        echo "libumem not initialized (no transaction log)" 1>&2
        return 99
    fi
    return 0
}

# Walk the transaction log records of every CPU and run
#   <callback> <record address> <extra args...>
# for each one.  $1 is the callback name, the remaining arguments are
# passed through.  A callback stops the walk by setting the global
# umem_stop_scan to 1.
um_scan()
{
    typeset tlog base chunksize recsize sdep
    typeset ncpus lhcpu cpu chunk cpubase last ptr
    typeset callback tladdr cpuvar

    umem_stop_scan=""

    tladdr=$(um_symaddr umem_transaction_log)
    tlog="((umem_log_header_t*)*(void**)$tladdr)"
    base=$(um_eval "(void*)$tlog->lh_base");
    chunksize=$(um_eval "(void*)$tlog->lh_chunksize");

    # I can almost use:
    #    sdep=$[umem_stack_depth]
    # except for a few dbx bugs.  Same comment applies elsewhere.
    # bug1: language mode complaints, symbol lookup differences on S9, S10
    sdep=$(um_eval "*(int*) $(um_symaddr umem_stack_depth)" )
    recsize=$(um_logrecsize $sdep)

    # on Solaris 9 they renamed one of the variables, I don't know why
    cpuvar="umem_max_ncpus"
    if [ $(uname -r) = "5.9" ]; then
        cpuvar="max_ncpus"
    fi

    ncpus=$(um_eval "*(int*) $(um_symaddr $cpuvar)" )
    lhcpu="(&$tlog->lh_cpu)"
    (( cpu = 0 ))
    callback=$1 ; shift
    while [ $cpu -lt $ncpus ]
    do
        # Since lh_cpu is declared as [1], we need to
        # get rid of the dbx warning about array bounds
        # We do this by casting through void*
        # The expression is quoted because it contains '*' and '[n]',
        # which the shell would otherwise treat as glob patterns.
        chunk=$(um_eval "$lhcpu[$cpu].clh_chunk" )
        cpubase=$(um_eval "(void*) ( $base + ( $chunk * $chunksize ))" );
        last=$(um_eval "-flx (void*) ($lhcpu[$cpu].clh_current)")
        if [ "$last" = "(nil)" ]; then last=0; fi;
        ptr="$cpubase"
        while [ $(um_eval "(int) ( $ptr < $last ) ") = "1" ]
        do
            eval $callback $ptr "$*"
            ptr=$(um_eval "(void*) ($ptr + $recsize)" )
            if [ "$umem_stop_scan" = "1" ]
            then
                return
            fi
        done
        (( cpu = cpu + 1 ))
    done
}

#
# one frame is included in size of umem_bufctl_audit_t, hence we
# need a -1.  However: because the struct def contains an 8-byte
# field, the result of "sizeof" includes an extra 4 bytes of padding
# at the end which isn't really there.  We have to remove both.
# Null out the padding for 64-bit programs
#
# $1 is the stack depth; the size of one log record is echoed.
um_logrecsize()
{
    # "so_" means "sizeof_"
    typeset so_bca so_extra res
    typeset so_vstar
    typeset pad=4

    # I used sizeof(long) here and it sometimes
    # showed '8' when it was supposed to say '4'
    # if the loaded program does have stabs/dwarf info for 'long'
    # then dbx assumes that because the dbx process is 64-bits
    # the size of long defaults to 8.  void* seems to work reliably
    so_vstar=$(um_eval "sizeof(void*)")
    if [ "$so_vstar" = "8" ]
    then
        pad=0
    fi
    so_bca=$(um_eval "sizeof(umem_bufctl_audit_t) - $pad")  # remove padding
    (( so_extra = ( $1 - 1 ) * $so_vstar ))  # remove 1 thats in the struct
    (( res = so_bca + so_extra ))
    echo $res
}

# Print the saved call stack of the log record at $1, one frame per
# line, walking from the highest bc_stack index down to index 0.
um_auditstack()
{
    typeset addr=
    typeset i=
    typeset addr2 depth
    addr=$(um_eval "-flx &((umem_bufctl_audit_t*)$1)->bc_stack[0]" )
    # Look the depth up through um_symaddr, the same way um_scan does,
    # so the symbol is fully qualified.
    depth=$(um_eval "*(int*) $(um_symaddr umem_stack_depth)" )
    # bc_stack holds "depth" slots, so the last valid index is depth-1.
    (( i = depth - 1 ))
    while [ $i -ge 0 ]
    do
        addr2=$(um_eval "-flx ((uintptr_t*)$addr)[$i] ")
        # Skip unused slots, and slots dbx could not read (empty result).
        if [ "$addr2" != "" -a "$addr2" != "0x0" ]
        then
            # note special trick to get rid of 'unknown lang' warning
            # redirect comes at front because whereis command uses
            # native expr syntax, not shell syntax
            echo "$addr2 : $(2>/dev/null whereis -a $addr2)"
        fi
        (( i = i - 1 ))
    done
}

# Arrange for programs started from dbx to run with libumem preloaded
# and with its debugging and transaction logging switched on.
umem_start()
{
    echo Enabling libumem debugging
    export LD_PRELOAD_32
    export LD_PRELOAD_64
    LD_PRELOAD_32="/usr/lib/libumem.so"
    LD_PRELOAD_64="/usr/lib/64/libumem.so"
    export UMEM_DEBUG
    UMEM_DEBUG=default
    export UMEM_LOGGING
    UMEM_LOGGING=transaction
}

# Load the helper library $1 into dbx and make its symbols visible.
um_load_and_use()
{
    # this guy (-load) can mess up $vfunc
    loadobject -load $1 > /dev/null
    loadobject -use $1 > /dev/null
    um_fix_vfunc
}

# Make sure the process uses libumem and that dbx can see the libumem
# type definitions, building and loading the helper library if needed.
# Returns 99 when the module cannot be used, any other status otherwise.
um_check()
{
    typeset rightlib wronglib bitsoption
    # "~" is not expanded inside double quotes, so spell the paths
    # out with $HOME instead.
    typeset umdir="$HOME/.umdbx"
    typeset source="$umdir/umem_syms.c"
    typeset lib32="$umdir/umem_syms32.so"
    typeset lib64="$umdir/umem_syms64.so"

    if [ ! -d "$umdir" ]
    then
        mkdir "$umdir"
    fi

    if [ "$(proc -map | grep libumem)" = "" ]
    then
        echo "This program was not run with libumem." 1>&2
        return 99
    fi

    # This complication is because when the user jumps
    # from a 32-bit to a 64-bit program and back, we need to
    # switch helper libraries.
    if [ "$(/bin/file $prog | grep 32-bit)" != "" ]
    then
        rightlib=$lib32
        wronglib=$lib64
    else
        bitsoption="-xarch=generic64"
        rightlib=$lib64
        wronglib=$lib32
    fi

    if [ "$umem_symbols_library" = "$wronglib" ]
    then
        loadobject -unload $umem_symbols_library
        umem_symbols_library=""
    fi

    # If we still have a symbol defined by this point, we're good.
    if [ "$(whereis umem_bufctl_audit_t)" != "" ]
    then
        return # found symbols
    fi

    # If the library got hidden because of a 'debug', just unhide it
    # When user debugs a new program, we might need
    # to automatically re-"use" this library.
    # The symbols will be hidden by dbx because this
    # library is not one of the ones used in the program.
    if [ "$umem_symbols_library" != "" ]
    then
        loadobject -use $umem_symbols_library
        return
    fi

    if [ -f "$rightlib" ]
    then
        umem_symbols_library="$rightlib"
        um_load_and_use $umem_symbols_library
        return
    fi

    # Otherwise we need a compiler to build a lib.
    if [ "$(whence cc)" = "" ]
    then
        echo "You must have a C compiler on your path use this module." 1>&2
        # um_fix_vfunc
        return 99
    fi

    # um_fix_vfunc

    # Build the right library from source
    history -
    echo "Building umem_syms helper library." 1>&2
    rm -f $source $rightlib
    # The helper source only declares one pointer of each libumem type,
    # so that compiling it with -g gives dbx the type definitions.
    # NOTE(review): the header name is reconstructed; confirm that
    # <umem_impl.h> is the header that declares these types.
    eval "cat > $source << EOFILE
#include <umem_impl.h>
umem_bufctl_t *umdbx_bufctl;
umem_bufctl_audit_t *umdbx_bufctl_audit;
umem_buftag_t *umdbx_buftag;
umem_slab_t *umdbx_slab;
umem_magtype_t *umdbx_magtype;
umem_cpu_cache_t *umdbx_cpu_cache;
umem_log_header_t *umdbx_log_header;
umem_cpu_t *umdbx_cpu;
umem_cpu_log_header_t *umdbx_cpu_log_header;
struct umem_cache *umdbx_cache;
EOFILE"
    cc -g -KPIC -G $bitsoption $source -o $rightlib
    umem_symbols_library=$rightlib
    um_load_and_use $umem_symbols_library
    history +
}

um_fix_vfunc()
{
    # sometimes 'current func' gets messed up.
    # This is an attempt to fix that by resetting the
    # 'visiting func' to the current frame function.
    frame $(frame) > /dev/null
}

# Stop preloading libumem into programs started from dbx.
umem_stop()
{
    echo Disabling libumem debugging
    LD_PRELOAD_32=
    LD_PRELOAD_64=
}