Troubleshooting tools

# strace / ltrace
# system call categories (examples):
# process, files, networking, signals, inter-process communication, terminals, threads, I/O devices
#---------------------------------------
strace -s 2000 -f ./traceme
strace -c [command]		# summary
strace -p 1,2,3... -e trace=close,open -Ttrf -iq -o output.strace
strace -o /tmp/strace.out -s 2000  -fp 12152 # -e raw=select
# trace=network,memory,process,signal,file

ltrace -e fwrite_unlocked ls
ltrace -c /usr/bin/id # summary of libraries

open,read,write,connect,socket


#  ps
#----------------------------------------
ps -e 						# all processes
ps aux
ps -C firefox-bin
ps axo pid,ppid,command 
ps o pid,ppid,command,uid
pstree -p | grep httpd
#nohup webservices/env/bin/gunicorn -b 0.0.0.0:8000 apm.wsgi:application --chdir=webservices/apm/ > $WORKSPACE/scriptoutput.txt 2>&1 &
BUILD_ID=dontKillMe nohup webservices/env/bin/gunicorn -b 0.0.0.0:8000 apm.wsgi:application --chdir=webservices/apm/ &

# see what is eating RAM
ps -e -o pid,vsz,comm= | sort -n -k 2


#  lsof
#----------------------------------------
lsof -a -u root -d txt
lsof -p ^1
lsof -i tcp/udp
lsof -i :80
lsof -i tcp:80
lsof -d txt/mem       # lsof -d 0-2

lsof -i # show all connections
lsof -Pi
lsof -i6 # ipv6 connections
lsof -i -sTCP:LISTEN # listen state
lsof -a -u apache -i # all connections by user apache

lsof /var/log/messages # who is using this file
lsof +D /usr/lib
lsof -u apache # files opened by apache user

lsof -p 666 # opened files by a PID
lsof -c sendmail # opened files by sendmail



# others
#----------------------------------------
ldd /usr/bin/ls

# /proc filesystem information
cmdline, cmd process
maps, memory map
status, processes privileges
fd, file desc in use by processes
cwd, link to proc current dir
root, link to proc root dir
mounts, mount table 

# quickly
strings /usr/bin/who        # strings
ldd /usr/bin/yes            # dependencies
nm -D -l -S /usr/bin/yes    # symbols
objdump -h /usr/bin/who     # sections
objdump -s -j .rodata /yes  # data
objdump -d -r -j .text /yes # code

# others
pstack <pid>

# OPENBSD
ktrace -o output.txt -p <PID>
kdump -f output.txt

# Kill every process whose command line matches "resque", except this shell.
# `pgrep -f` matches against the full command line, so a script that itself
# mentions "resque" can show up in the results; the $$ check skips it.
# PIDs are read one per line rather than word-splitting an unquoted $(...)
# into an array (ShellCheck SC2207).
while IFS= read -r pid; do
  # Quote both sides: an unquoted right-hand side of != inside [[ ]] is
  # treated as a glob pattern rather than a literal string.
  if [[ "$pid" != "$$" ]]; then
    kill "$pid"
  fi
done < <(pgrep -f resque)



# Memory stuff
#------------------------------
memstat -p 666






### BASIC 

# check for uptime and load average
uptime

# top check cpu usage
top

# check processes
ps -ef f
ps -eo user,sz,rss,minflt,majflt,pcpu,args

# check virtual mem usage
vmstat -Sm 1

# check disk i/o status
iostat -xmdz 1

# check for multi-processor
mpstat -P ALL 1

# check memory
free -m

### INTERMEDIATE

# system call tracer
strace -tttT -p 313

# sniff network packets
tcpdump -i <iface> -w /tmp/out.tcpdump

# network statistics
netstat -p -c 1

# network iface stats
nicstat 1

# process status
pidstat -t 1
pidstat -d 1

# check swap
swapon -s

# file descriptor checks
lsof -iTCP -sTCP:ESTABLISHED

# system activity reporter
sar -n TCP,ETCP,DEV 1

### ADVANCED

# socket statistics
ss -mop
ss -i

# network usage
iptraf

# block device i/o by process
iotop

# kernel slab allocator mem usage
slabtop

# perf
perf_events