Use iostat to get the performance data
# iostat -x
Device: rrqm/s wrqm/s r/s w/s rsec/s wsec/s avgrq-sz avgqu-sz await svctm %util
sda 0.48 2.76 1.90 0.92 56.44 29.40 30.53 0.01 4.48 2.46 0.69
Work out the queue length
(r/s + w/s) * await / 1000 = queue length
((1.90 + 0.92) * 4.48) / 1000 = 0.01
Calculate the throughput
(rsec/s + wsec/s) * 512 / 1024 = throughput in KiB/s
(56.44 + 29.40) * 512 / 1024 = 43 KiB/s
Calculate utilization
(r/s + w/s) * svctm / 1000 * 100 = utilization in %
(1.90 + 0.92) * 2.46 / 1000 * 100 = 0.69%
Determine peak arrival rate
(1 / svctm) * 1000 = peak arrival rate in requests/s
(1 / 2.46) * 1000 = 406.5 requests/s
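The same arithmetic can be scripted. A rough awk sketch, assuming the column layout shown above and sda as the device (newer sysstat versions report rkB/s and wkB/s instead of rsec/s and wsec/s, so the throughput line would need adjusting):
#iostat -x | awk '$1 == "sda" {
reqs = $4 + $5                                         # r/s + w/s
printf "queue length : %.2f\n", reqs * $10 / 1000      # requests * await
printf "throughput   : %.0f KiB/s\n", ($6 + $7) * 512 / 1024
printf "utilization  : %.2f%%\n", reqs * $11 / 1000 * 100
printf "peak arrivals: %.1f req/s\n", 1 / $11 * 1000   # 1 / svctm
}'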
Sunday, 30 January 2011
Systemtap
Install Systemtap. You will need to install kernel-debuginfo, kernel-devel and kernel-headers first
#yum install kernel-debuginfo-$(uname -r) kernel-devel-$(uname -r) kernel-headers-$(uname -r)
If the machine is a production server then only install the runtime
#yum install systemtap-runtime
Otherwise install the full package
#yum install systemtap
Check the following examples
#ll /usr/share/doc/systemtap-1.1/examples
drwxr-xr-x 2 root root 4096 Jan 30 15:52 general
drwxr-xr-x 2 root root 4096 Jan 30 15:52 html
-rw-r--r-- 1 root root 31140 Nov 17 14:41 index.html
-rw-r--r-- 1 root root 21938 Nov 17 14:41 index.txt
drwxr-xr-x 2 root root 4096 Jan 30 15:52 interrupt
drwxr-xr-x 2 root root 4096 Jan 30 15:52 io
-rw-r--r-- 1 root root 63347 Nov 17 14:41 keyword-index.html
-rw-r--r-- 1 root root 41966 Nov 17 14:41 keyword-index.txt
drwxr-xr-x 2 root root 4096 Jan 30 15:52 locks
drwxr-xr-x 2 root root 4096 Jan 30 15:52 memory
drwxr-xr-x 2 root root 4096 Jan 30 15:52 network
drwxr-xr-x 2 root root 4096 Jan 30 15:52 process
drwxr-xr-x 2 root root 4096 Jan 30 15:52 profiling
-rw-r--r-- 1 root root 5065 Nov 17 14:41 README
It is possible to compile a script into a kernel module that can later be run with staprun
#stap -m iotop iotop.stp
This creates iotop.ko in the current directory, which can then be run with the following command
#staprun iotop.ko
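A sketch of the workflow this enables for production machines that only have systemtap-runtime installed (the hostname and paths below are examples; the build host must run the same kernel version as the target):
Build the module on a development host without running it (pass 4 stops after compilation)
#stap -p4 -m iotop /usr/share/doc/systemtap-1.1/examples/io/iotop.stp
Copy it to the production host
#scp iotop.ko prodserver:/tmp/
Run it there using only the runtime
#staprun /tmp/iotop.ko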
OProfile
Install OProfile. You will need to install kernel-debuginfo first
#yum install kernel-debuginfo-$(uname -r)
#yum install -y oprofile oprofile-gui
Add the following to rc.local if you want profiling to be set up automatically at boot
Set up OProfile for kernel profiling
#opcontrol --setup --vmlinux=/usr/lib/debug/lib/modules/$(uname -r)/vmlinux
Or set up OProfile for non-kernel profiling
#opcontrol --setup --no-vmlinux
Then check the available events
#opcontrol --list-events
Clear the data and start to profile
#opcontrol --reset
#opcontrol --start
Dump the data to a file and stop the profiling
#opcontrol --dump
#opcontrol --stop
Get the information
#opreport
Get a report for a specific binary
#opreport -l /bin/bash
For the kernel use
#opreport -l /usr/lib/debug/lib/modules/$(uname -r)/vmlinux
Run the following to unload Oprofile
#opcontrol --deinit
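The whole cycle can also be wrapped in a small script, which is a convenient form to call from rc.local. A rough sketch (profile.sh is a made-up name; pass the workload to profile as its arguments):
#!/bin/bash
# profile.sh -- run a command under OProfile and print a per-symbol report
VMLINUX=/usr/lib/debug/lib/modules/$(uname -r)/vmlinux
opcontrol --setup --vmlinux=$VMLINUX   # use --no-vmlinux if kernel samples are not needed
opcontrol --reset                      # clear any old samples
opcontrol --start
"$@"                                   # run the workload being profiled
opcontrol --dump                       # flush samples to disk
opcontrol --stop
opreport -l "$1"                       # per-symbol report for the binary
opcontrol --deinit                     # unload the oprofile kernel module
For example: ./profile.sh /bin/bash -c 'ls -lR /usr > /dev/null'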
Valgrind
Valgrind checks the command it launches rather than an already running PID. For example, running cat over a process's memory map makes memcheck report on cat itself
#valgrind --tool=memcheck cat /proc/PID/maps
Run a program under valgrind to check it for memory leaks
#valgrind --tool=memcheck program_name
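For a fuller breakdown add the leak-check option; a minimal example, where leaky_prog is just a placeholder for the binary you are testing:
#valgrind --tool=memcheck --leak-check=full ./leaky_prog
Look for the LEAK SUMMARY and the "definitely lost" figures in the output.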
ARP CACHE
The cache can be viewed with the following command
#ip neighbor list
192.168.48.1 dev eth0 lladdr 00:50:56:c0:00:08 REACHABLE
192.168.48.190 dev eth0 lladdr 00:0c:29:5e:1c:46 REACHABLE
The cache can be flushed with this command
#ip neighbor flush dev eth0
The following settings can be modified
# sysctl -a | grep "4.neigh.default.gc"
net.ipv4.neigh.default.gc_thresh3 = 1024 #This is the HARD upper limit
net.ipv4.neigh.default.gc_thresh2 = 512 #This is the SOFT upper limit
net.ipv4.neigh.default.gc_thresh1 = 128
net.ipv4.neigh.default.gc_interval = 30 #This is the garbage collection interval in seconds
net.ipv4.neigh.default.gc_stale_time = 60
You will only need to raise these limits if the host has to keep ARP entries for a very large number of neighbours, for example lots of hosts connecting simultaneously
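A sketch of what that might look like, with illustrative values added to /etc/sysctl.conf:
net.ipv4.neigh.default.gc_thresh1 = 512
net.ipv4.neigh.default.gc_thresh2 = 2048
net.ipv4.neigh.default.gc_thresh3 = 4096
Then apply the change without rebooting
#sysctl -p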
HUGE PAGES
Huge pages can be reserved by editing /etc/sysctl.conf and adding
vm.nr_hugepages=INTEGER
You can also use the following kernel parameter
hugepages=INTEGER
You can check the settings using the following
#cat /proc/meminfo | grep -i huge
HugePages_Total: 20
HugePages_Free: 20
HugePages_Rsvd: 0
Hugepagesize: 4096 kB
If the application uses mmap to request huge pages then hugetlbfs must be mounted as a filesystem
#mkdir /hugepages
#mount -t hugetlbfs none /hugepages
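Putting it together, a sketch that reserves the pages, makes the mount persistent and checks the result (the page count and mount point are only examples):
#echo "vm.nr_hugepages=20" >> /etc/sysctl.conf
#sysctl -p
#mkdir -p /hugepages
#echo "none /hugepages hugetlbfs defaults 0 0" >> /etc/fstab
#mount /hugepages
#grep -i huge /proc/meminfo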
Saturday, 29 January 2011
RAID Performance
CHUNK
Calculate the chunk size by using iostat
#iostat -x
Device: rrqm/s wrqm/s r/s w/s rsec/s wsec/s avgrq-sz avgqu-sz await svctm %util
dm-0 0.00 0.00 0.00 4548.51 0.00 36388.12 40.00 203.91 44.83 0.02 10.69
Take the average request size (avgrq-sz, which is in sectors), multiply it by 512 to convert to bytes, divide by 1024 to get KiB, then divide by the number of data disks in the RAID array. So for a RAID 5 with three disks you divide by 2, as one disk's worth of capacity holds parity, for example
(40 * 512) / 1024 / 2 = 10
So the optimum chunk size for this would be 8K, because you always round down to the nearest power of two. You can create the array with this chunk size using the following command
#mdadm -C /dev/md1 -l5 -n3 --chunk=8 /dev/sd[def] -a yes
STRIDE
The optimum stride is the chunk size divided by the filesystem block size, so for the above array with a 4K block size it is
8/4 = 2
#mke2fs -j -b 4096 -E stride=2 /dev/md1
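The two calculations can be rolled into a small helper script; a rough sketch (raidcalc.sh is a made-up name) that takes avgrq-sz, the number of data disks and the filesystem block size in bytes:
#!/bin/bash
# raidcalc.sh AVGRQ_SZ DATA_DISKS BLOCK_SIZE_BYTES
avgrq=$1; disks=$2; bs=$3
kib=$(( avgrq * 512 / 1024 / disks ))        # request size per data disk in KiB
chunk=1                                      # round down to the nearest power of two
while [ $(( chunk * 2 )) -le $kib ]; do chunk=$(( chunk * 2 )); done
stride=$(( chunk * 1024 / bs ))
echo "chunk=${chunk}K stride=${stride}"
Running it with the numbers above, ./raidcalc.sh 40 2 4096, prints chunk=8K stride=2.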