Linux misc


Docker commands

docker pull ubuntu:14.04
docker run -i -t ubuntu:14.04 /bin/bash
docker attach mycontainername
docker start mycontainername
docker stop mycontainername
sudo docker export mycontainername | gzip -9 > mycontainername.tar.gz
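
To re-import such an exported filesystem as a new image later, one possible way (mynewimage is a hypothetical name; docker import accepts compressed tarballs on stdin):

cat mycontainername.tar.gz | sudo docker import - mynewimage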

To detach from an attached container without stopping it, use Ctrl + p then q (hold Ctrl, then press p then q).

To assign a specific IP address to a container on the docker bridge:

sudo docker run --name=cont1 --privileged -i -t ubuntu:14.04 /bin/bash
sudo docker start cont1
sudo docker exec cont1 ifconfig eth0 172.17.0.50 netmask 255.255.0.0 up

To set the MAC address, use:

--mac-address=92:20:de:b0:6b:61
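
For example, combined with a container name (cont2 is a hypothetical name):

sudo docker run --name=cont2 --mac-address=92:20:de:b0:6b:61 -i -t ubuntu:14.04 /bin/bash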

Script to analyse logs by regrouping similar lines

This is a home-made script for generic log analysis: it groups lines by similarity according to a match ratio given by the user and saves the results. It is in beta; use it with caution. The version below targets Python 3.

    # Generic log analyser v1.0
    # Zaft

    # To use this script, provide the log file path and a match ratio
    # between 0 and 1 (0.6 to 0.8 is usually a good range).

    import sys
    from difflib import SequenceMatcher

    # Similarity ratio between two strings, between 0 and 1
    def similar(a, b):
        return SequenceMatcher(None, a, b).ratio()

    logfile = input('Enter log file path (ex: /var/log/auth.log):\n')
    match = float(input('Enter sensitivity value (match factor), between 0 and 1 (0.6 to 0.7 is a good starting range):\n'))

    # Read the whole log file
    with open(logfile, "r") as myfile:
        data = myfile.readlines()

    # Allocate working lists
    datasize = len(data)
    data_tag = [0] * datasize        # pattern tag assigned to each line

    knowndata_s = [None] * datasize  # one representative line per pattern (more than needed, to be optimized)
    knowndata_tot = [0] * datasize   # number of lines matching each pattern

    # Init first pattern with the first line
    knowndatasize = 1
    knowndata_s[0] = data[0]
    knowndata_tot[0] = 1

    flag = 0
    c = 0

    print()
    print('Analysing....  ', end='')

    # Loop on all remaining lines of the file
    for i in range(1, datasize):
        # Check if the line matches an already known pattern
        for j in range(knowndatasize):
            if similar(data[i], knowndata_s[j]) > match:  # 0.8 seems good for auth.log
                # Pattern already known: increment its counter and move to the next line
                knowndata_tot[j] += 1
                data_tag[i] = j
                flag = 1
                break
        if flag == 0:
            # Line not recognised: register a new pattern
            knowndata_s[knowndatasize] = data[i]
            knowndata_tot[knowndatasize] = 1
            data_tag[i] = knowndatasize
            knowndatasize += 1
        else:
            flag = 0

        # Display a small progress spinner
        if c == 20:
            sys.stdout.write('\b/')
        elif c == 40:
            sys.stdout.write('\b-')
        elif c == 60:
            sys.stdout.write('\b\\')
        elif c == 80:
            sys.stdout.write('\b|')
            c = 0
        sys.stdout.flush()
        c += 1

    print('\b\b done!')

    print('Sorting results....  ', end='')
    knowndata_tag = list(range(knowndatasize))
    # Sort patterns by occurrence count, keeping count, tag and text aligned (for display only)
    knowndata_tot_d, knowndata_tag_d, knowndata_s_d = (list(t) for t in zip(*sorted(zip(
        knowndata_tot[:knowndatasize], knowndata_tag[:knowndatasize], knowndata_s[:knowndatasize]))))
    print('\b\b done!')

    # Print results
    print()
    print("########################################################")
    print("############# Found", knowndatasize, "different patterns")
    print("#####")
    print()

    for j in range(knowndatasize):
        print("----> tag:", knowndata_tag_d[j], " found", knowndata_tot_d[j], "time(s):", knowndata_s_d[j])

    # Extract desired tag for lvl 2 analysis

    print()
    print("What do you want to do?")
    print("0 . Exit (rerun the script to redo the analysis with a different match parameter)")
    print("1 . Lvl 2 analysis (redo the analysis on one tag of the previous analysis to expand it)")
    print("2 . Purge specific lines from the log and write the result to a file")
    value = int(input("      Note: you can then redo an analysis on the resulting file with a different match parameter\n"))

    if value == 2:
        flag = 0
        data_tag_remove = [0] * knowndatasize
        rm = 0
        data_tag_remove[0] = int(input("Which tag do you want to remove?\n"))
        while flag == 0:
            value = int(input("Which tag do you also want to remove? (-1 to print current state, -2 validate and write)\n"))
            if value == -2:
                flag = 1
            elif value == -1:
                print()
                print("########################################################")
                print("############# Update")
                print("#####")
                print()
                print("Tags to be removed:", data_tag_remove[0:rm+1])
                print()
                print("To be written:")
                print()
                for j in range(knowndatasize):
                    flag1 = 0
                    for i in range(rm+1):
                        if knowndata_tag_d[j] == data_tag_remove[i]:
                            flag1 = 1
                    if flag1 == 0:
                        print("tag:", knowndata_tag_d[j], " found", knowndata_tot_d[j], "time(s):", knowndata_s_d[j])
            else:
                rm += 1
                data_tag_remove[rm] = value
        filename = input('Enter output file path (ex: /home/sphen/log.out):\n')
        print('Writing to file....  ', end='')
        with open(filename, "w") as myfile2:
            # Keep only the lines whose tag is not in the removal list
            for i in range(datasize):
                flag = 0
                for j in range(rm+1):
                    if data_tag[i] == data_tag_remove[j]:
                        flag = 1
                if flag == 0:
                    myfile2.write(data[i])
        print('\b\b done!')
        sys.exit()

    elif value == 1:
        l2_tag = int(input("Which tag for lvl 2 analysis?\n"))
        print("Which match for lvl 2 analysis? Must be >", match)
        l2_match = float(input("\n"))
        c = 0

        # Extract the lines carrying the chosen tag from the lvl 1 data
        l2_datasize = knowndata_tot[l2_tag]
        l2_data = [None] * l2_datasize
        for i in range(datasize):
            if data_tag[i] == l2_tag:
                l2_data[c] = data[i]
                c += 1

        l2_knowndata_s = [None] * l2_datasize  # more than needed, to be optimized
        l2_knowndata_tot = [0] * l2_datasize   # same

        # Init first pattern
        l2_knowndatasize = 1
        l2_knowndata_s[0] = l2_data[0]
        l2_knowndata_tot[0] = 1

        flag = 0
        c = 0

        print()
        print('Analysing l2....  ', end='')

        for i in range(1, l2_datasize):
            # Check if the line matches an already known pattern
            for j in range(l2_knowndatasize):
                if similar(l2_data[i], l2_knowndata_s[j]) > l2_match:  # 0.9 seems good for auth.log
                    # Pattern already known: increment its counter and move to the next line
                    l2_knowndata_tot[j] += 1
                    flag = 1
                    break
            if flag == 0:
                # Line not recognised: register a new pattern
                l2_knowndata_s[l2_knowndatasize] = l2_data[i]
                l2_knowndata_tot[l2_knowndatasize] = 1
                l2_knowndatasize += 1
            else:
                flag = 0

            # Display a small progress spinner
            if c == 4:
                sys.stdout.write('\b/')
            elif c == 8:
                sys.stdout.write('\b-')
            elif c == 12:
                sys.stdout.write('\b\\')
            elif c == 16:
                sys.stdout.write('\b|')
                c = 0
            sys.stdout.flush()
            c += 1

        print('\b\b done!')

        print('Sorting results l2....  ', end='')
        # Sort results by occurrence count (for display only)
        l2_knowndata_tot_d, l2_knowndata_s_d = (list(t) for t in zip(*sorted(zip(
            l2_knowndata_tot[:l2_knowndatasize], l2_knowndata_s[:l2_knowndatasize]))))
        print('\b\b done!')

        # Print results
        print("###############################")
        print("# Found", l2_knowndatasize, "different patterns")
        print("###############################")
        for j in range(l2_knowndatasize):
            print(" found", l2_knowndata_tot_d[j], "time(s):", l2_knowndata_s_d[j])

gstack

To display the stack currently executed by a process (say PID 23478); very useful for detecting deadlocks in parallel computations:

gstack 23478
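
Since a deadlocked process shows the same stack over and over, sampling it a few times makes the blocked frames stand out; a minimal sketch using the PID above:

for i in 1 2 3; do gstack 23478; sleep 2; done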

Compress - Extract

Extract an archive, depending on its extension:

*.tar.bz2 : tar xvjf
*.tar.gz : tar xvzf
*.tar.xz : tar Jxvf
*.bz2 : bunzip2 or bzip2 -dk
*.rar : unrar x
*.gz : gunzip
*.tar : tar xvf
*.tbz2 : tar xvjf
*.tgz : tar xvzf
*.zip : unzip
*.Z : uncompress
*.7z : 7z x

For bzip2, the -k flag prevents the command from removing the original file after extraction. If you want the original to be deleted, use -d alone.

Compress a folder into a tar.gz file:

tar cvzf file.tar.gz myfoldertozip
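
To check the resulting archive without extracting it, tar t lists its contents:

tar tvzf file.tar.gz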

Tool Scripts

Display the size of each subdirectory in human-readable units with a log-scale star histogram, sorted by size:

du -k | sort -n | perl -ne 'if ( /^(\d+)\s+(.*$)/){$l=log($1+.1);$m=int($l/log(1024)); printf  ("%6.1f\t%s\t%25s  %s\n",($1/(2**(10*$m))),(("K","M","G","T","P")[$m]),"*"x (1.5*$l),$2);}'

List the 10 most recently modified files under a directory (passed as $1). Without handling file names that contain spaces (faster):

find $1 -type f | xargs stat --format '%Y :%y %n' | sort -nr | cut -d: -f2- | head

Handling file names with spaces:

find $1 -type f -exec stat --format '%Y :%y %n' {} \; | sort -nr | cut -d: -f2- | head
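
Since both commands take the directory as $1, they are meant to live in a small wrapper script; a minimal sketch (lastmod.sh is a hypothetical name):

#!/bin/bash
# lastmod.sh - list the 10 most recently modified files under a directory
find "$1" -type f -exec stat --format '%Y :%y %n' {} \; | sort -nr | cut -d: -f2- | head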

To replace /opt/softs with /newopt/newsofts in all files of the current directory:

sed -i 's/\/opt\/softs/\/newopt\/newsofts/g' *
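
The same substitution is easier to read with an alternative sed delimiter, which avoids escaping the slashes:

sed -i 's|/opt/softs|/newopt/newsofts|g' *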

Script at boot on Debian

http://www.debian-administration.org/article/28/Making_scripts_run_at_boot_time_with_Debian
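
In short, the method described there registers an init script with update-rc.d; a minimal sketch (myscript is a hypothetical name):

sudo cp myscript /etc/init.d/
sudo chmod +x /etc/init.d/myscript
sudo update-rc.d myscript defaults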

Install msi

msiexec /i 7z920-x64.msi

Mount ISO

mount -o loop /home/gg/my.iso /cdrom
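
To unmount it afterwards:

umount /cdrom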

Reduce swap usage on Ubuntu

This modification is important, especially if you use virtual machines. Edit /etc/sysctl.conf and add:

# Decrease swap usage to a reasonable level
vm.swappiness=10
# Improve cache management
vm.vfs_cache_pressure=50

at the end of the file. Then reboot, or apply the new values immediately as shown below.
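
To load the new values without rebooting:

sudo sysctl -p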

Produce patch from diff and apply it

The -u0 option produces a unified diff with zero context lines, and -N treats absent files as empty:

diff -u0 -Nr file1 file2 > patch.txt
patch file3 < patch.txt
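
The same workflow applies to whole directory trees; a sketch with hypothetical directories dir1, dir2 and dir3 (a copy of dir1), where -p1 strips the leading path component when applying:

diff -u0 -Nr dir1 dir2 > patch.txt
cd dir3 && patch -p1 < patch.txt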