Mon Feb 15 19:06:07 PST 2016

Floyd-Warshall in Awk

I was intrigued recently by the Floyd-Warshall algorithm for shortest path evaluation. Here is a short test script which computes the shortest path between vertices in a graph based on two cubes. (From a chemical perspective this might be two cubane molecules bonded by a carbon-carbon bridge, which I think might be called '1-(cuban-1-yl)cubane').

I thought that I would share the script here. I configured it so that it reads its list of bonds for the 'molecule' based on the tail end of the script file (so that there is only one file to execute).


#!/bin/sh

awk 'function printdistmatrix (k){
    printf "\nITERATION: %3d\n\n", k
    printf "    "
    for(i1=1;i1<=natoms;i1++){
            printf "%3d", i1 
    }
    printf "\n"
    printf "    "
    for(i1=1;i1<=natoms;i1++){
            printf "---", i1 
    }
    printf "\n"
    for(i1=1;i1<=natoms;i1++){
        printf "%3d|", i1
        for(j1=1;j1<=natoms;j1++){
            printf "%3d", dist[i1,j1]
        }
        printf "\n"
    } 
}
BEGIN{
    # input is the bond section from a sci file - the indices are adjusted to start at 1
    dt=0
    natoms=0
    while(getline<ARGV[1]>0){
        if(match($0,"BONDS-LIST")){
            dt++
            continue;
        }
        if(dt != 2)continue;
        if(match($0,"END"))break;
        nbonds++
        ib[nbonds,1]=$1+1
        ib[nbonds,2]=$2+1
        if($1>natoms)natoms=$1
        if($2>natoms)natoms=$2
    }
    natoms++
    print "THERE ARE " natoms " ATOMS"
    print "THERE ARE " nbonds " BONDS"

    # initialize the distance matrix
    for(i=1;i<=natoms;i++){
        for(j=1;j<=natoms;j++){
            dist[i,j]=99
        }
    }

    # initialize atom-atom distances to zero
    for(i=1;i<=natoms;i++){
        dist[i,i]=0
    }

    # set distances between bonded atoms to 1
    for(i=1;i<=nbonds;i++){
        dist[ib[i,1],ib[i,2]]=1
        dist[ib[i,2],ib[i,1]]=1
        nextatom[ib[i,1],ib[i,2]]=ib[i,2]
        nextatom[ib[i,2],ib[i,1]]=ib[i,1]
    }

    # this is the Floyd-Warshall algorithm
    for(k=1;k<=natoms;k++){
        printdistmatrix(k) 
        for(i=1;i<=natoms;i++){
            for(j=1;j<=natoms;j++){
                if(dist[i,j]>(dist[i,k]+dist[k,j])){
                    dist[i,j]=dist[i,k]+dist[k,j]
                    nextatom[i,j]=nextatom[i,k]
                }
            }
        }
    }

    istart=9
    iend=3
    print "\nTHE DISTANCE BETWEEN ISTART " istart " AND IEND " iend " IS " dist[istart,iend] "\n"
    print "CONNECTION: " istart
    while(istart!=iend){
        istart=nextatom[istart,iend]
        print "CONNECTION: " istart
    }
     
    
}' $0

exit

# bond data follow

BONDS-LIST
1 0 0 0
2 1 0 0
3 2 0 0
4 2 0 0
5 1 0 0
6 0 0 0
7 3 0 0
3 0 0 0
7 6 0 0
6 5 0 0
5 4 0 0
7 4 0 0
9 8 0 0
10 9 0 0
11 10 0 0
12 10 0 0
13 9 0 0
14 8 0 0
15 11 0 0
11 8 0 0
15 14 0 0
14 13 0 0
13 12 0 0
15 12 0 0
12 6 0 0
END
                    6______2
   10_____11        /\    /\
    /\   /\        /  \5 /  \
   /14\_/__\_____7/____-/----\3
 9/___//12 /13    \   /\1   /
  \  / \  /        \ /  \  /
 15\/___\/16       8\____\/4

With a start of 9 and and end of 3 a shortest path is:

9-10-11-13-7-1-2-3


Posted by ZFS | Permanent link | File under: chemistry, bash