Apply gradient descent to optimize the objective function of model 1

1/10/2018 - 3:56 PM

Apply gradient descent to optimize the objective function of model 1

import argparse
import time
from utils import *

# set a random seed to make experiments reproducible
np.random.seed(10)


# create parser
def create_parser():
    parser = argparse.ArgumentParser()
    parser.add_argument('--input_file', default=None, help='Input data file')
    parser.add_argument('--epochs', default=None, help='The number of epochs during training')
    parser.add_argument('--lr', default=None, help='Learning rate')
    return parser


# helper function; get the value from matrix
def get_value(A, i, j):
    if i < j:
        return A[j, i]
    else:
        return A[i, j]


# helper function for initialization
def init_posi(r):
    x = np.random.uniform(-r, r, 3)

    while True:
        if x[0] ** 2 + x[1] ** 2 + x[2] ** 2 > r ** 2:
            x = np.random.uniform(-r, r, 3)
        else:
            return x


# initialize the coordinates of beads
def initialize(num_node, r=4.):
    # init variables
    node = np.zeros((num_node, 3), dtype=float)
    for i in range(num_node):
        node[i] = init_posi(r)
    return node


# update coordinate thru each epochs
def solver(file_name, epochs, init_lr=0.01):
    ## initialize variables
    loss = []

    ## load data
    with open(file_name, 'r') as f:
        # get the number of nodes
        num_node = int(f.readline().strip())

        # initialize matrics
        real_dist = np.zeros((num_node, num_node), dtype=float) - 1

        for line in f:
            submatrix = line.strip().split()
            idx1 = int(submatrix[0])
            idx2 = int(submatrix[1])
            real_dist[idx1][idx2] = float(submatrix[2])
            real_dist[idx2][idx1] = float(submatrix[2])

    # get initialized
    node_posi = initialize(num_node)

    ## go thru epochs
    for epoch in range(epochs):
        lr = init_lr / np.power(2, (epoch / 200))
        print('---' + str(epoch) + '---')
        # calculate previous loss
        node_dist = pairwise_dist(node_posi)
        current_loss = cal_loss(node_dist, real_dist)
        print('Current loss is ', current_loss)
        loss += [current_loss]

        # calculate gradient
        # 1 gradient cache
        cache = derive_cache(node_dist, real_dist)
        # 2 update x, y, z
        for i in range(num_node):
            dx, dy, dz = 0., 0., 0.
            # calulate updated loss
            count = 0
            for j in range(num_node):
                dx += get_value(cache, i, j) * (node_posi[i][0] - node_posi[j][0])
                dy += get_value(cache, i, j) * (node_posi[i][1] - node_posi[j][1])
                dz += get_value(cache, i, j) * (node_posi[i][2] - node_posi[j][2])
                count += 1
            node_posi[i][0] -= lr * dx / count
            node_posi[i][1] -= lr * dy / count
            node_posi[i][2] -= lr * dz / count
    return loss, node_posi


# backpropogation
def derive_cache(node_dist, real_dist):
    num_node = len(node_dist)
    dist = np.zeros((num_node, num_node), dtype=float)

    for i in range(num_node):
        for j in range(i+1, num_node):
            if real_dist[i][j] > 0:
                cache = 2 * (node_dist[i][j] - real_dist[i][j]) / (real_dist[i][j]**2 * node_dist[i][j])
                dist[i][j] = cache
                dist[j][i] = cache
    return dist


# calculate pairwise distance; helper function for solver
def pairwise_dist(node_posi):
    # init variables
    num_node = len(node_posi)
    dist = np.zeros((num_node, num_node), dtype=float)

    for i in range(num_node):
        for j in range(i+1, num_node):
            current_dist = get_dist(node_posi[i, :], node_posi[j, :])
            dist[i][j] = current_dist
            dist[j][i] = current_dist
    return dist


# whole pipeline
def main():
    # create parser and load data
    args = create_parser().parse_args()
    start = time.time()
    loss, posi = solver(args.input_file, args.epochs, init_lr=args.lr)
    print(time.time() - start)


if __name__ == "__main__":
    main()

utils.py

import numpy as np
import math

# calculate the square of sech function
def sech2(x):
    """
    sech(x)
    Uses numpy's cosh(x).
    """
    return np.power(1. / np.cosh(x), 2)

# calculate euclidean distance between two nodes
def get_dist(x, y):
    return np.linalg.norm(x-y)


# convert frequency to distance
def convert(freq):
    dist = math.pow(10, (math.log10(30.0) - math.log10(freq))/1.5)
    return dist


# calculate loss from model1
def cal_loss(node_posi, real_dist):
    num = len(node_posi)
    loss = 0.0
    count = 0
    for i in range(num):
        for j in range(i+1, num):
            if real_dist[i][j] > 0:
                loss += (node_posi[i][j] - real_dist[i][j])**2 / real_dist[i][j]**2
                count += 1
    return 2 * loss / count


# 3D visulation
def simple_visulization(node_posi):
    import matplotlib as mpl
    import matplotlib.pyplot as plt

    mpl.rcParams['legend.fontsize'] = 10

    fig = plt.figure()
    ax = fig.gca(projection='3d')
    N = len(node_posi)

    ax.plot(node_posi[:, 0], node_posi[:, 1], node_posi[:, 2],
            label='parametric curve')
    ax.legend()
    fig.savefig('test.pdf')  # save the figure to file

Cacher is the code snippet organizer for pro developers

We empower you and your team to get more done, faster

Apply gradient descent to optimize the objective function of model 1