longw010
1/10/2018 - 3:56 PM

Apply gradient descent to optimize the objective function of model 1

Apply gradient descent to optimize the objective function of model 1

import argparse
import time
from utils import *

# set a random seed to make experiments reproducible
np.random.seed(10)


# create parser
def create_parser():
    parser = argparse.ArgumentParser()
    parser.add_argument('--input_file', default=None, help='Input data file')
    parser.add_argument('--epochs', default=None, help='The number of epochs during training')
    parser.add_argument('--lr', default=None, help='Learning rate')
    return parser


# helper function; get the value from matrix
def get_value(A, i, j):
    if i < j:
        return A[j, i]
    else:
        return A[i, j]


# helper function for initialization
def init_posi(r):
    x = np.random.uniform(-r, r, 3)

    while True:
        if x[0] ** 2 + x[1] ** 2 + x[2] ** 2 > r ** 2:
            x = np.random.uniform(-r, r, 3)
        else:
            return x


# initialize the coordinates of beads
def initialize(num_node, r=4.):
    # init variables
    node = np.zeros((num_node, 3), dtype=float)
    for i in range(num_node):
        node[i] = init_posi(r)
    return node


# update coordinate thru each epochs
def solver(file_name, epochs, init_lr=0.01):
    ## initialize variables
    loss = []

    ## load data
    with open(file_name, 'r') as f:
        # get the number of nodes
        num_node = int(f.readline().strip())

        # initialize matrics
        real_dist = np.zeros((num_node, num_node), dtype=float) - 1

        for line in f:
            submatrix = line.strip().split()
            idx1 = int(submatrix[0])
            idx2 = int(submatrix[1])
            real_dist[idx1][idx2] = float(submatrix[2])
            real_dist[idx2][idx1] = float(submatrix[2])

    # get initialized
    node_posi = initialize(num_node)

    ## go thru epochs
    for epoch in range(epochs):
        lr = init_lr / np.power(2, (epoch / 200))
        print('---' + str(epoch) + '---')
        # calculate previous loss
        node_dist = pairwise_dist(node_posi)
        current_loss = cal_loss(node_dist, real_dist)
        print('Current loss is ', current_loss)
        loss += [current_loss]

        # calculate gradient
        # 1 gradient cache
        cache = derive_cache(node_dist, real_dist)
        # 2 update x, y, z
        for i in range(num_node):
            dx, dy, dz = 0., 0., 0.
            # calulate updated loss
            count = 0
            for j in range(num_node):
                dx += get_value(cache, i, j) * (node_posi[i][0] - node_posi[j][0])
                dy += get_value(cache, i, j) * (node_posi[i][1] - node_posi[j][1])
                dz += get_value(cache, i, j) * (node_posi[i][2] - node_posi[j][2])
                count += 1
            node_posi[i][0] -= lr * dx / count
            node_posi[i][1] -= lr * dy / count
            node_posi[i][2] -= lr * dz / count
    return loss, node_posi


# backpropogation
def derive_cache(node_dist, real_dist):
    num_node = len(node_dist)
    dist = np.zeros((num_node, num_node), dtype=float)

    for i in range(num_node):
        for j in range(i+1, num_node):
            if real_dist[i][j] > 0:
                cache = 2 * (node_dist[i][j] - real_dist[i][j]) / (real_dist[i][j]**2 * node_dist[i][j])
                dist[i][j] = cache
                dist[j][i] = cache
    return dist


# calculate pairwise distance; helper function for solver
def pairwise_dist(node_posi):
    # init variables
    num_node = len(node_posi)
    dist = np.zeros((num_node, num_node), dtype=float)

    for i in range(num_node):
        for j in range(i+1, num_node):
            current_dist = get_dist(node_posi[i, :], node_posi[j, :])
            dist[i][j] = current_dist
            dist[j][i] = current_dist
    return dist


# whole pipeline
def main():
    # create parser and load data
    args = create_parser().parse_args()
    start = time.time()
    loss, posi = solver(args.input_file, args.epochs, init_lr=args.lr)
    print(time.time() - start)


if __name__ == "__main__":
    main()
import numpy as np
import math

# calculate the square of sech function
def sech2(x):
    """
    sech(x)
    Uses numpy's cosh(x).
    """
    return np.power(1. / np.cosh(x), 2)

# calculate euclidean distance between two nodes
def get_dist(x, y):
    return np.linalg.norm(x-y)


# convert frequency to distance
def convert(freq):
    dist = math.pow(10, (math.log10(30.0) - math.log10(freq))/1.5)
    return dist


# calculate loss from model1
def cal_loss(node_posi, real_dist):
    num = len(node_posi)
    loss = 0.0
    count = 0
    for i in range(num):
        for j in range(i+1, num):
            if real_dist[i][j] > 0:
                loss += (node_posi[i][j] - real_dist[i][j])**2 / real_dist[i][j]**2
                count += 1
    return 2 * loss / count


# 3D visulation
def simple_visulization(node_posi):
    import matplotlib as mpl
    import matplotlib.pyplot as plt

    mpl.rcParams['legend.fontsize'] = 10

    fig = plt.figure()
    ax = fig.gca(projection='3d')
    N = len(node_posi)

    ax.plot(node_posi[:, 0], node_posi[:, 1], node_posi[:, 2],
            label='parametric curve')
    ax.legend()
    fig.savefig('test.pdf')  # save the figure to file