;;; ryoakg
;;; 9/26/2017 - 3:27 AM

;;; tensor-flow-label-image.clj

;;; original: https://github.com/tensorflow/tensorflow/blob/master/tensorflow/java/src/main/java/org/tensorflow/examples/LabelImage.java
;;; cf. https://fxapps.blogspot.jp/2017/04/using-tensorflow-from-java-application.html

;; Declares the two libraries this script depends on: the TensorFlow Java
;; binding and Commons IO (the source of IOUtils, imported below).
;; NOTE(review): set-env! is presumably Boot's environment setter, i.e. this
;; file is meant to be evaluated by Boot -- confirm.
(set-env! :dependencies '[[org.tensorflow/tensorflow "1.3.0"]
                          [commons-io/commons-io "2.5"]])

(import (java.nio.file Paths Files)
        (org.apache.commons.io IOUtils)
        (org.tensorflow Graph
                        Session
                        Tensor
                        DataType
                        Output))

(defn op-set
  "Returns a map of node-building functions that all add operations to the
  graph `g`.

  Keys and the functions behind them:
    :div, :sub, :resize-bilinear, :expand-dims
                  (fn [in1 in2])            op with two inputs
    :constant     (fn [name value])         Const node named `name` whose
                                            value is a Tensor created from
                                            `value`
    :cast         (fn [v dtype])            Cast node, attribute DstT = dtype
    :decode-jpeg  (fn [contents channels])  DecodeJpeg node, attribute
                                            channels = channels

  Every function returns output 0 of the operation it builds.

  NOTE(review): except for :constant, the node name is fixed to the op type,
  so each of those ops can presumably be added only once per graph -- confirm
  against TensorFlow's node-naming rules."
  [^Graph g]
  (letfn [(two-input-op [^String op-type]
            ;; Produces the builder fn for one op type; the op type is also
            ;; used as the node name.
            (fn [^Output lhs ^Output rhs]
              (.. g
                  (opBuilder op-type op-type)
                  (addInput lhs)
                  (addInput rhs)
                  build
                  (output 0))))
          (const-node [^String name ^Object v]
            ;; The tensor is only needed while the node is being built;
            ;; with-open closes it once the output has been obtained.
            (with-open [t (Tensor/create v)]
              (.. g
                  (opBuilder "Const" name)
                  (setAttr "dtype" (.dataType t))
                  (setAttr "value" t)
                  build
                  (output 0))))
          (cast-node [^Output v ^DataType type]
            (.. g
                (opBuilder "Cast" "Cast")
                (addInput v)
                (setAttr "DstT" type)
                build
                (output 0)))
          ;; `channels` is deliberately left without a type hint, as before.
          (decode-jpeg-node [^Output contents channels]
            (.. g
                (opBuilder "DecodeJpeg" "DecodeJpeg")
                (addInput contents)
                (setAttr "channels" channels)
                build
                (output 0)))]
    {:div             (two-input-op "Div")
     :sub             (two-input-op "Sub")
     :resize-bilinear (two-input-op "ResizeBilinear")
     :expand-dims     (two-input-op "ExpandDims")
     :constant        const-node
     :cast            cast-node
     :decode-jpeg     decode-jpeg-node}))

(let [target-size [224 224]             ; height x width
      mean-value  (float 117.)
      scale-value (float 1.)]
  (defn construct-and-execute-graph-to-normalize-image
    "Builds a throw-away graph around `image-bytes`, runs it, and returns the
    resulting Tensor.

    The graph applies, in this order: DecodeJpeg with 3 channels, Cast to
    DataType/FLOAT, ExpandDims with the constant 0, ResizeBilinear to
    224 x 224, Sub of the constant 117 and Div by the constant 1.

    The Graph and Session are closed before returning; the returned Tensor is
    not, so the caller is responsible for closing it."
    [image-bytes]
    (with-open [^Graph   g (Graph.)
                ^Session s (Session. g)]
      ;; Nodes are created in the same order as data flows through them.
      (let [ops       (op-set g)
            const     (:constant ops)
            decoded   ((:decode-jpeg ops) (const "input" image-bytes) 3)
            as-float  ((:cast ops) decoded DataType/FLOAT)
            batched   ((:expand-dims ops) as-float
                       (const "make_batch" (int 0)))
            resized   ((:resize-bilinear ops) batched
                       (const "size" (int-array target-size)))
            centered  ((:sub ops) resized (const "mean" mean-value))
            output    ((:div ops) centered (const "scale" scale-value))
            ;; The final node is fetched by the name of its operation.
            node-name (.. output op name)]
        (.. s
            runner
            (fetch node-name)
            run
            (get 0))))))

(defn ^"[F" execute-inception-graph
  "Imports `graph-def` into a fresh Graph, feeds `image` to the node named
  \"input\", fetches the node named \"output\" and returns its single row as
  a float array.

  graph-def -- the serialized graph definition passed to Graph.importGraphDef
  image     -- the Tensor to feed; it is not closed here

  Throws Exception when the fetched tensor is not of shape [1 N].

  The Graph, the Session and the fetched result Tensor are all closed before
  returning; the returned float array is an independent copy."
  [graph-def
   ^Tensor image]
  (with-open [g (Graph.)]
    (.importGraphDef g graph-def)
    (with-open [^Session s      (Session. g)
                ^Tensor  result (-> s
                                    .runner
                                    (.feed "input" image)
                                    (.fetch "output")
                                    .run
                                    (.get 0))]
      (let [[may-be-1 n-labels :as shape] (.shape result)]
        (when (or (not= 2 (.numDimensions result))
                  (not= may-be-1 1))
          (throw (Exception.
                  ;; `shape` is a long array; formatting it directly with %s
                  ;; would print the array's identity rather than its
                  ;; contents, so it is converted to a vector first.
                  (format "Expected model to produce a [1 N] shaped tensor where N is the number of labels, instead it produced one with shape %s"
                          (vec shape)))))
        ;; A genuine 1 x N float matrix as the copy destination.
        (let [arr (make-array Float/TYPE 1 n-labels)]
          (.copyTo result arr)
          (first arr))))))

(let [labels (-> (System/getProperty "user.dir")
                 (Paths/get (into-array ["imagenet_comp_graph_label_strings.txt"]))
                 Files/readAllLines
                 vec)]
  (defn guess-image
    "Downloads the image at `image-url`, normalizes it, runs it through the
    graph stored in tensorflow_inception_graph.pb (read from the user.dir
    directory on every call) and returns a sequence of [label score] vectors,
    where score is the model output multiplied by 100, ordered from the
    highest score to the lowest.

    The labels are read once, when this form is loaded, from
    imagenet_comp_graph_label_strings.txt in the same directory."
    [image-url]
    (let [url        (java.net.URL. image-url)
          jpeg-bytes (IOUtils/toByteArray url)]
      (with-open [^Tensor image (construct-and-execute-graph-to-normalize-image jpeg-bytes)]
        (let [model-path (Paths/get (System/getProperty "user.dir")
                                    (into-array ["tensorflow_inception_graph.pb"]))
              scores     (execute-inception-graph (Files/readAllBytes model-path)
                                                  image)
              ;; The original author observed that the number of labels and
              ;; the number of values coming out of
              ;; tensorflow_inception_graph.pb do not match, suspected the
              ;; two data files are inconsistent, and simply guarded against
              ;; it. Pairing the two collections stops at the shorter one, so
              ;; no label lookup can run past the end of `labels`.
              scored     (map (fn [label probability]
                                [label (* probability 100.)])
                              labels
                              scores)]
          (reverse (sort-by second scored)))))))

;;; Download https://storage.googleapis.com/download.tensorflow.org/models/inception5h.zip
;;; and unzip it into the working directory (user.dir) before running this script.
;; Example invocations. Each call downloads the image at the given URL and
;; returns a sequence of [label score] vectors, highest score first. Nothing
;; is printed here, so the results are only visible when these forms are
;; evaluated interactively.
(guess-image "https://upload.wikimedia.org/wikipedia/commons/e/e0/Giant_panda01_960.jpg")
(guess-image "https://upload.wikimedia.org/wikipedia/commons/a/ad/Vulpes_macrotis_mutica_sitting.jpg")
(guess-image "https://upload.wikimedia.org/wikipedia/commons/1/18/Vombatus_ursinus_-Maria_Island_National_Park.jpg")
(guess-image "https://upload.wikimedia.org/wikipedia/commons/0/0a/Standing_jaguar.jpg")