trybeetle

take it slow!!

Advanced ML with TF on GCP (course 3)振り返り


Posted on Oct. 30, 2018, 11:31 p.m.



CourseraのAdvanced Machine Learning with TensorFlow on Google Cloud Platform Specializationのcourse3の振り返りです。Image Understanding with TensorFlow on GCPについてです。


概要

このコースでは、Image recognitionでのGCPの活用方法が紹介されていました。

  • TensorflowによるCNNモデル
  • Data augmentation
  • Transfer learning
  • TPUを利用したLarge Network
  • Cloud VisionやAutoMLを利用した簡易的な方法

簡易的なMachine learningの実験をする場合は、下記のプロダクトを使うと有効です。実験結果を考慮しながら、ML systemを設計するとより効率的です。

  • structured_data: BigQueryのML機能を使用する
  • unstructured_data: Cloud visionの様なPre-built MLを使用する

CNN with Data Augmentation

以下、datalabでの操作です。

Bucket等の設定


    import os
    # Notebook configuration: which GCP project/bucket/region to use and
    # which model type (linear/dnn/dnn_dropout/cnn) to train.
    PROJECT = 'cloud-training-demos' # REPLACE WITH YOUR PROJECT ID
    BUCKET = 'cloud-training-demos-ml' # REPLACE WITH YOUR BUCKET NAME
    REGION = 'us-central1' # REPLACE WITH YOUR BUCKET REGION e.g. us-central1
    MODEL_TYPE = 'cnn'

    # do not change these
    # Exported so the %bash cells in this notebook see the same values.
    os.environ['PROJECT'] = PROJECT
    os.environ['BUCKET'] = BUCKET
    os.environ['REGION'] = REGION
    os.environ['MODEL_TYPE'] = MODEL_TYPE
    os.environ['TFVERSION'] = '1.8'  # Tensorflow version

    %bash
    # Point the gcloud CLI at the chosen project and compute region.
    gcloud config set project $PROJECT
    gcloud config set compute/region $REGION

flowersmodel/task.pyを下記の内容で作成


    """Command-line entry point for the flowers image-classification trainer.

    Parses training flags and CNN hyperparameters, resolves the output
    directory (appending the hyperparameter-tuning trial id from TF_CONFIG
    when present), and hands everything to model.train_and_evaluate.
    """
    import argparse
    import json
    import os

    import model
    import tensorflow as tf

    if __name__ == '__main__':
      parser = argparse.ArgumentParser()
      # Input Arguments
      parser.add_argument(
          '--batch_size',
          help='Batch size for training steps',
          type=int,
          default=100
      )
      parser.add_argument(
          '--learning_rate',
          help='Initial learning rate for training',
          type=float,
          default=0.01
      )
      parser.add_argument(
          '--train_steps',
          help="""\
          Steps to run the training job for. A step is one batch-size,\
          """,
          type=int,
          default=100
      )
      parser.add_argument(
          '--output_dir',
          help='GCS location to write checkpoints and export models',
          required=True
      )
      parser.add_argument(
          '--train_data_path',
          # Fixed help text: this flag points at the *train* image URLs,
          # not the eval ones.
          help='location of train file containing img URLs',
          default='gs://cloud-ml-data/img/flower_photos/train_set.csv'
      )
      parser.add_argument(
          '--eval_data_path',
          help='location of eval file containing img URLs',
          default='gs://cloud-ml-data/img/flower_photos/eval_set.csv'
      )
      # Build the list of supported model names ('cnn' from 'cnn_model', etc.)
      # by introspecting model.py, so the help message stays in sync.
      model_names = [name.replace('_model', '')
                     for name in dir(model)
                     if name.endswith('_model')]
      parser.add_argument(
          '--model',
          help='Type of model. Supported types are {}'.format(model_names),
          required=True
      )
      parser.add_argument(
          '--job-dir',
          help='this model ignores this field, but it is required by gcloud',
          default='junk'
      )
      parser.add_argument(
          '--augment',
          help='if specified, augment image data',
          dest='augment', action='store_true')
      parser.set_defaults(augment=False)

      # optional hyperparameters used by cnn
      parser.add_argument(
          '--ksize1',
          help='kernel size of first layer for CNN',
          type=int,
          default=5)
      parser.add_argument(
          '--ksize2',
          help='kernel size of second layer for CNN',
          type=int,
          default=5)
      parser.add_argument(
          '--nfil1',
          help='number of filters in first layer for CNN',
          type=int,
          default=10)
      parser.add_argument(
          '--nfil2',
          help='number of filters in second layer for CNN',
          type=int,
          default=20)
      parser.add_argument(
          '--dprob',
          help='dropout probability for CNN',
          type=float,
          default=0.25)
      parser.add_argument(
          '--batch_norm',
          help='if specified, do batch_norm for CNN',
          dest='batch_norm',
          action='store_true')
      parser.set_defaults(batch_norm=False)

      args = parser.parse_args()
      # vars() on the Namespace yields the plain hyperparameter dict that
      # model.py expects (idiomatic equivalent of args.__dict__).
      hparams = vars(args)

      output_dir = hparams.pop('output_dir')
      # Append the hyperparameter-tuning trial id (if any) to the path so
      # concurrent trials do not overwrite each other's checkpoints.
      output_dir = os.path.join(
          output_dir,
          json.loads(
              os.environ.get('TF_CONFIG', '{}')
          ).get('task', {}).get('trial', '')
      )

      # Run the training job
      model.train_and_evaluate(output_dir, hparams)

flowersmodel/model.pyを下記の内容で作成


    from __future__ import absolute_import
    from __future__ import division
    from __future__ import print_function

    import tensorflow as tf

    # Emit INFO-level logs so training progress shows up in the job logs.
    tf.logging.set_verbosity(tf.logging.INFO)

    # Class labels, in the order used for one-hot encoding and label lookup.
    LIST_OF_LABELS = 'daisy,dandelion,roses,sunflowers,tulips'.split(',')
    # Every image is resized to HEIGHT x WIDTH x NUM_CHANNELS during
    # preprocessing. NOTE(review): 299x299 matches the Inception input size —
    # presumably chosen with transfer learning in mind; confirm if relevant.
    HEIGHT = 299
    WIDTH = 299
    NUM_CHANNELS = 3
    NCLASSES = 5

    def linear_model(img, mode, hparams):
      """Single dense layer over flattened pixels; returns (logits, NCLASSES)."""
      flattened = tf.reshape(img, [-1, HEIGHT * WIDTH * NUM_CHANNELS])
      logits = tf.layers.dense(flattened, NCLASSES, activation=None)
      return logits, NCLASSES

    def dnn_model(img, mode, hparams):
      """Fully connected net with 300/100/30 hidden units; returns (logits, NCLASSES)."""
      net = tf.reshape(img, [-1, HEIGHT * WIDTH * NUM_CHANNELS])  # flatten pixels
      for units in (300, 100, 30):
        net = tf.layers.dense(net, units, activation=tf.nn.relu)
      logits = tf.layers.dense(net, NCLASSES, activation=None)
      return logits, NCLASSES

    def dnn_dropout_model(img, mode, hparams):
      """Same network as dnn_model, with dropout after the last hidden layer."""
      drop_rate = hparams.get('dprob', 0.1)
      is_training = (mode == tf.estimator.ModeKeys.TRAIN)

      net = tf.reshape(img, [-1, HEIGHT * WIDTH * NUM_CHANNELS])  # flatten pixels
      for units in (300, 100, 30):
        net = tf.layers.dense(net, units, activation=tf.nn.relu)
      # Dropout is active only while training.
      net = tf.layers.dropout(net, rate=drop_rate, training=is_training)
      logits = tf.layers.dense(net, NCLASSES, activation=None)
      return logits, NCLASSES

    def cnn_model(img, mode, hparams):
      """CNN: two conv+maxpool blocks, a 300-unit dense layer, dropout, logits.

      Args:
        img: image batch tensor, shape [batch, HEIGHT, WIDTH, NUM_CHANNELS].
        mode: a tf.estimator.ModeKeys value; dropout and batch normalization
          are only active when mode == TRAIN.
        hparams: dict with optional keys ksize1, ksize2, nfil1, nfil2, dprob,
          batch_norm.

      Returns:
        (ylogits, NCLASSES): unscaled class logits and the class count.
      """
      ksize1 = hparams.get('ksize1', 5)
      ksize2 = hparams.get('ksize2', 5)
      nfil1 = hparams.get('nfil1', 10)
      nfil2 = hparams.get('nfil2', 20)
      dprob = hparams.get('dprob', 0.25)
      # Consistency/robustness fix: read batch_norm with .get() like every
      # other hyperparameter instead of a bare [] lookup, so a missing key no
      # longer raises KeyError and simply disables batch normalization.
      use_batch_norm = hparams.get('batch_norm', False)
      is_training = (mode == tf.estimator.ModeKeys.TRAIN)

      c1 = tf.layers.conv2d(img, filters=nfil1,
                              kernel_size=ksize1, strides=1,
                              padding='same', activation=tf.nn.relu)
      p1 = tf.layers.max_pooling2d(c1, pool_size=2, strides=2)
      c2 = tf.layers.conv2d(p1, filters=nfil2,
                              kernel_size=ksize2, strides=1,
                              padding='same', activation=tf.nn.relu)
      p2 = tf.layers.max_pooling2d(c2, pool_size=2, strides=2)

      # Flatten the final feature map for the dense layers.
      outlen = p2.shape[1] * p2.shape[2] * p2.shape[3]
      p2flat = tf.reshape(p2, [-1, outlen])

      if use_batch_norm:
        # Batch norm goes between the dense layer and its ReLU; the moving
        # statistics are only updated while training.
        h3 = tf.layers.dense(p2flat, 300, activation=None)
        h3 = tf.layers.batch_normalization(h3, training=is_training)
        h3 = tf.nn.relu(h3)
      else:
        h3 = tf.layers.dense(p2flat, 300, activation=tf.nn.relu)

      # Dropout is applied only while training.
      h3d = tf.layers.dropout(h3, rate=dprob, training=is_training)

      ylogits = tf.layers.dense(h3d, NCLASSES, activation=None)

      # Optionally batch-normalize the logits themselves as well.
      if use_batch_norm:
        ylogits = tf.layers.batch_normalization(ylogits, training=is_training)

      return ylogits, NCLASSES

    def read_and_preprocess_with_augment(image_bytes, label=None):
        """Convenience wrapper around read_and_preprocess with augmentation on."""
        return read_and_preprocess(image_bytes, label, augment=True)

    def read_and_preprocess(image_bytes, label=None, augment=False):
        """Decode a JPEG, resize to HEIGHT x WIDTH, scale pixels to [-1, 1].

        With augment=True the image is randomly cropped, flipped and
        color-jittered (training-time data augmentation).
        Returns ({'image': tensor}, label).
        """
        decoded = tf.image.decode_jpeg(image_bytes, channels=NUM_CHANNELS)
        decoded = tf.image.convert_image_dtype(decoded, dtype=tf.float32)  # [0, 1]
        batched = tf.expand_dims(decoded, 0)  # resize_bilinear wants a batch dim

        if augment:
           # Resize slightly larger, then take a random HEIGHT x WIDTH crop.
           resized = tf.image.resize_bilinear(
               batched, [HEIGHT + 10, WIDTH + 10], align_corners=False)
           image = tf.squeeze(resized)  # drop the batch dimension
           image = tf.random_crop(image, [HEIGHT, WIDTH, NUM_CHANNELS])
           image = tf.image.random_flip_left_right(image)
           image = tf.image.random_brightness(image, max_delta=63.0 / 255.0)
           image = tf.image.random_contrast(image, lower=0.2, upper=1.8)
        else:
           resized = tf.image.resize_bilinear(
               batched, [HEIGHT, WIDTH], align_corners=False)
           image = tf.squeeze(resized)  # drop the batch dimension

        # Map pixel values from [0, 1] into [-1, 1].
        image = tf.multiply(tf.subtract(image, 0.5), 2.0)
        return {'image': image}, label

    def serving_input_fn():
        """Serving signature: one JPEG at a time as a scalar 'image_bytes' string."""
        feature_placeholders = {
            'image_bytes': tf.placeholder(tf.string, shape=())}
        features, _ = read_and_preprocess(
            tf.squeeze(feature_placeholders['image_bytes']))
        # Re-add the batch dimension that preprocessing removed.
        features['image'] = tf.expand_dims(features['image'], 0)
        return tf.estimator.export.ServingInputReceiver(
            features, feature_placeholders)

    def make_input_fn(csv_of_filenames, batch_size, mode, augment=False):
        """Build an Estimator input_fn that reads '<gcs_path>,<label>' CSV rows."""
        def _input_fn():
            def decode_csv(csv_row):
                # Each row is "<image file>,<label>"; load the raw JPEG bytes.
                filename, label = tf.decode_csv(
                    csv_row, record_defaults=[[''], ['']])
                return tf.read_file(filename), label

            preprocess = (read_and_preprocess_with_augment
                          if augment else read_and_preprocess)
            dataset = (tf.data.TextLineDataset(csv_of_filenames)
                       .map(decode_csv)
                       .map(preprocess))

            if mode == tf.estimator.ModeKeys.TRAIN:
                num_epochs = None  # repeat indefinitely; max_steps stops training
                dataset = dataset.shuffle(buffer_size=10 * batch_size)
            else:
                num_epochs = 1  # a single pass, then end-of-input

            dataset = dataset.repeat(num_epochs).batch(batch_size)
            return dataset.make_one_shot_iterator().get_next()
        return _input_fn

    def image_classifier(features, labels, mode, params):
      """Estimator model_fn for the flowers classifier.

      Dispatches to the model named by params['model'], computes softmax
      probabilities and predicted classes, and (in TRAIN/EVAL) cross-entropy
      loss, an accuracy metric, and an Adam training op.
      """
      model_functions = {
          'linear':linear_model,
          'dnn':dnn_model,
          'dnn_dropout':dnn_dropout_model,
          'cnn':cnn_model}
      model_function = model_functions[params['model']]
      ylogits, nclasses = model_function(features['image'], mode, params)

      probabilities = tf.nn.softmax(ylogits)
      class_int = tf.cast(tf.argmax(probabilities, 1), tf.uint8)
      class_str = tf.gather(LIST_OF_LABELS, tf.cast(class_int, tf.int32))

      if mode == tf.estimator.ModeKeys.TRAIN or mode == tf.estimator.ModeKeys.EVAL:
        # Convert the string labels into integer indices into LIST_OF_LABELS.
        labels_table = tf.contrib.lookup.index_table_from_tensor(
          tf.constant(LIST_OF_LABELS))
        labels = labels_table.lookup(labels)

        loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(
            logits=ylogits, labels=tf.one_hot(labels, nclasses)))
        # Fix: tf.metrics.accuracy's signature is (labels, predictions); the
        # arguments were previously swapped. Accuracy is symmetric in its two
        # arguments, so reported values are unchanged, but the call now
        # matches the documented contract.
        evalmetrics = {'accuracy': tf.metrics.accuracy(labels, class_int)}
        if mode == tf.estimator.ModeKeys.TRAIN:
          # Needed so batch-normalization moving averages get updated; this is
          # a no-op for models without batch norm.
          update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
          with tf.control_dependencies(update_ops):
             train_op = tf.contrib.layers.optimize_loss(
                 loss,
                 tf.train.get_global_step(),
                 learning_rate=params['learning_rate'],
                 optimizer="Adam")
        else:
          train_op = None
      else:
        # PREDICT mode: no labels, hence no loss, metrics, or training op.
        loss = None
        train_op = None
        evalmetrics = None

      return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions={"probabilities": probabilities,
                         "classid": class_int, "class": class_str},
            loss=loss,
            train_op=train_op,
            eval_metric_ops=evalmetrics,
            export_outputs={'classes': tf.estimator.export.PredictOutput(
                {"probabilities": probabilities, "classid": class_int,
                 "class": class_str})}
        )

    def train_and_evaluate(output_dir, hparams):
      """Train, periodically evaluate, and export the chosen model.

      Builds an Estimator from image_classifier, trains for
      hparams['train_steps'] steps, evaluates/checkpoints every EVAL_INTERVAL
      seconds, and exports the latest SavedModel for serving.
      """
      EVAL_INTERVAL = 300  # seconds between checkpoints (and thus evaluations)
      run_config = tf.estimator.RunConfig(save_checkpoints_secs=EVAL_INTERVAL)
      estimator = tf.estimator.Estimator(
          model_fn=image_classifier,
          params=hparams,
          config=run_config,
          model_dir=output_dir)

      train_input = make_input_fn(
          hparams['train_data_path'],
          hparams['batch_size'],
          mode=tf.estimator.ModeKeys.TRAIN,
          augment=hparams['augment'])
      train_spec = tf.estimator.TrainSpec(
          input_fn=train_input,
          max_steps=hparams['train_steps'])

      eval_input = make_input_fn(
          hparams['eval_data_path'],
          hparams['batch_size'],
          mode=tf.estimator.ModeKeys.EVAL)
      exporter = tf.estimator.LatestExporter('exporter', serving_input_fn)
      eval_spec = tf.estimator.EvalSpec(
          input_fn=eval_input,
          steps=None,
          exporters=exporter,
          start_delay_secs=EVAL_INTERVAL,
          throttle_secs=EVAL_INTERVAL)

      tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)

initファイルの作成


%bash
# Create an empty __init__.py so flowersmodel is importable as a package.
touch flowersmodel/__init__.py

local環境(datalab)での実行


    %bash
    # Remove artifacts left over from any previous local run.
    rm -rf flowersmodel.tar.gz flowers_trained
    # Smoke-test the trainer locally (tiny batch, 5 steps) before submitting
    # a real job. Flags after the bare `--` are passed through to
    # flowersmodel/task.py.
    gcloud ml-engine local train \
       --module-name=flowersmodel.task \
       --package-path=${PWD}/flowersmodel \
       -- \
       --output_dir=${PWD}/flowers_trained \
       --train_steps=5 \
       --learning_rate=0.01 \
       --batch_size=2 \
       --model=$MODEL_TYPE \
       --augment \
       --train_data_path=gs://cloud-ml-data/img/flower_photos/train_set.csv \
       --eval_data_path=gs://cloud-ml-data/img/flower_photos/eval_set.csv

ML engineでの実行


    %bash
    # GCS output location and a unique, timestamped job name.
    OUTDIR=gs://${BUCKET}/flowers/trained_${MODEL_TYPE}
    JOBNAME=flowers_${MODEL_TYPE}_$(date -u +%y%m%d_%H%M%S)
    echo $OUTDIR $REGION $JOBNAME
    # Start from a clean output directory.
    gsutil -m rm -rf $OUTDIR
    # Submit the training job to Cloud ML Engine on a single GPU machine.
    # Flags after the bare `--` are forwarded to flowersmodel/task.py.
    gcloud ml-engine jobs submit training $JOBNAME \
       --region=$REGION \
       --module-name=flowersmodel.task \
       --package-path=${PWD}/flowersmodel \
       --job-dir=$OUTDIR \
       --staging-bucket=gs://$BUCKET \
       --scale-tier=BASIC_GPU \
       --runtime-version=$TFVERSION \
       -- \
       --output_dir=$OUTDIR \
       --train_steps=1000 \
       --learning_rate=0.01 \
       --batch_size=40 \
       --model=$MODEL_TYPE \
       --augment \
       --batch_norm \
       --train_data_path=gs://cloud-ml-data/img/flower_photos/train_set.csv \
       --eval_data_path=gs://cloud-ml-data/img/flower_photos/eval_set.csv
   

tensorboardで確認


    from google.datalab.ml import TensorBoard
    # Bug fix: the training job writes to gs://$BUCKET/flowers/trained_$MODEL_TYPE
    # (see OUTDIR in the job-submission cell); the original path said 'mnist',
    # a leftover from a previous lab, so TensorBoard pointed at the wrong logs.
    TensorBoard().start('gs://{}/flowers/trained_{}'.format(BUCKET, MODEL_TYPE))

ML engineでmodel/versionの作成


    %bash
    MODEL_NAME="flowers"
    MODEL_VERSION=${MODEL_TYPE}
    # Use the most recently exported SavedModel from the training job.
    MODEL_LOCATION=$(gsutil ls gs://${BUCKET}/flowers/trained_${MODEL_TYPE}/export/exporter | tail -1)
    echo "Deleting and deploying $MODEL_NAME $MODEL_VERSION from $MODEL_LOCATION ... this will take a few minutes"
    # Uncomment to delete an existing version/model before re-deploying.
    #gcloud ml-engine versions delete --quiet ${MODEL_VERSION} --model ${MODEL_NAME}
    #gcloud ml-engine models delete ${MODEL_NAME}
    gcloud ml-engine models create ${MODEL_NAME} --regions $REGION
    gcloud ml-engine versions create ${MODEL_VERSION} --model ${MODEL_NAME} --origin ${MODEL_LOCATION} --runtime-version=$TFVERSION

test用imageの作成(base64)


    %%bash
    IMAGE_URL=gs://cloud-ml-data/img/flower_photos/sunflowers/1022552002_2b93faf9e7_n.jpg

    # Copy the image to local disk.
    gsutil cp $IMAGE_URL flower.jpg

    # Base64 encode and create request message in json format.
    # NOTE(review): the embedded one-liner uses the Python 2 print statement
    # and assumes b64encode returns str; it will fail under Python 3 —
    # confirm the kernel's Python version before reuse.
    python -c 'import base64, sys, json; img = base64.b64encode(open("flower.jpg", "rb").read()); print json.dumps({"image_bytes":{"b64": img}})' &> request.json

predictの実行


    %%bash
    # Send the base64-encoded image (request.json, built above) to the
    # deployed model version for online prediction.
    gcloud ml-engine predict \
      --model=flowers \
      --version=${MODEL_TYPE} \
      --json-instances=./request.json

Category:ML
Tag: tensorflow ML python
Oct. 30, 2018, 11:31 p.m.

Comments