/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.nutch.parse;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.ArrayFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.VersionMismatchException;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableUtils;
import org.apache.hadoop.util.GenericOptionsParser;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.commons.cli.Options;
import org.apache.nutch.util.NutchConfiguration;

/* The text conversion of page's content, stored using gzip compression.
 * @see Parse#getText()
 */
public final class ParseText implements Writable {
  public static final String DIR_NAME = "parse_text";

  private static final byte VERSION = 2;

  public ParseText() {
    //default constructor
  }

  private String text;

  public ParseText(String text) {
    this.text = text;
  }

  public void readFields(DataInput in) throws IOException {
    byte version = in.readByte();
    switch (version) {
    case 1:
      text = WritableUtils.readCompressedString(in);
      break;
    case VERSION:
      text = Text.readString(in);
      break;
    default:
      throw new VersionMismatchException(VERSION, version);
    }
  }

  public final void write(DataOutput out) throws IOException {
    out.write(VERSION);
    Text.writeString(out, text);
  }

  public static final ParseText read(DataInput in) throws IOException {
    ParseText parseText = new ParseText();
    parseText.readFields(in);
    return parseText;
  }

  //
  // Accessor methods
  //
  public String getText() {
    return text;
  }

  @Override
  public boolean equals(Object o) {
    if (!(o instanceof ParseText))
      return false;
    ParseText other = (ParseText) o;
    return this.text.equals(other.text);
  }

  @Override
  public String toString() {
    return text;
  }

  public static void main(String argv[]) throws Exception {
    String usage = "ParseText (-local | -dfs <namenode:port>) recno segment";

    if (argv.length < 3) {
      System.out.println("usage:" + usage);
      return;
    }
    Options opts = new Options();
    Configuration conf = NutchConfiguration.create();

    GenericOptionsParser parser = new GenericOptionsParser(conf, opts, argv);

    String[] remainingArgs = parser.getRemainingArgs();

    try (FileSystem fs = FileSystem.get(conf)) {
      int recno = Integer.parseInt(remainingArgs[0]);
      String segment = remainingArgs[1];
      String filename = new Path(segment, ParseText.DIR_NAME).toString();

      ParseText parseText = new ParseText();
      ArrayFile.Reader parseTexts = new ArrayFile.Reader(fs, filename, conf);

      parseTexts.get(recno, parseText);
      System.out.println("Retrieved " + recno + " from file " + filename);
      System.out.println(parseText);
      parseTexts.close();
    }
  }
}
