public abstract class IntervalRevisionETLReader<KEYIN,VALUEIN> extends DefaultRevisionETLReader<KEYIN,VALUEIN>
RevisionETLReader.Ack
Modifier and Type | Field and Description |
---|---|
static String |
DAY_SCALE_OPT |
static String |
END_TIME_OPT |
static String |
HOUR_SCALE_OPT |
static String |
MONTH_SCALE_OPT |
static String |
SCALE_OPT |
static String |
START_TIME_OPT |
static String |
WEEK_SCALE_OPT |
skipNonArticles, skipRedirect
DEFAULT_MAX_BLOCK_SIZE, extractor
Constructor and Description |
---|
IntervalRevisionETLReader() |
Modifier and Type | Method and Description |
---|---|
void |
initialize(org.apache.hadoop.mapreduce.InputSplit input,
org.apache.hadoop.mapreduce.TaskAttemptContext tac) |
protected void |
processMetaData(org.apache.hadoop.io.DataOutputBuffer buffer,
RevisionHeader meta)
This method processes the revision metadata after caching the currently visited revision.
|
protected RevisionETLReader.Ack |
readToNextRevision(org.apache.hadoop.io.DataOutputBuffer buffer,
RevisionHeader meta)
This method reads bytes from the input stream into the buffer
until it reaches EOF or the revision close tag.
|
initializeMeta, readToPageHeader
clearRevisions, close, fetchMore, freeKey, freeValue, getCurrentKey, getCurrentValue, getProgress, getTaskAttemptContext, hasData, initializeExtractor, initializeKey, initializeValue, nextByte, nextKeyValue, setBlockSize, updateRevision
public static final String START_TIME_OPT
public static final String END_TIME_OPT
public static final String SCALE_OPT
public static final String HOUR_SCALE_OPT
public static final String DAY_SCALE_OPT
public static final String WEEK_SCALE_OPT
public static final String MONTH_SCALE_OPT
public void initialize(org.apache.hadoop.mapreduce.InputSplit input, org.apache.hadoop.mapreduce.TaskAttemptContext tac) throws IOException, InterruptedException
initialize
in class DefaultRevisionETLReader<KEYIN,VALUEIN>
IOException
InterruptedException
protected RevisionETLReader.Ack readToNextRevision(org.apache.hadoop.io.DataOutputBuffer buffer, RevisionHeader meta) throws IOException
RevisionETLReader
readToNextRevision
in class RevisionETLReader<KEYIN,VALUEIN,RevisionHeader>
IOException
protected void processMetaData(org.apache.hadoop.io.DataOutputBuffer buffer, RevisionHeader meta)
buffer -
meta -
Copyright © 2014. All rights reserved.