public class AnnotatingMarkupParser extends Object implements info.bliki.wiki.filter.ITextConverter
Use the #convert(String) and #getWikiLinks() methods.
Due to the constraints imposed by the ITextConverter /
WikiModel API, this class is not thread safe: each instance
should be used by only one thread.

| Modifier and Type | Class and Description |
|---|---|
class |
AnnotatingMarkupParser.CountingAppendable |
| Modifier and Type | Field and Description |
|---|---|
protected List<Annotation> |
headers |
static Set<String> |
HEADING_TAGS |
static String |
HREF_ATTR_KEY |
static Pattern |
INTERWIKI_PATTERN |
protected String |
languageCode |
protected info.bliki.wiki.model.WikiModel |
model |
static Set<String> |
PARAGRAPH_TAGS |
protected List<Annotation> |
paragraphs |
static int |
RECURSION_LIMIT |
protected String |
redirect |
protected static Map<String,Pattern> |
REDIRECT_PATTERNS |
protected Pattern |
redirectPattern |
protected String |
text |
static String |
WIKILINK_TARGET_ATTR_KEY |
static String |
WIKILINK_TITLE_ATTR_KEY |
protected List<Annotation> |
wikilinks |
static String |
WIKIOBJECT_ATTR_KEY |
| Constructor and Description |
|---|
AnnotatingMarkupParser() |
AnnotatingMarkupParser(String languageCode) |
| Modifier and Type | Method and Description |
|---|---|
List<Annotation> |
getHeaderAnnotations() |
List<String> |
getHeaders() |
List<Annotation> |
getParagraphAnnotations() |
List<String> |
getParagraphs() |
String |
getRedirect() |
List<Annotation> |
getWikiLinkAnnotations() |
void |
imageNodeToText(info.bliki.htmlcleaner.TagNode tagNode,
info.bliki.wiki.model.ImageFormat imageFormat,
Appendable buffer,
info.bliki.wiki.model.IWikiModel model) |
info.bliki.wiki.model.WikiModel |
makeWikiModel(String languageCode) |
void |
nodesToText(List<? extends Object> nodes,
Appendable buffer,
info.bliki.wiki.model.IWikiModel model) |
boolean |
noLinks() |
String |
parse(String rawWikiMarkup)
Convert WikiMarkup to a simple text representation suitable for NLP
analysis.
|
static String |
titleToUri(String title,
String languageCode) |
public static final String HREF_ATTR_KEY
public static final String WIKILINK_TITLE_ATTR_KEY
public static final String WIKILINK_TARGET_ATTR_KEY
public static final String WIKIOBJECT_ATTR_KEY
public static final int RECURSION_LIMIT
public static final Pattern INTERWIKI_PATTERN
protected final List<Annotation> wikilinks
protected final List<Annotation> headers
protected final List<Annotation> paragraphs
protected String languageCode
protected final info.bliki.wiki.model.WikiModel model
protected String redirect
protected String text
protected Pattern redirectPattern
public AnnotatingMarkupParser()
public AnnotatingMarkupParser(String languageCode)
public info.bliki.wiki.model.WikiModel makeWikiModel(String languageCode)
public String parse(String rawWikiMarkup)
#getWikiLinks().
Parameters: rawWikiMarkup - the raw wiki markup to convert
public void nodesToText(List<? extends Object> nodes, Appendable buffer, info.bliki.wiki.model.IWikiModel model) throws IOException
Specified by: nodesToText in interface info.bliki.wiki.filter.ITextConverter
Throws: IOException
public void imageNodeToText(info.bliki.htmlcleaner.TagNode tagNode,
info.bliki.wiki.model.ImageFormat imageFormat,
Appendable buffer,
info.bliki.wiki.model.IWikiModel model)
throws IOException
Specified by: imageNodeToText in interface info.bliki.wiki.filter.ITextConverter
Throws: IOException
public boolean noLinks()
Specified by: noLinks in interface info.bliki.wiki.filter.ITextConverter
public List<Annotation> getWikiLinkAnnotations()
public List<Annotation> getHeaderAnnotations()
public List<Annotation> getParagraphAnnotations()
public String getRedirect()
Copyright © 2014. All rights reserved.