public class AnnotatingMarkupParser extends Object implements info.bliki.wiki.filter.ITextConverter
Use the #convert(String) and #getWikiLinks() methods.
Due to the constraints imposed by the ITextConverter / WikiModel API, this class is not thread safe: only one instance should be used per thread.
Modifier and Type | Class and Description |
---|---|
class |
AnnotatingMarkupParser.CountingAppendable |
Modifier and Type | Field and Description |
---|---|
protected List<Annotation> |
headers |
static Set<String> |
HEADING_TAGS |
static String |
HREF_ATTR_KEY |
static Pattern |
INTERWIKI_PATTERN |
protected String |
languageCode |
protected info.bliki.wiki.model.WikiModel |
model |
static Set<String> |
PARAGRAPH_TAGS |
protected List<Annotation> |
paragraphs |
static int |
RECURSION_LIMIT |
protected String |
redirect |
protected static Map<String,Pattern> |
REDIRECT_PATTERNS |
protected Pattern |
redirectPattern |
protected String |
text |
static String |
WIKILINK_TARGET_ATTR_KEY |
static String |
WIKILINK_TITLE_ATTR_KEY |
protected List<Annotation> |
wikilinks |
static String |
WIKIOBJECT_ATTR_KEY |
Constructor and Description |
---|
AnnotatingMarkupParser() |
AnnotatingMarkupParser(String languageCode) |
Modifier and Type | Method and Description |
---|---|
List<Annotation> |
getHeaderAnnotations() |
List<String> |
getHeaders() |
List<Annotation> |
getParagraphAnnotations() |
List<String> |
getParagraphs() |
String |
getRedirect() |
List<Annotation> |
getWikiLinkAnnotations() |
void |
imageNodeToText(info.bliki.htmlcleaner.TagNode tagNode,
info.bliki.wiki.model.ImageFormat imageFormat,
Appendable buffer,
info.bliki.wiki.model.IWikiModel model) |
info.bliki.wiki.model.WikiModel |
makeWikiModel(String languageCode) |
void |
nodesToText(List<? extends Object> nodes,
Appendable buffer,
info.bliki.wiki.model.IWikiModel model) |
boolean |
noLinks() |
String |
parse(String rawWikiMarkup)
Convert WikiMarkup to a simple text representation suitable for NLP
analysis.
|
static String |
titleToUri(String title,
String languageCode) |
public static final String HREF_ATTR_KEY
public static final String WIKILINK_TITLE_ATTR_KEY
public static final String WIKILINK_TARGET_ATTR_KEY
public static final String WIKIOBJECT_ATTR_KEY
public static final int RECURSION_LIMIT
public static final Pattern INTERWIKI_PATTERN
protected final List<Annotation> wikilinks
protected final List<Annotation> headers
protected final List<Annotation> paragraphs
protected String languageCode
protected final info.bliki.wiki.model.WikiModel model
protected String redirect
protected String text
protected Pattern redirectPattern
public AnnotatingMarkupParser()
public AnnotatingMarkupParser(String languageCode)
public info.bliki.wiki.model.WikiModel makeWikiModel(String languageCode)
public String parse(String rawWikiMarkup)
See #getWikiLinks().
Parameters: rawWikiMarkup
public void nodesToText(List<? extends Object> nodes, Appendable buffer, info.bliki.wiki.model.IWikiModel model) throws IOException
Specified by: nodesToText in interface info.bliki.wiki.filter.ITextConverter
Throws: IOException
public void imageNodeToText(info.bliki.htmlcleaner.TagNode tagNode, info.bliki.wiki.model.ImageFormat imageFormat, Appendable buffer, info.bliki.wiki.model.IWikiModel model) throws IOException
Specified by: imageNodeToText in interface info.bliki.wiki.filter.ITextConverter
Throws: IOException
public boolean noLinks()
Specified by: noLinks in interface info.bliki.wiki.filter.ITextConverter
public List<Annotation> getWikiLinkAnnotations()
public List<Annotation> getHeaderAnnotations()
public List<Annotation> getParagraphAnnotations()
public String getRedirect()
Copyright © 2014. All rights reserved.