public class BoilerpipeContentExtractor extends BaseContentExtractor
_inBody, _inHead, _inTitle
Constructor and Description |
---|
BoilerpipeContentExtractor()
Defaults to using
DefaultExtractor when setting up
the BoilerpipeContentHandler |
BoilerpipeContentExtractor(java.lang.Class<? extends de.l3s.boilerpipe.extractors.ExtractorBase> extractorClass)
BoilerpipeExtractor doesn't implement Serializable, but a caller can work around
this limitation by specifying the BoilerpipeExtractor class to use with
the BoilerpipeContentHandler (this would work for most extractors;
it won't work for KeepEverythingWithMinKWordsExtractor which takes a parameter). |
Modifier and Type | Method and Description |
---|---|
void |
characters(char[] ch,
int start,
int length) |
void |
endDocument() |
void |
endElement(java.lang.String uri,
java.lang.String localName,
java.lang.String qName) |
void |
endPrefixMapping(java.lang.String prefix) |
java.lang.String |
getContent()
getContent returns the boilerpipe extracted text.
|
void |
ignorableWhitespace(char[] ch,
int start,
int length) |
protected void |
init() |
void |
processingInstruction(java.lang.String target,
java.lang.String data) |
void |
reset() |
void |
setDocumentLocator(org.xml.sax.Locator locator) |
void |
skippedEntity(java.lang.String name) |
void |
startDocument() |
void |
startElement(java.lang.String uri,
java.lang.String localName,
java.lang.String qName,
org.xml.sax.Attributes atts) |
void |
startPrefixMapping(java.lang.String prefix,
java.lang.String uri) |
addContent, addContent
public BoilerpipeContentExtractor()
Defaults to using DefaultExtractor
when setting up
the BoilerpipeContentHandler.
public BoilerpipeContentExtractor(java.lang.Class<? extends de.l3s.boilerpipe.extractors.ExtractorBase> extractorClass)
BoilerpipeExtractor
doesn't implement Serializable, but a caller can work around
this limitation by specifying the BoilerpipeExtractor class to use with
the BoilerpipeContentHandler
(this would work for most extractors;
it won't work for KeepEverythingWithMinKWordsExtractor
which takes a parameter).
public void startPrefixMapping(java.lang.String prefix, java.lang.String uri) throws org.xml.sax.SAXException
startPrefixMapping
in interface org.xml.sax.ContentHandler
startPrefixMapping
in class org.xml.sax.helpers.DefaultHandler
org.xml.sax.SAXException
public void endPrefixMapping(java.lang.String prefix) throws org.xml.sax.SAXException
endPrefixMapping
in interface org.xml.sax.ContentHandler
endPrefixMapping
in class org.xml.sax.helpers.DefaultHandler
org.xml.sax.SAXException
public void processingInstruction(java.lang.String target, java.lang.String data) throws org.xml.sax.SAXException
processingInstruction
in interface org.xml.sax.ContentHandler
processingInstruction
in class org.xml.sax.helpers.DefaultHandler
org.xml.sax.SAXException
public void setDocumentLocator(org.xml.sax.Locator locator)
setDocumentLocator
in interface org.xml.sax.ContentHandler
setDocumentLocator
in class org.xml.sax.helpers.DefaultHandler
public void startDocument() throws org.xml.sax.SAXException
startDocument
in interface org.xml.sax.ContentHandler
startDocument
in class org.xml.sax.helpers.DefaultHandler
org.xml.sax.SAXException
public void endDocument() throws org.xml.sax.SAXException
endDocument
in interface org.xml.sax.ContentHandler
endDocument
in class org.xml.sax.helpers.DefaultHandler
org.xml.sax.SAXException
public void startElement(java.lang.String uri, java.lang.String localName, java.lang.String qName, org.xml.sax.Attributes atts) throws org.xml.sax.SAXException
startElement
in interface org.xml.sax.ContentHandler
startElement
in class BaseContentExtractor
org.xml.sax.SAXException
public void endElement(java.lang.String uri, java.lang.String localName, java.lang.String qName) throws org.xml.sax.SAXException
endElement
in interface org.xml.sax.ContentHandler
endElement
in class BaseContentExtractor
org.xml.sax.SAXException
public void characters(char[] ch, int start, int length) throws org.xml.sax.SAXException
characters
in interface org.xml.sax.ContentHandler
characters
in class BaseContentExtractor
org.xml.sax.SAXException
public void ignorableWhitespace(char[] ch, int start, int length) throws org.xml.sax.SAXException
ignorableWhitespace
in interface org.xml.sax.ContentHandler
ignorableWhitespace
in class BaseContentExtractor
org.xml.sax.SAXException
public void skippedEntity(java.lang.String name) throws org.xml.sax.SAXException
skippedEntity
in interface org.xml.sax.ContentHandler
skippedEntity
in class org.xml.sax.helpers.DefaultHandler
org.xml.sax.SAXException
public java.lang.String getContent()
getContent
in class BaseContentExtractor
public void reset()
reset
in class BaseContentExtractor
protected void init()
Copyright © 2012 Bixo Labs