public class FetcherPolicy
extends java.lang.Object
implements java.io.Serializable
Modifier and Type | Class and Description |
---|---|
static class |
FetcherPolicy.FetcherMode |
static class |
FetcherPolicy.RedirectMode |
Modifier and Type | Field and Description |
---|---|
protected long |
_crawlDelay |
static java.lang.String |
DEFAULT_ACCEPT_LANGUAGE |
protected static long |
DEFAULT_CRAWL_DELAY
Deprecated.
|
static long |
DEFAULT_CRAWL_END_TIME |
static int |
DEFAULT_MAX_CONNECTIONS_PER_HOST |
static int |
DEFAULT_MAX_CONTENT_SIZE |
static int |
DEFAULT_MAX_REDIRECTS |
protected static int |
DEFAULT_MAX_REQUESTS_PER_CONNECTION |
static long |
DEFAULT_MIN_PAGE_FETCH_INTERVAL |
static int |
DEFAULT_MIN_RESPONSE_RATE |
static long |
NO_CRAWL_END_TIME |
static int |
NO_MIN_RESPONSE_RATE |
static int |
NO_REDIRECTS |
Constructor and Description |
---|
FetcherPolicy() |
FetcherPolicy(int minResponseRate,
int maxContentSize,
long crawlEndTime,
long crawlDelay,
int maxRedirects) |
Modifier and Type | Method and Description |
---|---|
void |
addValidMimeType(java.lang.String validMimeType) |
void |
addValidMimeTypes(java.util.Set<java.lang.String> validMimeTypes) |
protected int |
calcMaxUrls() |
boolean |
equals(java.lang.Object obj) |
java.lang.String |
getAcceptLanguage() |
long |
getCrawlDelay()
Deprecated.
|
long |
getCrawlEndTime() |
long |
getDefaultCrawlDelay()
Deprecated.
|
FetcherPolicy.FetcherMode |
getFetcherMode() |
int |
getMaxConnectionsPerHost() |
int |
getMaxContentSize()
Deprecated.
|
int |
getMaxRedirects() |
int |
getMaxRequestsPerConnection() |
int |
getMaxUrls()
Calculate the maximum number of URLs that could be fetched in the remaining time.
|
long |
getMinPageFetchInterval() |
int |
getMinResponseRate()
Return the minimum response rate.
|
FetcherPolicy.RedirectMode |
getRedirectMode() |
long |
getRequestTimeout() |
java.util.Set<java.lang.String> |
getValidMimeTypes() |
int |
hashCode() |
boolean |
isTerminateFetch() |
void |
seMinPageFetchInterval(long minPageFetchInterval)
Set the minimum time (in milliseconds) between each page fetch request, when
fetching a FetchSet worth of URLs using a single connection.
|
void |
setAcceptLanguage(java.lang.String acceptLanguage) |
void |
setCrawlDelay(long crawlDelay)
Deprecated.
|
void |
setCrawlEndTime(long crawlEndTime) |
void |
setFetcherMode(FetcherPolicy.FetcherMode mode) |
void |
setMaxConnectionsPerHost(int maxConnectionsPerHost) |
void |
setMaxContentSize(int maxContentSize)
Deprecated.
|
void |
setMaxRedirects(int maxRedirects) |
void |
setMaxRequestsPerConnection(int maxRequestsPerConnection) |
void |
setMinResponseRate(int minResponseRate) |
void |
setRedirectMode(FetcherPolicy.RedirectMode mode) |
void |
setRequestTimeout(long requestTimeout) |
void |
setValidMimeTypes(java.util.Set<java.lang.String> validMimeTypes) |
java.lang.String |
toString() |
public static final int NO_MIN_RESPONSE_RATE
public static final int NO_REDIRECTS
public static final int DEFAULT_MIN_RESPONSE_RATE
public static final int DEFAULT_MAX_CONTENT_SIZE
public static final int DEFAULT_MAX_CONNECTIONS_PER_HOST
public static final int DEFAULT_MAX_REDIRECTS
public static final java.lang.String DEFAULT_ACCEPT_LANGUAGE
public static final long DEFAULT_MIN_PAGE_FETCH_INTERVAL
protected static final int DEFAULT_MAX_REQUESTS_PER_CONNECTION
@Deprecated protected static final long DEFAULT_CRAWL_DELAY
public static final long NO_CRAWL_END_TIME
public static final long DEFAULT_CRAWL_END_TIME
protected long _crawlDelay
public FetcherPolicy()
public FetcherPolicy(int minResponseRate, int maxContentSize, long crawlEndTime, long crawlDelay, int maxRedirects)
@Deprecated public long getDefaultCrawlDelay()
public long getCrawlEndTime()
public void setCrawlEndTime(long crawlEndTime)
public int getMaxConnectionsPerHost()
public void setMaxConnectionsPerHost(int maxConnectionsPerHost)
public int getMaxRequestsPerConnection()
public void setMaxRequestsPerConnection(int maxRequestsPerConnection)
public int getMinResponseRate()
public void setMinResponseRate(int minResponseRate)
@Deprecated public int getMaxContentSize()
@Deprecated public void setMaxContentSize(int maxContentSize)
@Deprecated public long getCrawlDelay()
@Deprecated public void setCrawlDelay(long crawlDelay)
public int getMaxRedirects()
public void setMaxRedirects(int maxRedirects)
public java.lang.String getAcceptLanguage()
public void setAcceptLanguage(java.lang.String acceptLanguage)
public java.util.Set<java.lang.String> getValidMimeTypes()
public void setValidMimeTypes(java.util.Set<java.lang.String> validMimeTypes)
public void addValidMimeTypes(java.util.Set<java.lang.String> validMimeTypes)
public void addValidMimeType(java.lang.String validMimeType)
public FetcherPolicy.RedirectMode getRedirectMode()
public void setRedirectMode(FetcherPolicy.RedirectMode mode)
public long getRequestTimeout()
public void setRequestTimeout(long requestTimeout)
public FetcherPolicy.FetcherMode getFetcherMode()
public void setFetcherMode(FetcherPolicy.FetcherMode mode)
public void seMinPageFetchInterval(long minPageFetchInterval)
minPageFetchInterval - Minimum interval in milliseconds between requests.
public long getMinPageFetchInterval()
public int getMaxUrls()
protected int calcMaxUrls()
public boolean isTerminateFetch()
public int hashCode()
hashCode
in class java.lang.Object
public boolean equals(java.lang.Object obj)
equals
in class java.lang.Object
public java.lang.String toString()
toString
in class java.lang.Object
Copyright © 2012 Bixo Labs