Class ScraperRange
Helper class for implementing screen scrapers.
public class ScraperRange
- Inheritance
-
ScraperRange
- Derived
- Inherited Members
Constructors
ScraperRange(string)
Creates a new ScraperRange instance that wraps the entire HTML string.
public ScraperRange(string sHtml)
Parameters
sHtml
string — Usually the text of a complete HTML page. If this is null, then an empty ScraperRange will be created.
ScraperRange(string, int)
Creates a new ScraperRange instance that wraps the given HTML string
starting at nOffset
up to the end of the string.
public ScraperRange(string sHtml, int nOffset)
Parameters
sHtml
string — The HTML string to wrap. This must not be null.
nOffset
int — The starting offset. This must not be negative or larger than the length of the sHtml string.
ScraperRange(string, int, int)
public ScraperRange(string sHtml, int nOffset, int nLength)
Parameters
Fields
Null
public static readonly ScraperRange Null
Field Value
Properties
EndOffset
The offset into HtmlPage after this range ends.
public int EndOffset { get; }
Property Value
HtmlPage
Returns the complete HTML page that contains this range.
public string HtmlPage { get; }
Property Value
HtmlRange
Returns the raw HTML string of this range. This should rarely be used. To get the text of this range, use the HtmlText property instead.
public string HtmlRange { get; }
Property Value
HtmlText
The plain text, excluding all HTML tags, contained in this range.
public string HtmlText { get; }
Property Value
IsNull
public bool IsNull { get; }
Property Value
Offset
The offset into HtmlPage where this range begins.
public int Offset { get; }
Property Value
Methods
AddAllInputs(UrlQueryParams, bool)
public void AddAllInputs(UrlQueryParams aQuery, bool fExceptSubmit = false)
Parameters
aQuery
UrlQueryParamsfExceptSubmit
bool
ContainsString(string, StringComparison)
public bool ContainsString(string s, StringComparison nComparisonType = StringComparison.OrdinalIgnoreCase)
Parameters
s
stringnComparisonType
StringComparison
Returns
ScrapeAllElements(string, string, string, int, ScraperElementPart)
Returns all child elements that match the given tag and attribute value.
public ScraperRange[] ScrapeAllElements(string sTag, string sAttrName = null, string sWithAttrValue = null, int nMaxCount = 2147483647, ScraperElementPart nPart = ScraperElementPart.InnerHtml)
Parameters
sTag
string
sAttrName
string — Optional name of an attribute that must be contained in the tag. If this attribute name is "class", then special CSS class comparison semantics for sWithAttrValue are used.
sWithAttrValue
string — Optional value of the required attribute. Only relevant if a nonempty sAttrName was given. If this is null and an attribute name was given, then the value of the attribute is ignored. The presence of the attribute is enough for a match. Special case: When sAttrName is "class", then this parameter is treated as a blank-separated list of CSS class names. If any of the given CSS class names matches any of the class names in the element's "class" attribute, then it is considered a match.
nMaxCount
int — The max number of matches that shall be returned. This defaults to Int32.MaxValue. If this is 1, then only the first match is returned. Usually it is not required to place a limit on the number of matches, as the range has an implicit limit by its length.
nPart
ScraperElementPart — The part of the matching element to be returned in the ranges.
Returns
- ScraperRange[]
Array of all matching element ranges. If there is no match, then an empty array will be returned. This method never returns null.
ScrapeAttribute(string, string)
public string ScrapeAttribute(string sTag, string sValueAttribute)
Parameters
sTag
stringThe XML tag of the element that shall be scraped.
sValueAttribute
string — The name of the attribute whose value shall be returned.
Returns
ScrapeAttribute(string, string, string, string)
public string ScrapeAttribute(string sTag, string sName, string sNameAttribute, string sValueAttribute)
Parameters
sTag
stringThe XML tag of the element that shall be scraped.
sName
stringThe content of an attribute of that element that identifies it.
sNameAttribute
stringThe name of the attribute that shall contain sName.
sValueAttribute
string — The name of the attribute whose value shall be returned.
Returns
ScrapeAttributes(string, string, string, string)
public string[] ScrapeAttributes(string sTag, string sName, string sNameAttribute, string sValueAttribute)
Parameters
sTag
stringThe name of the HTML tag from which the attribute values shall be scraped.
sName
string — Optional name or ID of the HTML element that is sought. If given, then this name must appear in the attribute named sNameAttribute of the element.
sNameAttribute
string — Optional name of the tag attribute that uniquely identifies the correct HTML element. Usually this is either "name" or "id".
sValueAttribute
stringThe name of the value attribute that shall be returned.
Returns
- string[]
Returns an array with the values of all matching attributes in the given HTML page. If there was not any match, then null is returned.
ScrapeElement(string, ScraperElementPart)
public ScraperRange ScrapeElement(string sTag, ScraperElementPart nPart = ScraperElementPart.InnerHtml)
Parameters
sTag
stringnPart
ScraperElementPart
Returns
ScrapeElement(string, string, string, ScraperElementPart)
public ScraperRange ScrapeElement(string sTag, string sAttrName, string sWithAttrValue, ScraperElementPart nPart = ScraperElementPart.InnerHtml)
Parameters
sTag
stringsAttrName
stringsWithAttrValue
stringnPart
ScraperElementPart
Returns
ScrapeElementByClassname(string, string, ScraperElementPart)
public ScraperRange ScrapeElementByClassname(string sTag, string sClassnames, ScraperElementPart nPart = ScraperElementPart.InnerHtml)
Parameters
sTag
string
sClassnames
string — One or more CSS class names, separated by a single blank. All elements that specify at least one of the given class names are matched and returned.
nPart
ScraperElementPart
Returns
ScrapeElementByPosition(string, int, ScraperElementPart)
DEPRECATED: Please use ScrapeAllElements(string, string, string, int, ScraperElementPart) instead.
public ScraperRange ScrapeElementByPosition(string sTag, int nPos, ScraperElementPart nPart = ScraperElementPart.InnerHtml)
Parameters
sTag
stringnPos
intnPart
ScraperElementPart
Returns
ScrapeElementByPosition(string, string, string, int, ScraperElementPart)
DEPRECATED: Please use ScrapeAllElements(string, string, string, int, ScraperElementPart) instead.
public ScraperRange ScrapeElementByPosition(string sTag, string sAttribute, string sValue, int nPos, ScraperElementPart nPart = ScraperElementPart.InnerHtml)
Parameters
sTag
stringsAttribute
stringsValue
stringnPos
intnPart
ScraperElementPart
Returns
ScrapeForm(out string, string, string)
Extracts a form and its action attribute value.
public ScraperRange ScrapeForm(out string sFormAction, string sFormName, string sNameAttribute = "name")
Parameters
sFormAction
stringReturns the value of the "action" attribute of the form.
sFormName
string — The name of the form that is matched against the sNameAttribute. If this is null, then the first form is scraped, regardless of its name.
sNameAttribute
string — The attribute that contains the name of the form. By default this is "name", but another attribute, e.g. "id", may be given here. If sFormName is null, then this parameter is ignored.
Returns
ScrapeFormAction(string, string)
DEPRECATED: Use ScrapeForm(out string, string, string) instead.
public string ScrapeFormAction(string sFormName, string sNameAttribute = "name")
Parameters
Returns
ScrapeInputValue(string)
Extracts the value of the first occurrence of a named input field.
public string ScrapeInputValue(string sInputName)
Parameters
sInputName
string
Returns
ScrapeInputValues(string)
Extracts all values of all occurrences of a named input field.
public string[] ScrapeInputValues(string sInputName)
Parameters
sInputName
string
Returns
- string[]
ScrapeLink(string, string)
public string ScrapeLink(string sLinkName, string sNameAttribute = "id")
Parameters
sLinkName
string
sNameAttribute
string — The attribute that contains the name of the link. By default this is the "id" attribute, but another attribute, e.g. "class", may be given here.
Returns
ScrapeLinkContaining(string)
Scrape the href of an anchor that contains the given string in its element content.
public string ScrapeLinkContaining(string sContains)
Parameters
sContains
string
Returns
ScrapeValue(string)
public string ScrapeValue(string sRegex)
Parameters
sRegex
string
Returns
- string
If the regex contains at least one group, then the match of the first regex group is returned. If there is no group, then the complete match is returned.
SubRange(int)
public ScraperRange SubRange(int nOffset)
Parameters
Returns
SubRange(int, int)
public ScraperRange SubRange(int nOffset, int nLength)
Parameters
nOffset
intThe offset into the entire HtmlPage where the new range shall begin.
nLength
intLength of the range.
Returns
ToString()
public override string ToString()