Class ScraperRange
Helper class for implementing screen scrapers.
public class ScraperRange
- Inheritance
-
ScraperRange
- Derived
- Inherited Members
Constructors
ScraperRange(string)
Creates a new ScraperRange instance that wraps the entire HTML string.
public ScraperRange(string sHtml)
Parameters
sHtml (string): Usually the text of a complete HTML page. If this is
null, then an empty ScraperRange will be created.
ScraperRange(string, int)
Creates a new ScraperRange instance that wraps the given HTML string
starting at nOffset up to the end of the string.
public ScraperRange(string sHtml, int nOffset)
Parameters
sHtml (string): The HTML string to wrap. This must not be null.
nOffset (int): The starting offset. This must not be negative or larger than the length of the sHtml string.
ScraperRange(string, int, int)
public ScraperRange(string sHtml, int nOffset, int nLength)
Parameters
Fields
Null
public static readonly ScraperRange Null
Field Value
Properties
EndOffset
The offset into HtmlPage after this range ends.
public int EndOffset { get; }
Property Value
HtmlPage
Returns the complete HTML page that contains this range.
public string HtmlPage { get; }
Property Value
HtmlRange
Returns the raw HTML string of this range. This should rarely be used. To get the text of this range, the property HtmlText shall be used instead.
public string HtmlRange { get; }
Property Value
HtmlText
The plain text, excluding all HTML tags, contained in this range.
public string HtmlText { get; }
Property Value
IsNull
public bool IsNull { get; }
Property Value
Offset
The offset into HtmlPage where this range begins.
public int Offset { get; }
Property Value
Methods
AddAllInputs(UrlQueryParams, bool)
public void AddAllInputs(UrlQueryParams aQuery, bool fExceptSubmit = false)
Parameters
aQuery (UrlQueryParams)
fExceptSubmit (bool)
ContainsString(string, StringComparison)
public bool ContainsString(string s, StringComparison nComparisonType = StringComparison.OrdinalIgnoreCase)
Parameters
s (string)
nComparisonType (StringComparison)
Returns
ScrapeAllElements(string, string, string, int, ScraperElementPart)
Returns all child elements that match the given tag and attribute value.
public ScraperRange[] ScrapeAllElements(string sTag, string sAttrName = null, string sWithAttrValue = null, int nMaxCount = 2147483647, ScraperElementPart nPart = ScraperElementPart.InnerHtml)
Parameters
sTag (string)
sAttrName (string): Optional name of an attribute that must be contained in the tag. If this attribute name is "class", then special CSS class comparison semantics for sWithAttrValue are used.
sWithAttrValue (string): Optional value of the required attribute. Only relevant if a nonempty sAttrName was given. If this is null and an attribute name was given, then the value of the attribute is ignored. The presence of the attribute is enough for a match. Special case: When sAttrName is "class", then this parameter is treated as a blank-separated list of CSS class names. If any of the given CSS class names matches any of the class names in the element's "class" attribute, then it is considered a match.
nMaxCount (int): The max number of matches that shall be returned. This defaults to Int32.MaxValue. If this is 1, then only the first match is returned. Usually it is not required to place a limit on the number of matches, as the range has an implicit limit by its length.
nPart (ScraperElementPart): The part of the matching element to be returned in the ranges.
Returns
- ScraperRange[]
Array of all matching element ranges. If there is no match, then an empty array will be returned. This method never returns
null.
ScrapeAttribute(string, string)
public string ScrapeAttribute(string sTag, string sValueAttribute)
Parameters
sTag (string): The XML tag of the element that shall be scraped.
sValueAttribute (string): The name of the attribute whose value shall be returned.
Returns
ScrapeAttribute(string, string, string, string)
public string ScrapeAttribute(string sTag, string sName, string sNameAttribute, string sValueAttribute)
Parameters
sTag (string): The XML tag of the element that shall be scraped.
sName (string): The content of an attribute of that element that identifies it.
sNameAttribute (string): The name of the attribute that shall contain sName.
sValueAttribute (string): The name of the attribute whose value shall be returned.
Returns
ScrapeAttributes(string, string, string, string)
public string[] ScrapeAttributes(string sTag, string sName, string sNameAttribute, string sValueAttribute)
Parameters
sTag (string): The name of the HTML tag from which the attribute values shall be scraped.
sName (string): Optional name or ID of the HTML element that is sought. If given, then this name must appear in the attribute named sNameAttribute of the element.
sNameAttribute (string): Optional name of the tag attribute that uniquely identifies the correct HTML element. Usually this is either "name" or "id".
sValueAttribute (string): The name of the value attribute that shall be returned.
Returns
- string[]
Returns an array with the values of all matching attributes in the given HTML page. If there was not any match, then
null is returned.
ScrapeElement(string, ScraperElementPart)
public ScraperRange ScrapeElement(string sTag, ScraperElementPart nPart = ScraperElementPart.InnerHtml)
Parameters
sTag (string)
nPart (ScraperElementPart)
Returns
ScrapeElement(string, string, string, ScraperElementPart)
public ScraperRange ScrapeElement(string sTag, string sAttrName, string sWithAttrValue, ScraperElementPart nPart = ScraperElementPart.InnerHtml)
Parameters
sTag (string)
sAttrName (string)
sWithAttrValue (string)
nPart (ScraperElementPart)
Returns
ScrapeElementByClassname(string, string, ScraperElementPart)
public ScraperRange ScrapeElementByClassname(string sTag, string sClassnames, ScraperElementPart nPart = ScraperElementPart.InnerHtml)
Parameters
sTag (string)
sClassnames (string): One or more CSS class names, separated by a single blank. All elements that specify at least one of the given class names are matched and returned.
nPart (ScraperElementPart)
Returns
ScrapeElementByPosition(string, int, ScraperElementPart)
DEPRECATED: Please use ScrapeAllElements(string, string, string, int, ScraperElementPart) instead.
public ScraperRange ScrapeElementByPosition(string sTag, int nPos, ScraperElementPart nPart = ScraperElementPart.InnerHtml)
Parameters
sTag (string)
nPos (int)
nPart (ScraperElementPart)
Returns
ScrapeElementByPosition(string, string, string, int, ScraperElementPart)
DEPRECATED: Please use ScrapeAllElements(string, string, string, int, ScraperElementPart) instead.
public ScraperRange ScrapeElementByPosition(string sTag, string sAttribute, string sValue, int nPos, ScraperElementPart nPart = ScraperElementPart.InnerHtml)
Parameters
sTag (string)
sAttribute (string)
sValue (string)
nPos (int)
nPart (ScraperElementPart)
Returns
ScrapeForm(out string, string, string)
Extracts a form and its action attribute value.
public ScraperRange ScrapeForm(out string sFormAction, string sFormName, string sNameAttribute = "name")
Parameters
sFormAction (string): Returns the value of the "action" attribute of the form.
sFormName (string): The name of the form that is matched against the sNameAttribute. If this is null, then the first form is scraped, regardless of its name.
sNameAttribute (string): The attribute that contains the name of the form. By default this is "name", but another attribute, e.g. "id", may be given here. If sFormName is null, then this parameter is ignored.
Returns
ScrapeFormAction(string, string)
DEPRECATED: Use ScrapeForm(out string, string, string) instead.
public string ScrapeFormAction(string sFormName, string sNameAttribute = "name")
Parameters
Returns
ScrapeInputValue(string)
Extracts the value of the first occurrence of a named input field.
public string ScrapeInputValue(string sInputName)
Parameters
sInputName (string)
Returns
ScrapeInputValues(string)
Extracts all values of all occurrences of a named input field.
public string[] ScrapeInputValues(string sInputName)
Parameters
sInputName (string)
Returns
- string[]
ScrapeLink(string, string)
public string ScrapeLink(string sLinkName, string sNameAttribute = "id")
Parameters
sLinkName (string)
sNameAttribute (string): The attribute that contains the name of the link. By default this is the "id" attribute, but another attribute, e.g. "class", may be given here.
Returns
ScrapeLinkContaining(string)
Scrape the href of an anchor that contains the given string in its element content.
public string ScrapeLinkContaining(string sContains)
Parameters
sContains (string)
Returns
ScrapeValue(string)
public string ScrapeValue(string sRegex)
Parameters
sRegex (string)
Returns
- string
If the regex contains at least one group, then the match of the first regex group is returned. If there is no group, then the complete match is returned.
SubRange(int)
public ScraperRange SubRange(int nOffset)
Parameters
Returns
SubRange(int, int)
public ScraperRange SubRange(int nOffset, int nLength)
Parameters
nOffsetintThe offset into the entire HtmlPage where the new range shall begin.
nLengthintLength of the range.
Returns
ToString()
public override string ToString()