Class ScraperRange
Helper class for implementing screen scrapers.
Inheritance
System.Object
ScraperRange
Inherited Members
System.Object.Equals(System.Object)
System.Object.Equals(System.Object, System.Object)
System.Object.GetHashCode()
System.Object.GetType()
System.Object.MemberwiseClone()
System.Object.ReferenceEquals(System.Object, System.Object)
Assembly: Subsembly.Scraper.dll
Syntax
public class ScraperRange
Constructors
ScraperRange(String)
Creates a new ScraperRange instance that wraps the entire HTML string.
Declaration
public ScraperRange(string sHtml)
Parameters
Type |
Name |
Description |
System.String |
sHtml |
Usually the text of a complete HTML page. If this is null, then an empty
ScraperRange will be created.
|
ScraperRange(String, Int32)
Creates a new ScraperRange instance that wraps the given HTML string
starting at nOffset
up to the end of the string.
Declaration
public ScraperRange(string sHtml, int nOffset)
Parameters
Type |
Name |
Description |
System.String |
sHtml |
The HTML string to wrap. This must not be null.
|
System.Int32 |
nOffset |
The starting offset. This must not be negative or larger than the length of the
sHtml string.
|
ScraperRange(String, Int32, Int32)
Declaration
public ScraperRange(string sHtml, int nOffset, int nLength)
Parameters
Type |
Name |
Description |
System.String |
sHtml |
|
System.Int32 |
nOffset |
|
System.Int32 |
nLength |
|
Fields
Null
Declaration
public static readonly ScraperRange Null
Field Value
Properties
EndOffset
The offset into HtmlPage after this range ends.
Declaration
public int EndOffset { get; }
Property Value
Type |
Description |
System.Int32 |
|
HtmlPage
Returns the complete HTML page that contains this range.
Declaration
public string HtmlPage { get; }
Property Value
Type |
Description |
System.String |
|
HtmlRange
Returns the raw HTML string of this range. This should rarely be used.
To get the text of this range, the property HtmlText shall
be used instead.
Declaration
public string HtmlRange { get; }
Property Value
Type |
Description |
System.String |
If this range IsNull, then this property returns null.
|
HtmlText
The plain text, excluding all HTML tags, contained in this range.
Declaration
public string HtmlText { get; }
Property Value
Type |
Description |
System.String |
|
IsNull
Declaration
public bool IsNull { get; }
Property Value
Type |
Description |
System.Boolean |
|
Offset
The offset into HtmlPage where this range begins.
Declaration
public int Offset { get; }
Property Value
Type |
Description |
System.Int32 |
|
Methods
Declaration
public void AddAllInputs(UrlQueryParams aQuery)
Parameters
Type |
Name |
Description |
Subsembly.Interweb.UrlQueryParams |
aQuery |
|
ContainsString(String, StringComparison)
Declaration
public bool ContainsString(string s, StringComparison nComparisonType = StringComparison.OrdinalIgnoreCase)
Parameters
Type |
Name |
Description |
System.String |
s |
|
System.StringComparison |
nComparisonType |
|
Returns
Type |
Description |
System.Boolean |
|
ScrapeAllElements(String, String, String, Int32, ScraperElementPart)
Returns all child elements that match the given tag and attribute value.
Declaration
public ScraperRange[] ScrapeAllElements(string sTag, string sAttrName = null, string sWithAttrValue = null, int nMaxCount = 2147483647, ScraperElementPart nPart = ScraperElementPart.InnerHtml)
Parameters
Type |
Name |
Description |
System.String |
sTag |
|
System.String |
sAttrName |
Optional name of an attribute that must be contained in the tag. If this attribute
name is "class", then special CSS class comparison semantics for
sWithAttrValue are used.
|
System.String |
sWithAttrValue |
Optional value of the required attribute. Only relevant if a nonempty
sAttrName was given. If this is null and an
attribute name was given, then the value of the attribute is ignored. The
presence of the attribute is enough for a match.
Special case: When sAttrName is "class", then this parameter
is treated as a blank-separated list of CSS class names. If any of the given CSS
class names matches any of the class names in the element's "class" attribute,
then it is considered a match.
|
System.Int32 |
nMaxCount |
The max number of matches that shall be returned. This defaults to Int32.MaxValue.
If this is 1, then only the first match is returned. Usually it is not required to
place a limit on the number of matches, as the range has an implicit limit by its
length.
|
ScraperElementPart |
nPart |
The part of the matching element to be returned in the ranges.
|
Returns
Type |
Description |
ScraperRange[] |
Array of all matching element ranges. If there is no match, then an empty array
will be returned. This method never returns null.
|
ScrapeAttribute(String, String)
Declaration
public string ScrapeAttribute(string sTag, string sValueAttribute)
Parameters
Type |
Name |
Description |
System.String |
sTag |
The XML tag of the element that shall be scraped.
|
System.String |
sValueAttribute |
The name of the attribute whose value shall be returned.
|
Returns
Type |
Description |
System.String |
|
ScrapeAttribute(String, String, String, String)
Declaration
public string ScrapeAttribute(string sTag, string sName, string sNameAttribute, string sValueAttribute)
Parameters
Type |
Name |
Description |
System.String |
sTag |
The XML tag of the element that shall be scraped.
|
System.String |
sName |
The content of an attribute of that element that identifies it.
|
System.String |
sNameAttribute |
The name of the attribute that shall contain sName.
|
System.String |
sValueAttribute |
The name of the attribute whose value shall be returned.
|
Returns
Type |
Description |
System.String |
|
ScrapeAttributes(String, String, String, String)
Declaration
public string[] ScrapeAttributes(string sTag, string sName, string sNameAttribute, string sValueAttribute)
Parameters
Type |
Name |
Description |
System.String |
sTag |
The name of the HTML tag from which the attribute values shall be scraped.
|
System.String |
sName |
Optional name or ID of the HTML element that is sought. If given, then this name
must appear in the attribute named sNameAttribute of the element.
|
System.String |
sNameAttribute |
Optional name of the tag attribute that uniquely identifies the correct HTML element.
Usually this is either "name" or "id".
|
System.String |
sValueAttribute |
The name of the value attribute that shall be returned.
|
Returns
Type |
Description |
System.String[] |
Returns an array with the values of all matching attributes in the given HTML page.
If there was not any match, then null is returned.
|
ScrapeElement(String, ScraperElementPart)
Declaration
public ScraperRange ScrapeElement(string sTag, ScraperElementPart nPart = ScraperElementPart.InnerHtml)
Parameters
Returns
ScrapeElement(String, String, String, ScraperElementPart)
Declaration
public ScraperRange ScrapeElement(string sTag, string sAttrName, string sWithAttrValue, ScraperElementPart nPart = ScraperElementPart.InnerHtml)
Parameters
Type |
Name |
Description |
System.String |
sTag |
|
System.String |
sAttrName |
|
System.String |
sWithAttrValue |
|
ScraperElementPart |
nPart |
|
Returns
ScrapeElementByClassname(String, String, ScraperElementPart)
Declaration
public ScraperRange ScrapeElementByClassname(string sTag, string sClassnames, ScraperElementPart nPart = ScraperElementPart.InnerHtml)
Parameters
Type |
Name |
Description |
System.String |
sTag |
|
System.String |
sClassnames |
One or more CSS class names, separated by a single blank. All elements that
specify at least one of the given class names are matched and returned.
|
ScraperElementPart |
nPart |
|
Returns
ScrapeElementByPosition(String, Int32, ScraperElementPart)
Declaration
public ScraperRange ScrapeElementByPosition(string sTag, int nPos, ScraperElementPart nPart = ScraperElementPart.InnerHtml)
Parameters
Returns
ScrapeElementByPosition(String, String, String, Int32, ScraperElementPart)
Declaration
public ScraperRange ScrapeElementByPosition(string sTag, string sAttribute, string sValue, int nPos, ScraperElementPart nPart = ScraperElementPart.InnerHtml)
Parameters
Type |
Name |
Description |
System.String |
sTag |
|
System.String |
sAttribute |
|
System.String |
sValue |
|
System.Int32 |
nPos |
|
ScraperElementPart |
nPart |
|
Returns
Extracts a form and its action attribute value.
Declaration
public ScraperRange ScrapeForm(out string sFormAction, string sFormName, string sNameAttribute = "name")
Parameters
Type |
Name |
Description |
System.String |
sFormAction |
Returns the value of the "action" attribute of the form.
|
System.String |
sFormName |
The name of the form that is matched against the sNameAttribute.
If this is null, then the first form is scraped, regardless of its name.
|
System.String |
sNameAttribute |
The attribute that contains the name of the form. By default this is "name", but
another attribute, e.g. "id", may be given here. If sFormName is
null, then this parameter is ignored.
|
Returns
Declaration
public string ScrapeFormAction(string sFormName, string sNameAttribute = "name")
Parameters
Type |
Name |
Description |
System.String |
sFormName |
|
System.String |
sNameAttribute |
|
Returns
Type |
Description |
System.String |
|
Extracts the value of the first occurrence of a named input field.
Declaration
public string ScrapeInputValue(string sInputName)
Parameters
Type |
Name |
Description |
System.String |
sInputName |
|
Returns
Type |
Description |
System.String |
|
Extracts all values of all occurrences of a named input field.
Declaration
public string[] ScrapeInputValues(string sInputName)
Parameters
Type |
Name |
Description |
System.String |
sInputName |
|
Returns
Type |
Description |
System.String[] |
|
ScrapeLink(String, String)
Declaration
public string ScrapeLink(string sLinkName, string sNameAttribute = "id")
Parameters
Type |
Name |
Description |
System.String |
sLinkName |
|
System.String |
sNameAttribute |
The attribute that contains the name of the link. By
default this is the "id" attribute, but another attribute, e.g. "class" may be
given here.
|
Returns
Type |
Description |
System.String |
|
ScrapeLinkContaining(String)
Scrape the href of an anchor that contains the given string in its element content.
Declaration
public string ScrapeLinkContaining(string sContains)
Parameters
Type |
Name |
Description |
System.String |
sContains |
|
Returns
Type |
Description |
System.String |
|
ScrapeValue(String)
Declaration
public string ScrapeValue(string sRegex)
Parameters
Type |
Name |
Description |
System.String |
sRegex |
|
Returns
Type |
Description |
System.String |
If the regex contains at least one group, then the match of the first regex group
is returned. If there is no group, then the complete match is returned.
|
SubRange(Int32)
Declaration
public ScraperRange SubRange(int nOffset)
Parameters
Type |
Name |
Description |
System.Int32 |
nOffset |
The offset into the entire HtmlPage where the new range
shall begin.
|
Returns
SubRange(Int32, Int32)
Declaration
public ScraperRange SubRange(int nOffset, int nLength)
Parameters
Type |
Name |
Description |
System.Int32 |
nOffset |
The offset into the entire HtmlPage where the new range
shall begin.
|
System.Int32 |
nLength |
Length of the range.
|
Returns
ToString()
Declaration
public override string ToString()
Returns
Type |
Description |
System.String |
|
Overrides
System.Object.ToString()