Class ScraperRange

Namespace
Subsembly.Scraper
Assembly
Subsembly.Scraper.dll

Helper class for implementing screen scrapers.

public class ScraperRange
Inheritance
ScraperRange
Derived
Inherited Members

Constructors

ScraperRange(string)

Creates a new ScraperRange instance that wraps the entire HTML string.

public ScraperRange(string sHtml)

Parameters

sHtml string

Usually the text of a complete HTML page. If this is null, then an empty ScraperRange will be created.

ScraperRange(string, int)

Creates a new ScraperRange instance that wraps the given HTML string starting at nOffset up to the end of the string.

public ScraperRange(string sHtml, int nOffset)

Parameters

sHtml string

The HTML string to wrap. This must not be null.

nOffset int

The starting offset. This must not be negative or larger than the length of the sHtml string.

ScraperRange(string, int, int)

public ScraperRange(string sHtml, int nOffset, int nLength)

Parameters

sHtml string
nOffset int
nLength int

Fields

Null

public static readonly ScraperRange Null

Field Value

ScraperRange

Properties

EndOffset

The offset into HtmlPage after this range ends.

public int EndOffset { get; }

Property Value

int

HtmlPage

Returns the complete HTML page that contains this range.

public string HtmlPage { get; }

Property Value

string

HtmlRange

Returns the raw HTML string of this range. This should rarely be used. To get the text of this range, the property HtmlText shall be used instead.

public string HtmlRange { get; }

Property Value

string

If this range IsNull, then this property returns null.

HtmlText

The plain text, excluding all HTML tags, contained in this range.

public string HtmlText { get; }

Property Value

string

IsNull

public bool IsNull { get; }

Property Value

bool

Offset

The offset into HtmlPage where this range begins.

public int Offset { get; }

Property Value

int

Methods

AddAllInputs(UrlQueryParams, bool)

public void AddAllInputs(UrlQueryParams aQuery, bool fExceptSubmit = false)

Parameters

aQuery UrlQueryParams
fExceptSubmit bool

ContainsString(string, StringComparison)

public bool ContainsString(string s, StringComparison nComparisonType = StringComparison.OrdinalIgnoreCase)

Parameters

s string
nComparisonType StringComparison

Returns

bool

ScrapeAllElements(string, string, string, int, ScraperElementPart)

Returns all child elements that match the given tag and attribute value.

public ScraperRange[] ScrapeAllElements(string sTag, string sAttrName = null, string sWithAttrValue = null, int nMaxCount = 2147483647, ScraperElementPart nPart = ScraperElementPart.InnerHtml)

Parameters

sTag string
sAttrName string

Optional name of an attribute that must be contained in the tag. If this attribute name is "class", then special CSS class comparison semantics for sWithAttrValue are used.

sWithAttrValue string

Optional value of the required attribute. Only relevant if a nonempty sAttrName was given. If this is null and an attribute name was given, then the value of the attribute is ignored. The presence of the attribute is enough for a match.

Special case: When sAttrName is "class", then this parameter is treated as a blank-separated list of CSS class names. If any of the given CSS class names matches any of the class names in the element's "class" attribute, then it is considered a match.

nMaxCount int

The max number of matches that shall be returned. This defaults to Int32.MaxValue. If this is 1, then only the first match is returned. Usually it is not required to place a limit on the number of matches, as the range has an implicit limit by its length.

nPart ScraperElementPart

The part of the matching element to be returned in the ranges.

Returns

ScraperRange[]

Array of all matching element ranges. If there is no match, then an empty array will be returned. This method never returns null.

ScrapeAttribute(string, string)

public string ScrapeAttribute(string sTag, string sValueAttribute)

Parameters

sTag string

The XML tag of the element that shall be scraped.

sValueAttribute string

The name of the attribute whose value shall be returned.

Returns

string

ScrapeAttribute(string, string, string, string)

public string ScrapeAttribute(string sTag, string sName, string sNameAttribute, string sValueAttribute)

Parameters

sTag string

The XML tag of the element that shall be scraped.

sName string

The content of an attribute of that element that identifies it.

sNameAttribute string

The name of the attribute that shall contain sName.

sValueAttribute string

The name of the attribute whose value shall be returned.

Returns

string

ScrapeAttributes(string, string, string, string)

public string[] ScrapeAttributes(string sTag, string sName, string sNameAttribute, string sValueAttribute)

Parameters

sTag string

The name of the HTML tag from which the attribute values shall be scraped.

sName string

Optional name or ID of the HTML element that is sought. If given, then this name must appear in the attribute named sNameAttribute of the element.

sNameAttribute string

Optional name of the tag attribute that uniquely identifies the correct HTML element. Usually this is either "name" or "id".

sValueAttribute string

The name of the value attribute that shall be returned.

Returns

string[]

Returns an array with the values of all matching attributes in the given HTML page. If there was not any match, then null is returned.

ScrapeElement(string, ScraperElementPart)

public ScraperRange ScrapeElement(string sTag, ScraperElementPart nPart = ScraperElementPart.InnerHtml)

Parameters

sTag string
nPart ScraperElementPart

Returns

ScraperRange

ScrapeElement(string, string, string, ScraperElementPart)

public ScraperRange ScrapeElement(string sTag, string sAttrName, string sWithAttrValue, ScraperElementPart nPart = ScraperElementPart.InnerHtml)

Parameters

sTag string
sAttrName string
sWithAttrValue string
nPart ScraperElementPart

Returns

ScraperRange

ScrapeElementByClassname(string, string, ScraperElementPart)

public ScraperRange ScrapeElementByClassname(string sTag, string sClassnames, ScraperElementPart nPart = ScraperElementPart.InnerHtml)

Parameters

sTag string
sClassnames string

One or more CSS class names, separated by a single blank. All elements that specify at least one of the given class names are matched and returned.

nPart ScraperElementPart

Returns

ScraperRange

ScrapeElementByPosition(string, int, ScraperElementPart)

public ScraperRange ScrapeElementByPosition(string sTag, int nPos, ScraperElementPart nPart = ScraperElementPart.InnerHtml)

Parameters

sTag string
nPos int
nPart ScraperElementPart

Returns

ScraperRange

ScrapeElementByPosition(string, string, string, int, ScraperElementPart)

public ScraperRange ScrapeElementByPosition(string sTag, string sAttribute, string sValue, int nPos, ScraperElementPart nPart = ScraperElementPart.InnerHtml)

Parameters

sTag string
sAttribute string
sValue string
nPos int
nPart ScraperElementPart

Returns

ScraperRange

ScrapeForm(out string, string, string)

Extracts a form and its action attribute value.

public ScraperRange ScrapeForm(out string sFormAction, string sFormName, string sNameAttribute = "name")

Parameters

sFormAction string

Returns the value of the "action" attribute of the form.

sFormName string

The name of the form that is matched against the sNameAttribute. If this is null, then the first form is scraped, regardless of its name.

sNameAttribute string

The attribute that contains the name of the form. By default this is "name", but another attribute, e.g. "id", may be given here. If sFormName is null, then this parameter is ignored.

Returns

ScraperRange

ScrapeFormAction(string, string)

DEPRECATED: Use ScrapeForm(out string, string, string) instead.

public string ScrapeFormAction(string sFormName, string sNameAttribute = "name")

Parameters

sFormName string
sNameAttribute string

Returns

string

ScrapeInputValue(string)

Extracts the value of the first occurrence of a named input field.

public string ScrapeInputValue(string sInputName)

Parameters

sInputName string

Returns

string

ScrapeInputValues(string)

Extracts all values of all occurrences of a named input field.

public string[] ScrapeInputValues(string sInputName)

Parameters

sInputName string

Returns

string[]

ScrapeLink(string, string)

public string ScrapeLink(string sLinkName, string sNameAttribute = "id")

Parameters

sLinkName string
sNameAttribute string

The attribute that contains the name of the link. By default this is the "id" attribute, but another attribute, e.g. "class", may be given here.

Returns

string

ScrapeLinkContaining(string)

Scrape the href of an anchor that contains the given string in its element content.

public string ScrapeLinkContaining(string sContains)

Parameters

sContains string

Returns

string

ScrapeValue(string)

public string ScrapeValue(string sRegex)

Parameters

sRegex string

Returns

string

If the regex contains at least one group, then the match of the first regex group is returned. If there is no group, then the complete match is returned.

SubRange(int)

public ScraperRange SubRange(int nOffset)

Parameters

nOffset int

The offset into the entire HtmlPage where the new range shall begin.

Returns

ScraperRange

SubRange(int, int)

public ScraperRange SubRange(int nOffset, int nLength)

Parameters

nOffset int

The offset into the entire HtmlPage where the new range shall begin.

nLength int

Length of the range.

Returns

ScraperRange

ToString()

public override string ToString()

Returns

string