Class ScraperHelper

Namespace
Subsembly.Scraper
Assembly
Subsembly.Scraper.dll

Some static utility methods for screen scrapers.

public static class ScraperHelper
Inheritance
ScraperHelper
Inherited Members

Methods

ScrapeAttribute(string, string, string)

public static string ScrapeAttribute(string sHtml, string sTag, string sValueAttribute)

Parameters

sHtml string
sTag string

The XML tag of the element that shall be scraped.

sValueAttribute string

The name of the attribute whose value shall be returned.

Returns

string

ScrapeAttribute(string, string, string, string, string)

public static string ScrapeAttribute(string sHtml, string sTag, string sName, string sNameAttribute, string sValueAttribute)

Parameters

sHtml string
sTag string

The XML tag of the element that shall be scraped.

sName string

The content of an attribute of that element that identifies it.

sNameAttribute string

The name of the attribute that shall contain sName.

sValueAttribute string

The name of the attribute whose value shall be returned.

Returns

string

ScrapeElement(string, string)

public static string ScrapeElement(string sHtml, string sTag)

Parameters

sHtml string
sTag string

Returns

string

ScrapeElement(string, string, string, string, bool)

Scrape the content of an element with a given tag and attribute value.

public static string ScrapeElement(string sHtml, string sTag, string sAttribute, string sValue, bool fOuterHtml = false)

Parameters

sHtml string
sTag string
sAttribute string

The name of an attribute that must be contained in the tag.

sValue string

The value of the required attribute.

fOuterHtml bool

Returns

string

ScrapeElementByClassname(string, string, string, bool)

public static string ScrapeElementByClassname(string sHtml, string sTag, string sClassname, bool fOuterHtml = false)

Parameters

sHtml string
sTag string
sClassname string

The name of a CSS class that must appear in the class attribute of the element.

fOuterHtml bool

Returns

string

ScrapeElementByPosition(string, string, int, bool)

Returns the content of the n-th occurrence of an element with a given tag.

public static string ScrapeElementByPosition(string sHtml, string sTag, int nPos, bool fOuterHtml = false)

Parameters

sHtml string
sTag string
nPos int
fOuterHtml bool

Returns

string

ScrapeElementByPosition(string, string, string, string, int, bool)

Scrape the content of an element with a given tag and attribute value.

public static string ScrapeElementByPosition(string sHtml, string sTag, string sAttribute, string sValue, int nPos, bool fOuterHtml = false)

Parameters

sHtml string
sTag string
sAttribute string

The name of an attribute that must be contained in the tag.

sValue string

The value of the required attribute.

nPos int
fOuterHtml bool

Returns

string

ScrapeFormAction(string, string, string)

Extracts the action attribute value of a given form.

public static string ScrapeFormAction(string sHtml, string sFormName, string sNameAttribute = "name")

Parameters

sHtml string

Form or complete page which holds the form.

sFormName string

Name of the form.

sNameAttribute string

The attribute that contains the name of the form. By default this is the "name" attribute, but another attribute, e.g. "id", may be given here.

Returns

string

ScrapeFormParams(out string, string, string, string)

Extracts action attribute and all available input parameters from given form.

public static UrlQueryParams ScrapeFormParams(out string sFormAction, string sHtml, string sFormName, string sNameAttribute = "name")

Parameters

sFormAction string

Returns the form action.

sHtml string

Form or complete page which holds the form.

sFormName string

Name of the form.

sNameAttribute string

The attribute that contains the name of the form. By default this is the "name" attribute, but another attribute, e.g. "id", may be given here.

Returns

UrlQueryParams

Returns all available input parameters of the form to be used as HTTP post or get query parameters.

ScrapeInputValue(string, string)

Extracts the value of the first occurrence of a named input field.

public static string ScrapeInputValue(string sHtml, string sInputName)

Parameters

sHtml string
sInputName string

Returns

string

ScrapeInputValues(string, string)

Extracts all values of all occurrences of a named input field.

public static string[] ScrapeInputValues(string sHtml, string sInputName)

Parameters

sHtml string
sInputName string

Returns

string[]

ScrapeLink(string, string, string)

public static string ScrapeLink(string sHtml, string sLinkName, string sNameAttribute = "id")

Parameters

sHtml string
sLinkName string
sNameAttribute string

The attribute that contains the name of the link. By default this is the "id" attribute, but another attribute, e.g. "class", may be given here.

Returns

string

ScrapeLinkContaining(string, string)

Scrape the href of an anchor that contains the given string in its element content.

public static string ScrapeLinkContaining(string sHtml, string sContains)

Parameters

sHtml string
sContains string

Returns

string

ScrapeValue(string, string)

public static string ScrapeValue(string sHtml, string sRegex)

Parameters

sHtml string
sRegex string

Returns

string

Resulting match of the first regex group.