Class ScraperHelper
Some static utility methods for screen scrapers.
Inheritance
Inherited Members
Namespace: Subsembly.Scraper
Assembly: Subsembly.Scraper.dll
Syntax
public static class ScraperHelper
Methods
ScrapeAttribute(String, String, String)
Declaration
public static string ScrapeAttribute(string sHtml, string sTag, string sValueAttribute)
Parameters
Type | Name | Description |
---|---|---|
System.String | sHtml | |
System.String | sTag | The XML tag of the element that shall be scraped. |
System.String | sValueAttribute | The name of the attribute whose value shall be returned. |
Returns
Type | Description |
---|---|
System.String |
ScrapeAttribute(String, String, String, String, String)
Declaration
public static string ScrapeAttribute(string sHtml, string sTag, string sName, string sNameAttribute, string sValueAttribute)
Parameters
Type | Name | Description |
---|---|---|
System.String | sHtml | |
System.String | sTag | The XML tag of the element that shall be scraped. |
System.String | sName | The content of an attribute of that element that identifies it. |
System.String | sNameAttribute | The name of the attribute that shall contain sName. |
System.String | sValueAttribute | The name of the attribute whose value shall be returned. |
Returns
Type | Description |
---|---|
System.String |
ScrapeElement(String, String)
Declaration
public static string ScrapeElement(string sHtml, string sTag)
Parameters
Type | Name | Description |
---|---|---|
System.String | sHtml | |
System.String | sTag |
Returns
Type | Description |
---|---|
System.String |
ScrapeElement(String, String, String, String, Boolean)
Scrape the content of an element with a given tag and attribute value.
Declaration
public static string ScrapeElement(string sHtml, string sTag, string sAttribute, string sValue, bool fOuterHtml = false)
Parameters
Type | Name | Description |
---|---|---|
System.String | sHtml | |
System.String | sTag | |
System.String | sAttribute | The attribute name of an attribute that must be contained in the tag. |
System.String | sValue | The value of the required attribute. |
System.Boolean | fOuterHtml |
Returns
Type | Description |
---|---|
System.String |
ScrapeElementByClassname(String, String, String, Boolean)
Declaration
public static string ScrapeElementByClassname(string sHtml, string sTag, string sClassname, bool fOuterHtml = false)
Parameters
Type | Name | Description |
---|---|---|
System.String | sHtml | |
System.String | sTag | |
System.String | sClassname | The name of a CSS class that must appear in the class attribute of the element. |
System.Boolean | fOuterHtml |
Returns
Type | Description |
---|---|
System.String |
ScrapeElementByPosition(String, String, Int32, Boolean)
Returns the content of the n'th occurrence of an element with a given tag.
Declaration
public static string ScrapeElementByPosition(string sHtml, string sTag, int nPos, bool fOuterHtml = false)
Parameters
Type | Name | Description |
---|---|---|
System.String | sHtml | |
System.String | sTag | |
System.Int32 | nPos | |
System.Boolean | fOuterHtml |
Returns
Type | Description |
---|---|
System.String |
ScrapeElementByPosition(String, String, String, String, Int32, Boolean)
Scrape the content of the n'th occurrence of an element with a given tag and attribute value.
Declaration
public static string ScrapeElementByPosition(string sHtml, string sTag, string sAttribute, string sValue, int nPos, bool fOuterHtml = false)
Parameters
Type | Name | Description |
---|---|---|
System.String | sHtml | |
System.String | sTag | |
System.String | sAttribute | The attribute name of an attribute that must be contained in the tag. |
System.String | sValue | The value of the required attribute. |
System.Int32 | nPos | |
System.Boolean | fOuterHtml |
Returns
Type | Description |
---|---|
System.String |
ScrapeFormAction(String, String, String)
Extracts the action attribute value of a given form.
Declaration
public static string ScrapeFormAction(string sHtml, string sFormName, string sNameAttribute = "name")
Parameters
Type | Name | Description |
---|---|---|
System.String | sHtml | Form or complete page which holds the form. |
System.String | sFormName | Name of the form. |
System.String | sNameAttribute | The attribute that contains the name of the form. By default this is the "name" attribute, but another attribute, e.g. "id" may be given here. |
Returns
Type | Description |
---|---|
System.String |
ScrapeFormParams(out String, String, String, String)
Extracts action attribute and all available input parameters from given form.
Declaration
public static UrlQueryParams ScrapeFormParams(out string sFormAction, string sHtml, string sFormName, string sNameAttribute = "name")
Parameters
Type | Name | Description |
---|---|---|
System.String | sFormAction | Returns the form action. |
System.String | sHtml | Form or complete page which holds the form. |
System.String | sFormName | Name of the form. |
System.String | sNameAttribute | The attribute that contains the name of the form. By default this is the "name" attribute, but another attribute, e.g. "id" may be given here. |
Returns
Type | Description |
---|---|
Subsembly.Interweb.UrlQueryParams | Returns all available input parameters of the form to be used as HTTP post or get query parameters. |
ScrapeInputValue(String, String)
Extracts the value of the first occurrence of a named input field.
Declaration
public static string ScrapeInputValue(string sHtml, string sInputName)
Parameters
Type | Name | Description |
---|---|---|
System.String | sHtml | |
System.String | sInputName |
Returns
Type | Description |
---|---|
System.String |
ScrapeInputValues(String, String)
Extracts all values of all occurrences of a named input field.
Declaration
public static string[] ScrapeInputValues(string sHtml, string sInputName)
Parameters
Type | Name | Description |
---|---|---|
System.String | sHtml | |
System.String | sInputName |
Returns
Type | Description |
---|---|
System.String[] |
ScrapeLink(String, String, String)
Declaration
public static string ScrapeLink(string sHtml, string sLinkName, string sNameAttribute = "id")
Parameters
Type | Name | Description |
---|---|---|
System.String | sHtml | |
System.String | sLinkName | |
System.String | sNameAttribute | The attribute that contains the name of the link. By default this is the "id" attribute, but another attribute, e.g. "class" may be given here. |
Returns
Type | Description |
---|---|
System.String |
ScrapeLinkContaining(String, String)
Scrape the href of an anchor that contains the given string in its element content.
Declaration
public static string ScrapeLinkContaining(string sHtml, string sContains)
Parameters
Type | Name | Description |
---|---|---|
System.String | sHtml | |
System.String | sContains |
Returns
Type | Description |
---|---|
System.String |
ScrapeValue(String, String)
Declaration
public static string ScrapeValue(string sHtml, string sRegex)
Parameters
Type | Name | Description |
---|---|---|
System.String | sHtml | |
System.String | sRegex |
Returns
Type | Description |
---|---|
System.String | Resulting match of the first regex group. |