PDA

View Full Version : Java Script for In Page Text Search



WebDevWannaBe
12 Aug 2010, 08:09 PM
I need some help developing a client side JavaScript. I am pretty new with this stuff and just can’t figure out where I am going wrong. It’s probably something really basic that I am missing. Here’s what I want to do.

1. Include a source link on a frame to a webpage that points to another site. So when
my webpage loads, that site will appear in my frame.
2. Once loaded run a script on the page that will do the following:
a. Search the contents of the web page that was loaded into my frame for a specific value.
i. The value I want to find is located in a standard html table.
1. Value 1 is in one cell (this identifies the correct field to evaluate).
2. Value 2 is in the following cell. (this is the value that I am interested in).
3. These table cells do not have an name or id attributes to them. It is just text in a cell.
b. If that value is found I want to notify myself via email or something.
c. If not found,
i. then delay for 15 minutes
ii. refresh the page in my frame.
iii. Search the page again.
3. Continue processing until I tell it to stop or until the value is found.

I am stuck on 2.a. I have tried just about every script I could find on the internet that does a webpage search. I have not been able to get any of them to work. It seems the common points of failure in any example I have tried is when it attempts to run a line of code that attempts to access a document Element like var x=document.getElementById("myframe"); or something like element.innerHTML = tempinnerHTML.replace(regex,'>$1<span class="highlighted term'+termid+'">$2</span>$3<');. Also, doing something like var textContainerNode = document.getElementById("content"); will work, but returns null. So basically I am not getting at the Document Object and any subsequent actions on it will cause it to fail.

I am using Chrome, but I could also use IE. Those are my two choices. Here’s my html file. It shows only 3 of my many attempts to get this to work. There is


<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html>
<head>
<style type=’text/css’>
span.highlighted {
background-color: #161616;
font-weight: bold;
}
span.term0 {
background-color: #161633;
}
span.term1 {
background-color: #331616;
}
span.term2 {
background-color: #163316;
}
</style>
<script type="text/javascript">
function ScrapeWebPage()
{
var x=document.getElementById("myframe");
var y=(x.contentWindow || x.contentDocument);
alert(y.title);

if (y.document){
alert("did it");
y=y.document;
}
y.body.style.backgroundColor="#0000ff";
}

</script>

<script type="text/javascript">


function FirstAttempt() {
alert("starting");
// Get search string
var searchString = getSearchString();
// Starting node, parent to all nodes you want to search
var textContainerNode = document.getElementById("content");
alert(textContainerNode);
// Informational message for search
var searchInfo = 'Search Results for: ';
// Split search terms on '|' and iterate over resulting array
var searchTerms = searchString.split('|');
for (var i in searchTerms) {
// The regex is the secret, it prevents text within tag declarations to be affected
var regex = new RegExp(">([^<]*)?("+searchTerms[i]+")([^>]*)?<","ig");
alert(regex);
highlightTextNodes(textContainerNode, regex, i);
alert("back from highlighting");
// Add to info-string
searchInfo += ' <span class="highlighted term'+i+'">'+searchTerms[i]+'</span> ';
}
// Create div describing the search
var searchTermDiv = document.createElement("H2");
searchTermDiv.className = 'searchterms';
searchTermDiv.innerHTML = searchInfo;
// Insert as very first child in searched node
textContainerNode.insertBefore(searchTermDiv, textContainerNode.childNodes[0]);
}

// Pull the search string out of the URL
function getSearchString() {
// Return sanitized search string if it exists
var rawSearchString="The Text To Find";
return rawSearchString;
}

function highlightTextNodes(element, regex, termid) {
alert("highlighting");
var tempinnerHTML = element.innerHTML;
// Do regex replace
alert("right before the regex replace");
// Inject span with class of 'highlighted termX' for google style highlighting
element.innerHTML = tempinnerHTML.replace(regex,'>$1<span class="highlighted term'+termid+'">$2</span>$3<');
alert("done highlighting");
}

</script>
<script type="text/javascript">
function SecondAttempt () {
alert("start");
var myhtml = document.getElementById('myframe').document.body.innerHTML;
alert("start replace");
myHTML = myHTML.replace(/>/,">");
myHTML = myHTML.replace(/</,"<");
document.getElementById('myFrame').document.body.innerHTML= myHTML;
}

function ThirdAttempt(){
var frame = content.document.getElementById("mainFrame");
var doc = frame.contentDocument;
alert(doc);
}

</script>
</head>
<body>

<iframe id="myframe" src="http://TheWebSiteToSearch.com" height=800 width=1200>
<p>Your browser does not support iframes.</p>
</iframe>
<br /><br />
<input type="button" onclick="FirstAttempt();" value="Start Watching" />

</body>
</html>