diff --git a/ChangeLog b/ChangeLog new file mode 100644 index 0000000..365f7e4 --- /dev/null +++ b/ChangeLog @@ -0,0 +1,15 @@ +Version 0.2 (2008-05-11) +=========== +- Implement tableExtractor.class.php to more smartly parse out the carrier + information, allowing for network and region based lookups +- Add form elements to support the new network/region functionality +- Add user-controlled verbosity setting +- Add output of links needed to check manually +- Add quick BBFAQ debug script to spit out the table array +- Minor bugfix to PHP_SELF coding error +- Add CREDITS and ChangeLog files + +Version 0.1 (2007-11-07) +=========== +- Initial release + diff --git a/README b/README index 29dc53d..59551c4 100644 --- a/README +++ b/README @@ -1,5 +1,5 @@ bboschecker - check for new BB device OS releases -Copyright (C) 2007 troyengel +Copyright (C) 2008 troyengel About ===== diff --git a/bboschecker.php b/bboschecker.php index 69b89b5..d4f2fc3 100644 --- a/bboschecker.php +++ b/bboschecker.php @@ -1,7 +1,7 @@ @@ -54,6 +67,14 @@ curl_setopt($curl_handle, CURLOPT_RETURNTRANSFER, TRUE); curl_setopt($curl_handle, CURLOPT_COOKIEFILE, $SCRIPT_CJ); curl_setopt($curl_handle, CURLOPT_COOKIEJAR, $SCRIPT_CJ); +function chatty($text) { + global $VERBOSE; + if ($VERBOSE) { + echo $text; + flush(); + } +} + function getPage($url, $method="GET", $postfields="") { global $curl_handle; if (!is_string($url)) { @@ -85,40 +106,84 @@ function getPage($url, $method="GET", $postfields="") { } } -if (isset($_REQUEST['device']) && is_numeric($_REQUEST['device'])) { +if (isset($_REQUEST['BB_DEVICE']) && is_numeric($_REQUEST['BB_DEVICE'])) { + + global $VERBOSE; + $rimDevice = $_REQUEST['BB_DEVICE']; + $rimNetwork = $_REQUEST['BB_NETWORK']; + $rimRegion = $_REQUEST['BB_REGION']; + if (isset($_REQUEST['BB_VERBOSE']) && $_REQUEST['BB_VERBOSE']) { + $VERBOSE = true; + } - $device = $_REQUEST['device']; // IE hack - it buffers and won't display unless 256b print(str_repeat(" ", 300) . 
"\n"); + echo "Searching for BlackBerry OS releases for:
\n". + "Device: $rimDevice
\n". + "Network: $rimNetwork
\n". + "Region: $rimRegion
\n". + "
\n"; + flush(); + echo "Downloading carrier list from BlackBerryFAQ... "; flush(); - $carriers = getPage($BBF_LIST); - echo "done.
\n"; + chatty("\n
  URL:".$BBF_LIST."
\n"); + $tx = new tableExtractor; + $tx->source = file_get_contents($BBF_LIST); + $tx->anchor = $BBF_ANCHOR; + $tx->anchorWithin = false; + $carrierArray = $tx->extractTable(); + echo "done.

\n"; flush(); echo "Parsing carrier list for links... "; flush(); + chatty("
\n"); + reset($carrierArray); $link_array = array(); - $cnt_array = explode("\n", $carriers); - foreach ($cnt_array as $line) { - if (stristr($line, "www.blackberry.com")) { - // http://www.the-art-of-web.com/php/parse-links/ - $regexp = "]*href=(\"??)([^\" >]*?)\\1[^>]*>(.*)<\/a>"; - if (preg_match_all("/$regexp/siU", $line, $matches, PREG_SET_ORDER)) { - foreach ($matches as $match) { - # $match[2] = link address - # $match[3] = link text - $link_array[$match[2]] = $match[3]; - } - } - } - } + $reject_array = array(); + foreach($carrierArray as $carrier) { + if (($carrier['Network'] == $rimNetwork) || + ($rimNetwork == "All")) { + if (($carrier['Region'] == $rimRegion) || + ($rimRegion == "All")) { + if (stristr($carrier['Carrier'], "www.blackberry.com")) { + // http://www.the-art-of-web.com/php/parse-links/ + $regexp = "]*href=(\"??)([^\" >]*?)\\1[^>]*>(.*)<\/a>"; + if (preg_match_all("/$regexp/siU", $carrier['Carrier'], $matches, + PREG_SET_ORDER)) { + foreach ($matches as $match) { + # $match[2] = link address + # $match[3] = link text + chatty("\n  Adding: " . $match[3] . + " (Network: " . $rimNetwork . ", Region: " . + $rimRegion . ")
\n"); + $link_array[$match[2]] = $match[3]; + } // foreach link + } // rexexp match + } else { + chatty("\n  Rejecting: " . $carrier['Carrier'] . + " (not hosted on RIM server)
\n"); + $reject_array[] = $carrier['Carrier']; + } // blackberry.com match + } // region match + } // network match + } // foreach carrier echo "done.

\n"; flush(); - echo "Searching for OS releases for the BlackBerry $device

\n"; - flush(); + // output the list of sites to check by hand + if (count($reject_array) > 0) { + echo "These downloads are not hosted by RIM, check manually:
\n"; + flush(); + reset($reject_array); + foreach($reject_array as $reject) { + echo "  " . $reject . "
\n"; + } + echo "
\n"; + } + reset($link_array); while (list($link, $title) = each($link_array)) { echo "Checking $title...
\n"; @@ -131,7 +196,7 @@ if (isset($_REQUEST['device']) && is_numeric($_REQUEST['device'])) { $code = ""; foreach ($rim_array as $line) { // get the value - if (stristr($line, $device)) { + if (stristr($line, $rimDevice)) { $regexp = "(.*)<\/option>"; if (preg_match("/$regexp/siU", $line, $match)) { $got_v = 1; @@ -148,9 +213,10 @@ if (isset($_REQUEST['device']) && is_numeric($_REQUEST['device'])) { } } if ($got_v && $got_c) { - echo "  Found one, retrieving product page...
\n"; - flush(); + chatty("  Found one, retrieving product page...
\n"); $postvars = "productName=$value&code=$code"; + $os_url = $RIM_DLURL . ", POST, " . $postvars; + chatty("  URL Data: ".$os_url."
\n"); $os_page = getPage($RIM_DLURL, "POST", $postvars); $os_array = explode("\n", $os_page); foreach ($os_array as $osline) { @@ -168,19 +234,47 @@ if (isset($_REQUEST['device']) && is_numeric($_REQUEST['device'])) { } else { ?> -Enter the model number ONLY of the device.
-(7290, 8100, 8320, 8800, etc.)
-
-
- - + + Device:  + +
+ Enter the model number ONLY of the device.
+ (7290, 8100, 8320, 8703, 8800, etc.) +

+ Verbose Output: + +

+ Network: +
+ +

+ Region: +
+ +

+
+
Note: This tool will only search carriers who
host their downloads on www.blackberry.com
diff --git a/debugbbfarray.php b/debugbbfarray.php new file mode 100644 index 0000000..d9751fe --- /dev/null +++ b/debugbbfarray.php @@ -0,0 +1,54 @@ + +BBOS Carrier Array Check + + +source = file_get_contents($BBF_LIST); + +// hopefully nobody will change this text right above the table +$tx->anchor = 'desktop and device software download page'; +$tx->anchorWithin = false; + +$tableArray = $tx->extractTable(); + +print("
\n");
+print_r($tableArray);
+print("
\n"); + +?> + + + + diff --git a/tableExtractor.class.php b/tableExtractor.class.php new file mode 100644 index 0000000..5dcef7c --- /dev/null +++ b/tableExtractor.class.php @@ -0,0 +1,383 @@ +cleanHTML(); + $this->prepareArray(); + + return $this->createArray(); + + } + + /*-------------------------------------------------- + + --------------------------------------------------*/ + + function cleanHTML() { + + // php 4 compatibility functions + if(!function_exists('stripos')) { + function stripos($haystack,$needle,$offset = 0) { + return(strpos(strtolower($haystack),strtolower($needle),$offset)); + } + } + + // find unique string that appears before the table you want to extract + if ($this->anchorWithin) { + /*------------------------------------------------------------ + With thanks to Khary Sharp for suggesting and writing + the anchor within functionality. + ------------------------------------------------------------*/ + $anchorPos = stripos($this->source, $this->anchor) + strlen($this->anchor); + $sourceSnippet = strrev(substr($this->source, 0, $anchorPos)); + $tablePos = stripos($sourceSnippet, strrev(("source, $this->anchor); + } + + // extract table + $startTable = stripos($this->source, 'source, '', $startTable) + 8; + $table = substr($this->source, $startTable, $endTable - $startTable); + + if(!function_exists('lcase_tags')) { + function lcase_tags($input) { + return strtolower($input[0]); + } + } + + // lowercase all table related tags + $table = preg_replace_callback('/<(\/?)(table|tr|th|td)/is', 'lcase_tags', $table); + + // remove all thead and tbody tags + $table = preg_replace('/<\/?(thead|tbody).*?>/is', '', $table); + + // replace th tags with td tags + $table = preg_replace('/<(\/?)th(.*?)>/is', '<$1td$2>', $table); + + // clean string + $table = trim($table); + $table = str_replace("\r\n", "", $table); + + $this->cleanHTML = $table; + + } + + /*-------------------------------------------------- + + 
--------------------------------------------------*/

	/**
	 * Turn the cleaned table markup in $this->cleanHTML into a grid of cells.
	 *
	 * The markup is split on <tr>/<td> open and close tags; every other
	 * fragment seen while a <td> is open is stored as that cell's content.
	 * Rows are numbered by the count of <tr> tags seen so far and columns
	 * start at 1. A cell with colspan is copied into each column it covers,
	 * a cell with rowspan is pre-filled into the rows below it.
	 *
	 * Writes $this->rawArray (the grid) and $this->rowCount (number of rows,
	 * minus one when $this->headerRow is set). Returns nothing.
	 */
	function prepareArray() {

		// split table into individual elements, keeping the tags themselves
		$pattern = '/(<\/?(?:tr|td).*?>)/is';
		$table = preg_split($pattern, $this->cleanHTML, -1, PREG_SPLIT_DELIM_CAPTURE);

		// grid being built: $tableCleaned[row][column] = cell content
		$tableCleaned = array();

		// loop state
		$rowCount = 0;
		$colCount = 1;
		$tdOpen = false;
		$tdTag = '';

		foreach($table as $item) {

			// NOTE(review): the entity literal below was restored on the
			// assumption that it was '&nbsp;' before being decoded to a bare
			// space; stripping plain spaces would mangle every cell. Confirm
			// against the upstream class.
			$item = str_replace('&nbsp;', '', $item);
			$item = trim($item);

			// keep the tag with its attributes for the colspan/rowspan checks
			$itemUnedited = $item;

			// reduce table tags to their bare form so the switch can match them
			$item = preg_replace('/<(\/?)(table|tr|td).*?>/is', '<$1$2>', $item);

			switch ($item) {

				case '<tr>':
					// start a new row
					$rowCount++;
					$colCount = 1;
					break;

				case '<td>':
					// save the td tag for later use
					$tdTag = $itemUnedited;
					$tdOpen = true;
					break;

				case '</td>':
					$tdOpen = false;
					break;

				case '</tr>':
					// nothing to record; matched only so it is never treated as content
					break;

				default :

					// anything outside an open td is ignored
					if($tdOpen) {

						// a colspan below 1 would skip the cell entirely, so clamp it
						if(preg_match('/<td[^>]*colspan\s*=\s*(?:\'|")?\s*([0-9]+)[^>]*>/is', $tdTag, $matches))
							$colspan = max(1, (int) $matches[1]);
						else
							$colspan = 1;

						// 0 and 1 both mean "no extra rows" for the loop below
						if(preg_match('/<td[^>]*rowspan\s*=\s*(?:\'|")?\s*([0-9]+)[^>]*>/is', $tdTag, $matches))
							$rowspan = (int) $matches[1];
						else
							$rowspan = 0;

						// loop over the colspans
						for($c = 0; $c < $colspan; $c++) {

							// if the slot was already filled by an earlier rowspan,
							// shift this cell one column to the right
							if(!isset($tableCleaned[$rowCount][$colCount]))
								$tableCleaned[$rowCount][$colCount] = $item;
							else
								$tableCleaned[$rowCount][$colCount + 1] = $item;

							// pre-fill the rows this cell spans into
							$futureRows = $rowCount;
							for($r = 1; $r < $rowspan; $r++) {
								$futureRows++;
								if($colspan > 1)
									$tableCleaned[$futureRows][$colCount + 1] = $item;
								else
									$tableCleaned[$futureRows][$colCount] = $item;
							}

							$colCount++;

						}

						// sort the row by column key (inserting rowspans disturbs the order)
						ksort($tableCleaned[$rowCount]);

					}
					break;

			}

		}

		// the header row, when there is one, is not counted as data
		if($this->headerRow)
			$this->rowCount = count($tableCleaned) - 1;
		else
			$this->rowCount = count($tableCleaned);

		$this->rawArray = $tableCleaned;

	}

	/**
	 * Build the final result array from $this->rawArray.
	 *
	 * Steps, in order: take column names from row $this->headerRow (if set)
	 * and remove that row; remove the rows listed in the comma separated
	 * $this->dropRows; keep only the rows and columns selected by
	 * startRow/maxRows and startCol/maxCols; add any $this->extraCols values
	 * extracted by regex; strip tags from cells when $this->stripTags is set.
	 *
	 * Also stores the result in $this->finalArray.
	 *
	 * @return array rows numbered from 1; each row is keyed by column name
	 *               when a header row is used, otherwise by the 1-based
	 *               position among the selected columns
	 */
	function createArray() {

		$tableData = array();
		$columnNames = array();

		// get column headers
		if($this->headerRow) {

			// an empty or missing header row simply yields no column names
			$row = isset($this->rawArray[$this->headerRow]) ? $this->rawArray[$this->headerRow] : array();

			$uniqueNames = array();

			$colCount = 0;
			foreach($row as $cell) {

				$colCount++;

				$cell = trim(strip_tags($cell));

				// save name if there is one, otherwise save index
				if($cell) {

					// repeated names get a " (2)", " (3)", ... suffix
					if(isset($uniqueNames[$cell])) {
						$uniqueNames[$cell]++;
						$cell .= ' ('.($uniqueNames[$cell] + 1).')';
					}
					else {
						$uniqueNames[$cell] = 0;
					}

					$columnNames[$colCount] = $cell;

				}
				else
					$columnNames[$colCount] = $colCount;

			}

			// remove the headers row from the table
			unset($this->rawArray[$this->headerRow]);

		}

		// remove rows to drop; entries are trimmed so "1, 2" works like "1,2"
		foreach(explode(',', (string) $this->dropRows) as $value) {
			$value = trim($value);
			if($value !== '')
				unset($this->rawArray[$value]);
		}

		// set the end row
		if($this->maxRows)
			$endRow = $this->startRow + $this->maxRows - 1;
		else
			$endRow = count($this->rawArray);

		// row positions are counted over the remaining rows, not the original keys
		$rowCount = 0;
		$newRowCount = 0;
		foreach($this->rawArray as $row) {

			$rowCount++;

			// skip rows outside the requested window
			if($rowCount < $this->startRow || $rowCount > $endRow)
				continue;

			$newRowCount++;
			$tableData[$newRowCount] = array();

			// set the end column
			if($this->maxCols)
				$endCol = $this->startCol + $this->maxCols - 1;
			else
				$endCol = count($row);

			$colCount = 0;
			$newColCount = 0;
			foreach($row as $cell) {

				$colCount++;

				// skip columns outside the requested window
				if($colCount < $this->startCol || $colCount > $endCol)
					continue;

				$newColCount++;

				if($this->extraCols) {
					foreach($this->extraCols as $extraColumn) {

						if($extraColumn['column'] != $colCount)
							continue;

						$matched = preg_match($extraColumn['regex'], $cell, $matches);
						$names = $extraColumn['names'];

						if(!$matched)
							$this->extraColsCount = 0;

						if(is_array($names)) {
							// one capture group per name; missing groups become ''
							$this->extraColsCount = 0;
							foreach($names as $extraColumnSub) {
								$this->extraColsCount++;
								if($matched && isset($matches[$this->extraColsCount]))
									$tableData[$newRowCount][$extraColumnSub] = $matches[$this->extraColsCount];
								else
									$tableData[$newRowCount][$extraColumnSub] = '';
							}
						} else {
							if($matched && isset($matches[1]))
								$tableData[$newRowCount][$names] = $matches[1];
							else
								$tableData[$newRowCount][$names] = '';
						}

					}
				}

				if($this->stripTags)
					$cell = strip_tags($cell);

				// default key is the position among the selected columns
				$colKey = $newColCount;

				// if there is a table header, use the column name as the key
				if($this->headerRow && isset($columnNames[$colCount]))
					$colKey = $columnNames[$colCount];

				$tableData[$newRowCount][$colKey] = $cell;

			}

		}

		$this->finalArray = $tableData;

		return $tableData;

	}

}

?>