blob: 7453875c36c7b4f1590a73f892c44f2463c4eab7 [file] [log] [blame]
<?php
require_once('news.class.php');
require_once('post.class.php');
require_once('config.php');
require_once('utils.php');
require_once('insiders.class.php');
require_once('groups.class.php');
require_once('hostname.php');
require_once "/home/data/httpd/eclipse-php-classes/system/dbconnection_dashboard_rw.class.php";
# Main script: for every configured newsgroup, compute posting statistics for
# each look-back period listed in DAYS, store the numbers in the stats table
# and store a human-readable processing log in the log table. Progress is
# echoed as HTML while the script runs.
echo "<html>\n<body>\n";
echo "<b>Starting at: </b>".date("H:i:s")."<br>\n";

# Establish a connection to the newsgroups server
$news = new News();
$news->connect(NEWS_SERVER,NEWS_PORT,NEWS_TIMEOUT);
$news->authenticate(NEWS_USER,NEWS_PASS);

# Establish a connection to the database server
$db_connection = new DBConnectionDashboard();
$db_handle = $db_connection->connect();
echo "Connected to DB<br>\n";

# Get group list from file;
$groups = New Groups();
$groups->loadGroups();
$grouplist = $groups->getNewsgroups();

# Group information
$ginfo = new GInfo();

# Insider object
$ins = new Insider();

$mysql_date = date('Y-m-d');

# How many days ago do we want stats for?
$days = explode(",",DAYS);

# flush_print() calls ob_flush(), which needs an active output buffer
if (ob_get_level() == 0){
    ob_start();
}

while(count($grouplist)){ # For every group
    $text = ""; # New log for this group
    $tgroup = array_pop($grouplist);
    $project = $groups->getProjectFromNewsgroup($tgroup);
    flush_print("<b>Processing group: ".$tgroup."</b>");

    # Start every group with an empty two-month baseline. These values are
    # only filled in below when the group has a baseline post; without the
    # reset a group lacking one would silently reuse the numbers computed for
    # the previously processed group.
    $extra_time = array();
    $mean = 0;
    $std = 0;

    # Select group and get info
    $ginfo = $news->selectGroup($tgroup);

    if ($ginfo->getNewsGroupName() == -1){ # Non existent newsgroup
        # The log text is the same for every period, so it is written once;
        # the stats row still gets a -1 marker for each period.
        $text = "Newsgroup: $tgroup for project $project does not exist\r\n";
        updateLog($db_handle,$project,$text);
        foreach ($days as $day){
            echo "Newsgroup $tgroup for project $project does not exist<br>\n";
            updateNewsStats($db_handle,$project,$mysql_date,$day,-1,-1,-1,-1,-1);
        }
        continue;
    }

    # Cache all message IDs & Timestamps for later search from 2 months ago
    $messages = null;
    $two_month_date = getDateBefore(DAYS_IN_TWO_MONTHS);
    $two_month_post = $news->getFirstArticleFromDate($ginfo->getFirstPost(),$ginfo->getLastPost(),$two_month_date);
    if ($two_month_post != -1){ # If there are posts 2 months or older
        $messages = $news->cacheMessageID($two_month_post,$ginfo->getLastPost());
        $cachedposts = $news->cachePosts($two_month_post,$ginfo->getLastPost());
        # Get standard deviation and mean from last two months posts time
        $extra_time = get_times($cachedposts);
        list($mean,$std) = get_stats($cachedposts);
    }

    foreach ($days as $day){ # For every expected date
        $text .= "\r\n\r\nINFORMATION FOR THE LAST ".$day. " DAYS\r\n\r\n";

        # Get the date for $day days ago
        $date = getDateBefore($day);

        # Get first article from date. Start looking from the first post from
        # two months ago until the most recent.
        $num = $news->getFirstArticleFromDate($two_month_post,$ginfo->getLastPost(),$date);

        # No posts (since date || in newsgroup)
        if ($num == -1 || $ginfo->getNumberPosts() == 0){
            echo "No posts since date or no posts in group<br>\n";
            $text .= "No posts since date or no posts in group\r\n";
            updateLog($db_handle,$project,$text);
            updateNewsStats($db_handle,$project,$mysql_date,$day,0,0,-1,0,0);
            continue;
        }

        # Get total number of posts since date
        $numposts = $news->numPostsSinceDate($num,$ginfo->getLastPost());
        $text .= "Number posts: ".$numposts."\r\n";

        # Average number of posts
        $average = round(($numposts / $day),1);
        $text .= "Average: ".$average."\r\n";

        # Cache the new posts for processing
        $cposts = $news->cachePosts($num,$ginfo->getLastPost());
        $total = count($cposts);

        # Insiders Vs. Outsiders: fraction of the cached posts written by
        # insiders. An empty cache yields 0 instead of dividing by zero.
        $insidercount = get_insiders($text,$ins,$cposts,$project);
        $insidercount = ($total > 0) ? $insidercount / $total : 0;

        # Answered & Unanswered posts
        list($avg_time,$unanswered) = answered_and_unanswered($text,$cposts,
                                                              $extra_time,$mean,
                                                              $std);

        flush_print("DAY ".$day.": ".$numposts."|".$average."|".$avg_time."|".$unanswered."|".$insidercount);
        updateNewsStats($db_handle,$project,$mysql_date,$day,$numposts,$average,
                        $avg_time,$unanswered,$insidercount);
    }

    # Store the complete log for this group
    updateLog($db_handle,$project,$text);
}

echo "<b>Ending at: </b>".date("H:i:s")."<br>\n";
echo "</body>\n</html>\n";

# Release the database and news server connections
$db_handle = null;
$db_connection = null;
$news->disconnect();
$news = null;
# Store the processing log of a project in the log table.
#
# Looks for an existing row for $project: when one is found its news_text is
# overwritten, otherwise a new (project, news_text) row is inserted.
#
#   $db_handle  MySQL link the queries are sent over
#   $project    project the log belongs to
#   $text       log text; double quotes are replaced by single quotes before
#               the text is stored
#
# Returns nothing. Query failures are not reported.
function updateLog($db_handle,$project,$text){
    $text = str_replace("\"","'",$text);

    # The log contains author names and message IDs copied from newsgroup
    # posts. Replacing double quotes alone is not enough: a backslash in that
    # data could still break out of the quoted SQL string, so both values are
    # escaped for the connection before they are embedded.
    $safe_text = mysql_real_escape_string($text,$db_handle);
    $safe_project = mysql_real_escape_string($project,$db_handle);
    $table = log_table();

    $query = "SELECT * FROM ".$table." WHERE project = \"".$safe_project."\"";
    $result = mysql_query($query,$db_handle);

    if ($result && mysql_num_rows($result)){
        # A row already exists for this project: replace its text
        $query = "UPDATE ".$table." SET news_text = \"".$safe_text."\"";
        $query .= " WHERE project = \"".$safe_project."\"";
    }else{
        $query = "INSERT INTO ".$table." (project, news_text) ";
        $query .= "VALUES(\"".$safe_project."\",\"".$safe_text."\")";
    }

    mysql_query($query,$db_handle);
}
# Write one period's newsgroup statistics for a project and date.
#
# Checks the stats table for a row matching $project and $mysql_date, then
# delegates to updateNews() when such a row exists and to insertNews() when it
# does not. All arguments are passed through unchanged.
#
#   $db_handle     MySQL link the queries are sent over
#   $project       project the statistics belong to
#   $mysql_date    value compared against / stored in stats_date
#   $day           period length, used to pick the news_<day>_* columns
#   $numposts, $average, $avg_time, $unanswered, $insidercount
#                  the statistic values to store
function updateNewsStats($db_handle,$project,$mysql_date,$day,$numposts,
                         $average,$avg_time,$unanswered,$insidercount){
    $lookup = sprintf("SELECT * FROM %s WHERE project = \"%s\" AND stats_date = \"%s\"",
                      stats_table(),$project,$mysql_date);
    $existing = mysql_query($lookup,$db_handle);
    $row_found = $existing && mysql_num_rows($existing);

    if (!$row_found){
        # Failed lookup or no matching row: create the row
        insertNews($db_handle,$project,$mysql_date,$day,$numposts,$average,
                   $avg_time,$unanswered,$insidercount);
        return;
    }

    updateNews($db_handle,$project,$mysql_date,$day,$numposts,$average,
               $avg_time,$unanswered,$insidercount);
}
# Update the news_<day>_* columns of the existing stats row identified by
# $project and $mysql_date.
#
# The five statistic values are embedded in the statement as given (unquoted);
# $project and $mysql_date are embedded as double-quoted strings. The result
# of the query is not checked.
function updateNews($db_handle,$project,$mysql_date,$day,$numposts,$average,
                    $avg_time,$unanswered,$insidercount){
    $table = stats_table();
    $column = "news_".$day; # Common prefix of this period's columns

    $assignments = array(
        $column."_answer_average_time = ".$avg_time,
        $column."_unanswered_number = ".$unanswered,
        $column."_insider_posts = ".$insidercount,
        $column."_number_posts = ".$numposts,
        $column."_average_posts = ".$average,
    );

    $query = "UPDATE ".$table." SET ".implode(", ",$assignments);
    $query .= " WHERE project = \"".$project."\"";
    $query .= " AND stats_date = \"".$mysql_date."\"";

    mysql_query($query,$db_handle);
}
# Insert a new stats row for $project and $mysql_date carrying the
# news_<day>_* values of one period.
#
# The five statistic values are embedded in the statement as given (unquoted);
# $project and $mysql_date are embedded as double-quoted strings. The result
# of the query is not checked.
function insertNews($db_handle,$project,$mysql_date,$day,$numposts,$average,
                    $avg_time,$unanswered,$insidercount){
    $table = stats_table();
    $column = "news_".$day; # Common prefix of this period's columns

    $stat_columns = array(
        $column."_answer_average_time",
        $column."_unanswered_number",
        $column."_insider_posts",
        $column."_number_posts",
        $column."_average_posts",
    );
    $columns = "project,stats_date,".implode(", ",$stat_columns);

    # Values are listed in the same order as the columns above
    $values = "\"".$project."\",\"".$mysql_date."\",";
    $values .= $avg_time.",".$unanswered.",".$insidercount.",";
    $values .= $numposts.",".$average;

    $query = "INSERT INTO ".$table." (".$columns.") VALUES(".$values.")";
    mysql_query($query,$db_handle);
}
# Count the posts written by insiders of a project and list their authors in
# the log.
#
#   $text          log text, passed by reference; a "Project insiders:" header,
#                  one line per insider post (the author) and a blank line are
#                  appended
#   $ins           object whose isInsider($author,$project) decides whether an
#                  author counts as an insider
#   $cached_posts  array of post objects providing getAuthor()
#   $project       project the authors are checked against
#
# Returns the number of insider posts as an integer; 0 when there are none
# (the counter used to be left uninitialised, which made the function return
# null in that case).
function get_insiders(&$text,$ins,$cached_posts,$project){
    $insidercount = 0;
    $text .= "Project insiders:\r\n\r\n";

    foreach ($cached_posts as $post){
        $author = $post->getAuthor();
        if ($ins->isInsider($author,$project)){
            $text .= $author."\r\n";
            $insidercount++;
        }
    }

    $text .= "\r\n";
    return $insidercount;
}
# Build a lookup table of posting times.
#
#   $cached_posts  array (indexed from 0) of post objects providing
#                  getDate(), getTime() and getMessageID()
#
# Returns an array mapping each post's message ID to the value that
# getUnixTimestamp() produces for the post's date and time. When a message ID
# occurs more than once the last occurrence wins.
function get_times($cached_posts){
    $posted_at = array();
    $post_count = count($cached_posts);
    $n = 0;

    while ($n < $post_count){
        $article = $cached_posts[$n];
        $stamp = getUnixTimestamp($article->getDate(),$article->getTime());
        $posted_at[$article->getMessageID()] = $stamp;
        $n++;
    }

    return $posted_at;
}
# Measure how long replies took and count unanswered top-level posts.
#
#   $cached_posts  array (indexed from 0) of post objects providing
#                  getMessageID(), getReference(), getDate() and getTime()
#
# A post with an empty reference is a top-level post. A reply is only
# attributed to its referenced post when that post has already been seen
# earlier in $cached_posts; other replies are ignored.
#
# Returns array($timediff,$unanswered):
#   $timediff    list of (reply time - referenced post time) values, one per
#                attributed reply
#   $unanswered  number of top-level posts without any attributed reply
function get_timediffs($cached_posts){
    $replies_of = array();    # message ID => "replyID,replyID,..." ("" if none)
    $posted_at = array();     # message ID => timestamp
    $thread_start = array();  # message ID => 1 for every top-level post

    # Populate data structures
    $post_count = count($cached_posts);
    for($n=0;$n<$post_count;$n++){
        $article = $cached_posts[$n];
        $id = $article->getMessageID();
        $parent = $article->getReference();

        if (!isset($replies_of[$id])){ # First time
            $replies_of[$id] = "";
        }

        if ($parent == ""){
            $thread_start[$id] = 1;
        }else if (isset($replies_of[$parent])){ # Only for found posts
            $replies_of[$parent] .= $id.",";
        }

        $posted_at[$id] = getUnixTimestamp($article->getDate(),$article->getTime());
    }

    # Get time differences
    $delays = array();
    $without_reply = 0;
    foreach ($replies_of as $id => $reply_list){
        if ($reply_list == ""){ # No replies
            if (isset($thread_start[$id])){
                $without_reply++;
            }
            continue;
        }

        $asked_at = $posted_at[$id];
        $reply_ids = explode(",",$reply_list);
        array_pop($reply_ids); # Drop the empty piece after the trailing comma
        foreach ($reply_ids as $reply_id){
            $delays[] = $posted_at[$reply_id] - $asked_at;
        }
    }

    return array($delays,$without_reply);
}
# Classify posts into top-level posts and replies, log what was found and
# measure how long replies took.
#
#   $text          log text, passed by reference; a line is appended for every
#                  post and for every answered or unanswered post
#   $cached_posts  array of post objects providing getMessageID(),
#                  getReference(), getDate() and getTime()
#   $extra_times   message ID => timestamp lookup, consulted for referenced
#                  posts that are not part of $cached_posts
#
# A post with an empty reference is a top-level post. The time of a referenced
# post is taken from $cached_posts first and from $extra_times second; replies
# whose referenced post is found in neither are logged and skipped.
#
# Returns array($timediff,$unanswered,$old_unanswered):
#   $timediff        list of (reply time - referenced post time) values
#   $unanswered      number of top-level posts without a reply
#   $old_unanswered  0 when is_before_today() reports at least one unanswered
#                    top-level post as old, -1 otherwise (bug 115497)
function get_answers(&$text,$cached_posts,$extra_times){
    $answers = array();   # message ID => "replyID,replyID,..." ("" if none)
    $times = array();     # message ID => timestamp of the cached post
    $timediff = array();
    $top_level = array(); # message ID => 1 for every top-level post
    $unanswered = 0;
    $old_unanswered = -1; // Bug # 115497

    $text .= "Getting top-level and reply posts\r\n\r\n";

    # Populate data structures
    foreach ($cached_posts as $post){
        $id = $post->getMessageID();
        $reference = $post->getReference();

        if (!isset($answers[$id])) # First time
            $answers[$id] = "";

        if ($reference != ""){ # Not for top-level
            $text .= "Reply post: ".$id." to post: ";
            $text .= $reference."\r\n";
            # The referenced post may be outside the cached window (or show
            # up later in the list), so create its entry before appending
            # instead of appending to a missing key.
            if (!isset($answers[$reference]))
                $answers[$reference] = "";
            $answers[$reference] .= $id.",";
        }else{
            $text .= "Top level post: ".$id."\r\n";
            $top_level[$id] = 1;
        }

        $times[$id] = getUnixTimestamp($post->getDate(),$post->getTime());
    }

    $text .= "\r\nGetting time differences\r\n\r\n";

    # Get time differences
    foreach ($answers as $key => $replies){
        if ($replies == ""){ # No replies
            if (isset($top_level[$key])){
                $text .= "Unanswered top level post: ".$key."\r\n";
                $unanswered++;
                if (is_before_today($times[$key])){
                    $old_unanswered = 0;
                }
            }
            continue;
        }

        $ans = substr($replies,0,-1); # Reply list without the trailing comma
        if (isset($times[$key])){
            $text .= "Answer ".$ans." found on main search window\r\n";
            $original_post_time = $times[$key];
        }else if (isset($extra_times[$key])){
            $text .= "Answer ".$ans." found on extended search ";
            $text .= "window\r\n";
            $original_post_time = $extra_times[$key];
        }else{
            $text .= "Answer ".$ans." for post $key not found on ";
            $text .= "allowed window\r\n";
            continue; # Not found in the extra time window
        }

        $reply_ids = explode(",",$replies);
        array_pop($reply_ids); # Drop the empty piece after the trailing comma
        foreach ($reply_ids as $reply_id)
            $timediff[] = $times[$reply_id] - $original_post_time;
    }

    $text .= "\r\n";
    return array($timediff,$unanswered,$old_unanswered);
}
# Tell whether a timestamp lies more than ONE_DAY before the coming midnight.
#
#   $date  timestamp to test
#
# The reference point is 00:00:00 of the day after the current date, as
# computed by mktime(). Returns 1 when $date + ONE_DAY is earlier than that
# moment and 0 otherwise.
# NOTE(review): ONE_DAY is defined outside this file; presumably the number of
# seconds in a day - confirm.
function is_before_today($date){
    $next_midnight = mktime(0, 0, 0, date("m"), date("d")+1, date("Y"));
    return (($date + ONE_DAY) < $next_midnight) ? 1 : 0;
}
# Compute the mean and standard deviation of the reply delays found in a set
# of cached posts.
#
#   $cached_posts  array of post objects, handed to get_timediffs()
#
# The mean is rounded to a whole number before the deviations are taken. The
# squared deviations are divided by (count - 1), or by 1 when there is only a
# single delay, and the square root is rounded as well.
#
# Returns array($mean,$std); array(0,0) when there are no reply delays.
function get_stats($cached_posts){
    list($delays,$ignored) = get_timediffs($cached_posts);

    $delay_count = count($delays);
    if ($delay_count == 0){
        return array(0,0);
    }

    # Mean, rounded
    $sum = 0;
    foreach ($delays as $delay){
        $sum += $delay;
    }
    $mean = round($sum / $delay_count);

    # Sum of squared deviations from the rounded mean
    $squares = 0;
    foreach ($delays as $delay){
        $squares += pow(($delay - $mean),2);
    }

    $divisor = ($delay_count > 1) ? $delay_count - 1 : $delay_count;
    $std = round(sqrt($squares / $divisor));

    return array($mean,$std);
}
# Compute the average time to reply for a set of posts, ignoring reply delays
# that fall outside the comparison range, and report the unanswered count.
#
#   $text          log text, passed by reference; the comparison range, the
#                  output of get_answers() and the number of skipped delays
#                  are appended
#   $cached_posts  array of post objects, handed to get_answers()
#   $times         message ID => timestamp lookup, handed to get_answers()
#   $mean          mean reply delay used as the centre of the range
#   $std_dev       standard deviation used to size the range
#
# The comparison range is mean +/- (TIMES_STD * standard deviation), with the
# lower bound clamped to 0.
#
# Returns array($average_ttr,$unanswered):
#   $average_ttr  rounded average of the delays inside the range; when no
#                 delay is inside the range, the third value returned by
#                 get_answers() is used instead (bug 115497)
#   $unanswered   unanswered count reported by get_answers()
function answered_and_unanswered(&$text,$cached_posts,$times,$mean,$std_dev){
    $average_ttr = 0;
    $skip_count = 0;
    $count = 0;
    $sum = 0;

    # Get ranges
    $low = $mean - (TIMES_STD * $std_dev);
    $low = ($low < 0)?0:$low;
    $high = $mean + (TIMES_STD * $std_dev);
    $text .= "Comparison ranges: mean +/- (".TIMES_STD." * standard deviation)";
    $text .= "\r\n";
    $text .= "low: ".$low."\r\n";
    $text .= "high: ".$high."\r\n\r\n";

    list($timediffs,$unanswered,$old) = get_answers($text,$cached_posts,$times);

    foreach ($timediffs as $diff){
        if ($diff >= $low && $diff <= $high){
            $sum += $diff;
            $count++;
        }else{
            $skip_count++;
        }
    }

    # The counter was previously read under a different (uninitialised) name,
    # so the log showed an empty count when nothing was skipped.
    $text .= $skip_count." articles not within comparison range\r\n\r\n";

    if ($count != 0)
        $average_ttr = round($sum / $count);
    else // Bug 115497
        $average_ttr = $old;

    return array($average_ttr,$unanswered);
}
# Print a line of progress output and push it out immediately.
#
#   $text  text to print; "<br>" and a newline are appended
#
# The current output buffer is flushed with ob_flush() and the system write
# buffers with flush(), so the line does not wait for the script to finish.
function flush_print($text){
    $line = $text."<br>\n";
    print($line);
    ob_flush();
    flush();
}
?>