blob: 13d4710bdfc6b486fd251398f0511b91b0f0e4b7 [file] [log] [blame]
<?php
require_once "/home/data/httpd/eclipse-php-classes/system/dbconnection_dashboard_rw.class.php";
require_once($_SERVER['DOCUMENT_ROOT'] . "/projects/stats/news.class.php");
require_once($_SERVER['DOCUMENT_ROOT'] . "/projects/stats/post.class.php");
require_once($_SERVER['DOCUMENT_ROOT'] . "/projects/stats/config.php");
require_once($_SERVER['DOCUMENT_ROOT'] . "/projects/stats/utils.php");
require_once($_SERVER['DOCUMENT_ROOT'] . "/projects/stats/insiders.class.php");
require_once($_SERVER['DOCUMENT_ROOT'] . "/projects/stats/hostname.php");
require_once($_SERVER['DOCUMENT_ROOT'] . "/projects/common/projects-info.class.php");
# ---------------------------------------------------------------------------
# Newsgroup statistics run.
#
# For every project that is not excluded from the dashboard this script:
#   1. caches the posts of each of the project's newsgroups, starting at the
#      first article from DAYS_IN_TWO_MONTHS days ago,
#   2. derives the mean and standard deviation of the reply times from that
#      cache (get_stats),
#   3. for every look-back period listed in DAYS stores the number of posts,
#      the average posts per day, the average answer time, the number of
#      unanswered posts and the insider ratio (updateNewsStats),
#   4. stores the processing log of the project (updateLog).
# Progress is echoed as HTML while the run proceeds.
# ---------------------------------------------------------------------------
echo "<html>\n<body>\n";
echo "<b>Starting at: </b>".date("H:i:s")."<br>\n";

# Establish a connection to the newsgroups server
$news = new News();
$news->connect(NEWS_SERVER,NEWS_PORT,NEWS_TIMEOUT);
$news->authenticate(NEWS_USER,NEWS_PASS);

# Establish a connection to the database server
$dbc = new DBConnectionDashboard();
$dbh = $dbc->connect();
echo "<br>\nConnected to DB<br>\n";

# List of projects
$projects = New ProjectsInfo();

# Group information
$ginfo = new GInfo();       // Part of news.class.php

# Insider object
$ins = new Insider();

$mysql_date = date('Y-m-d');

# How many days ago do we want stats for
$days = explode(",",DAYS);
$num_days = count($days);

# flush_print() flushes the output buffer, so make sure one is active
if (ob_get_level() == 0){
  ob_start();
}

# For every project available
$project_count = $projects->hasProjects();
echo "Total number of projects to process: ".$project_count."<br>\n";
for($pindex=0;$pindex<$project_count;$pindex++){
  $project = $projects->getProject($pindex);
  if ($project->exclude_from_dashboard())   // Don't process projects with
    continue;                               // this XML tag
  flush_print("<b>Processing project: ".$project->getName()."</b>");

  # Initialize all variables for this project
  $text = "";                       # New log for this group
  $num_valid_newsgroups = -1;       # Stays -1 until a usable newsgroup is seen
  $all_messages = array();
  $all_cachedposts = array();
  $messages = null;
  $cachedposts = null;
  $two_month_posts = array();       # newsgroup => first article of the two
                                    # month window (article numbers are only
                                    # meaningful inside their own newsgroup)
  $two_month_date = getDateBefore(DAYS_IN_TWO_MONTHS);

  # Does this project have newsgroups? If so cache the data for all newsgroups
  # for the last two months if available
  if ($project->hasNewsgroups() > 0){       # At least one to process
    for($nindex=0;$nindex<$project->hasNewsgroups();$nindex++){
      $newsgroup = $project->getNewsgroup($nindex);
      flush_print("&nbsp;&nbsp;Getting information for newsgroup $newsgroup<br>\n");

      # Select group and get info
      $ginfo = $news->selectGroup($newsgroup);
      if ($ginfo->getNewsgroupName() == -1){   // Non-existent newsgroup
        $text .= "Non-existant newsgroup $newsgroup\r\n";
        continue;
      }
      if ($ginfo->getNumberPosts() == 0){      // No posts in newsgroup
        $text .= "No posts found for newsgroup $newsgroup\r\n";
        continue;
      }
      $num_valid_newsgroups++;                 // At least one valid newsgroup

      $two_month_post = $news->getFirstArticleFromDate($ginfo->getFirstPost(),
                                                       $ginfo->getLastPost(),
                                                       $two_month_date);
      if ($two_month_post == -1)               // No posts from 2 months ago
        continue;

      # Remember where the window starts in THIS newsgroup for the per-day
      # searches further down.
      $two_month_posts[$newsgroup] = $two_month_post;

      # Cache all message IDs & Timestamps for later search from 2 months ago
      $messages = $news->cacheMessageID($two_month_post,$ginfo->getLastPost());
      $all_messages = $all_messages + $messages;  // Message IDs are unique (RFC977)

      # The post caches are walked by position (0..count-1) in get_times(),
      # get_timediffs() and get_answers(), so they have to be appended. The
      # array union operator would silently drop every post whose position
      # is already taken by a post of an earlier newsgroup.
      $cachedposts = $news->cachePosts($two_month_post,$ginfo->getLastPost());
      $all_cachedposts = array_merge($all_cachedposts,$cachedposts);
    }
  }

  # Get standard deviation and mean from last two months posts time
  $extra_time = get_times($all_cachedposts);
  list($mean,$std) = get_stats($all_cachedposts);

  for($ptr=0;$ptr<$num_days;$ptr++){           # For every expected date
    $day = $days[$ptr];
    flush_print("Info for the last $day days");
    $text .= "\r\n\r\nINFORMATION FOR THE LAST ".$day. " DAYS\r\n\r\n";

    if ($num_valid_newsgroups == -1){          // No valid newsgroups
      echo "No newsgroups found for project ".$project->getName()."<br>\n";
      $text = "No newsgroups found for project ".$project->getName()."\r\n";
      updateLog($dbh,$project->getProjectID(),$text);
      updateNewsStats($dbh,$project->getProjectID(),$mysql_date,$day,-1,-1,-1,-1,-1);
      continue;
    }

    # Get the date for $day days ago
    $date = getDateBefore($day);

    # Initialize variables
    $top_level = array();
    $unanswered = 0;
    $avg_time = 0;
    $total_time = 0;
    $responded = 0;
    $insidercount = 0;
    $all_cposts = array();     // Cached posts from all newsgroups
    $group_numposts = 0;       // Total number of posts for all newsgroups
    $total = 0;
    $total_numposts = 0;       // Total number of posts since date
    $gen_num = -1;             // Stays -1 until a group has posts since $date

    # Get info for all newsgroups for this project
    for($nindex=0;$nindex<$project->hasNewsgroups();$nindex++){
      $newsgroup = $project->getNewsgroup($nindex);
      $ginfo = $news->selectGroup($newsgroup); // Select the newsgroup to read
      if ($ginfo->getNewsgroupName() == -1)    // Non-existent newsgroup
        continue;
      if ($ginfo->getNumberPosts() == 0)       // No posts in newsgroup
        continue;

      // Get first article from date. Start looking from this newsgroup's
      // first post of the two month window; when the group had no such post
      // fall back to its very first article so the search range stays valid.
      if (isset($two_month_posts[$newsgroup]))
        $search_from = $two_month_posts[$newsgroup];
      else
        $search_from = $ginfo->getFirstPost();
      $num = $news->getFirstArticleFromDate($search_from,
                                            $ginfo->getLastPost(),
                                            $date);
      $group_numposts += $ginfo->getNumberPosts();

      // No posts since date
      if ($num == -1)
        continue;
      $gen_num++;              // At least one post in group or since date

      // Get total number of posts since date
      $numposts = $news->numPostsSinceDate($num,$ginfo->getLastPost());
      $text .= "Number of posts for newsgroup $newsgroup: $numposts\r\n";
      $total_numposts += $numposts;            // Total for all newsgroups

      // Cache new posts for processing (appended, see the note on the
      // two month cache above)
      $cposts = $news->cachePosts($num,$ginfo->getLastPost());
      $all_cposts = array_merge($all_cposts,$cposts);
      $total += count($cposts);

      // Insiders vs. Outsiders (Committers?)
      // $project --> $groups->getProjectFromNewsgroup($tgroup);
      $insidercount += get_insiders($text,$ins,$cposts,$project);
    }

    if ($gen_num == -1 || $group_numposts == 0){
      echo "No posts found since date or no posts in group for all newsgroups<br>\n";
      $text .= "No posts found since date or no posts in group for all newsgroups\r\n";
      updateLog($dbh,$project->getProjectID(),$text);
      updateNewsStats($dbh,$project->getProjectID(),$mysql_date,$day,0,0,-1,0,0);
      continue;                // Next DAY
    }

    // Average number of posts
    $average = round(($total_numposts / $day),1);
    $text .= "Average number of posts for all newsgroups: $average\r\n";

    // Insider count as a fraction of the cached posts. Nothing cached means
    // no insiders; dividing by zero would put an invalid value in the query.
    if ($total > 0)
      $insidercount /= $total;
    else
      $insidercount = 0;

    // Answered vs. unanswered posts
    list($avg_time,$unanswered) = answered_and_unanswered($text, $all_cposts,
                                                          $extra_time,
                                                          $mean, $std);
    flush_print("DAY ".$day.": ".$total_numposts."|".$average."|".$avg_time."|".$unanswered."|".$insidercount);
    updateNewsStats($dbh,$project->getProjectID(),$mysql_date,$day,
                    $total_numposts,$average,$avg_time,$unanswered,
                    $insidercount);
  }

  // Projects without a valid newsgroup already had their log written inside
  // the loop above.
  if ($num_valid_newsgroups != -1)
    updateLog($dbh,$project->getProjectID(),$text);   // update the DB log
}

echo "<b>Ending at: </b>".date("H:i:s")."<br>\n";
echo "</body>\n</html>\n";

# Release the connections
$dbh = null;
$dbc = null;
$news->disconnect();
$news = null;
// function updateLog($dbh,$project,$text)
//
// Store the processing log of a project so users can retrieve how the
// processing was done. The log will be downloaded by
//   $_SERVER['DOCUMENT_ROOT'] . "projects/dashboard/index.php"
// and
//   $_SERVER['DOCUMENT_ROOT'] . "projects/dashboard/dashboard_detail.php"
//
//   $dbh     - MySQL link the queries are sent through
//   $project - project id identifying the row in the log table
//   $text    - log text; double quotes are stored as single quotes
//
// The existing row of the project is updated, or a new row is inserted when
// there is none. Nothing is returned and a failing write is ignored.
function updateLog($dbh,$project,$text){
  // Kept from the original implementation so stored logs look the same.
  $text = str_replace("\"","'",$text);

  // The log contains author names and message ids read from the news
  // server, so it must be escaped before it is placed inside the statement;
  // a backslash or other special character would otherwise break the query.
  $safe_text = mysql_real_escape_string($text,$dbh);
  $safe_project = mysql_real_escape_string($project,$dbh);

  $query = "SELECT * FROM ".log_table()." WHERE project_id = \"".$safe_project."\"";
  $result = mysql_query($query,$dbh);
  if ($result && mysql_num_rows($result)){
    $query  = "UPDATE ".log_table()." SET news_text = \"".$safe_text."\"";
    $query .= " WHERE project_id = \"".$safe_project."\"";
  }else{
    $query  = "INSERT INTO ".log_table()." (project_id, news_text) ";
    $query .= "VALUES(\"".$safe_project."\",\"".$safe_text."\")";
  }
  mysql_query($query,$dbh);
}
// function updateNewsStats($dbh,$project,$mysql_date,$day,$numposts,
//                          $average,$avg_time,$unanswered,$insidercount)
//
// Store the newsgroup statistics of one project for one look-back period.
// Only one stats row is kept per project per day, and that row may already
// have been created by another stats program (mail, bugs, articles, etc),
// so the table is queried first: an existing row is handed to updateNews(),
// a missing one to insertNews(). The script dies with the MySQL error when
// the lookup itself fails.
function updateNewsStats($dbh,$project,$mysql_date,$day,$numposts,
                         $average,$avg_time,$unanswered,$insidercount){
  $lookup = sprintf('SELECT * FROM %s WHERE project_id = "%s" AND stats_date = "%s"',
                    stats_table(),$project,$mysql_date);

  $existing = mysql_query($lookup,$dbh);
  if (!$existing)
    die(mysql_errno($dbh).": ".mysql_error($dbh));

  // A row for this project and date is already there: refresh it
  if (mysql_num_rows($existing)){
    updateNews($dbh,$project,$mysql_date,$day,$numposts,$average,
               $avg_time,$unanswered,$insidercount);
    return;
  }

  // First statistics of the day for this project
  insertNews($dbh,$project,$mysql_date,$day,$numposts,$average,
             $avg_time,$unanswered,$insidercount);
}
// function updateNews($dbh,$project,$mysql_date,$day,$numposts,$average,
//                     $avg_time,$unanswered,$insidercount)
//
// Write the five news_<day>_* columns of the stats row that already exists
// for the given project and date. The result of the query is not checked.
function updateNews($dbh,$project,$mysql_date,$day,$numposts,$average,
                    $avg_time,$unanswered,$insidercount){
  // Column suffix => value, in the order they appear in the statement
  $columns = array(
    "answer_average_time" => $avg_time,
    "unanswered_number"   => $unanswered,
    "insider_posts"       => $insidercount,
    "number_posts"        => $numposts,
    "average_posts"       => $average
  );

  $assignments = array();
  foreach ($columns as $suffix => $value)
    $assignments[] = "news_".$day."_".$suffix." = ".$value;

  $query  = "UPDATE ".stats_table()." SET ".implode(", ",$assignments);
  $query .= " WHERE project_id = \"".$project."\"";
  $query .= " AND stats_date = \"".$mysql_date."\"";

  mysql_query($query,$dbh);
}
// function insertNews($dbh,$project,$mysql_date,$day,$numposts,$average,
//                     $avg_time,$unanswered,$insidercount)
//
// Insert one stats row for the given project and date, filling the five
// news_<day>_* columns. The result of the query is not checked.
function insertNews($dbh,$project,$mysql_date,$day,$numposts,$average,
                    $avg_time,$unanswered,$insidercount){
  $prefix = "news_".$day."_";

  // Column name => value, in the order they appear in the statement
  $columns = array(
    $prefix."answer_average_time" => $avg_time,
    $prefix."unanswered_number"   => $unanswered,
    $prefix."insider_posts"       => $insidercount,
    $prefix."number_posts"        => $numposts,
    $prefix."average_posts"       => $average
  );

  $query  = "INSERT INTO ".stats_table()." (project_id,stats_date,";
  $query .= implode(", ",array_keys($columns));
  $query .= ") VALUES(\"".$project."\",\"".$mysql_date."\",";
  $query .= implode(",",$columns).")";

  mysql_query($query,$dbh);
}
// function get_insiders(&$text,$ins,$cached_posts,$project)
//
// Count the posts whose author is an insider of the project, as decided by
// $ins->isInsider(). This function should be extended in the future to
// obtain the information from the Eclipse Foundation DB.
//
//   $text         - log, passed by reference; the insider authors are
//                   appended to it, one per line
//   $ins          - object providing isInsider($author,$project)
//   $cached_posts - posts to inspect; each must provide getAuthor()
//   $project      - project passed through to isInsider()
//
// Returns the number of insider posts (0 when there are none).
function get_insiders(&$text,$ins,$cached_posts,$project){
  // Must start at 0: the counter used to be created by the first increment,
  // so a run without insiders returned null instead of a number.
  $insidercount = 0;

  $text .= "Project insiders:\r\n\r\n";
  foreach ($cached_posts as $post){
    $author = $post->getAuthor();
    if ($ins->isInsider($author,$project)){
      $text .= $author."\r\n";
      $insidercount++;
    }
  }
  $text .= "\r\n";

  return $insidercount;
}
// function get_times($cached_posts)
//
// Build a lookup of post times: message id => value returned by
// getUnixTimestamp() for the date and time of the post. The posts are read
// by position, from 0 to count-1.
function get_times($cached_posts){
  $stamps = array();

  for($idx=0, $n=count($cached_posts); $idx<$n; $idx++){
    $entry = $cached_posts[$idx];
    $posted = getUnixTimestamp($entry->getDate(),$entry->getTime());
    $stamps[$entry->getMessageID()] = $posted;
  }

  return $stamps;
}
// function get_timediffs($cached_posts)
//
// Collect the reply delays found in the posts passed. A post counts as a
// reply when it has a reference; the reply is only recorded when the post
// it refers to was seen earlier in the list. Every recorded reply
// contributes one delay (reply time minus time of the referenced post).
// Posts without a reference that received no reply are counted as
// unanswered.
//
// Returns array(list of delays, number of unanswered posts).
function get_timediffs($cached_posts){
  $replies_to = array();   // message id => "reply id,reply id,..."
  $posted_at = array();    // message id => post time
  $starts_thread = array(); // message ids of posts without a reference
  $delays = array();
  $without_reply = 0;

  # Populate data structures
  $n = count($cached_posts);
  for($idx=0;$idx<$n;$idx++){
    $entry = $cached_posts[$idx];
    $id = $entry->getMessageID();
    $parent = $entry->getReference();

    if (!isset($replies_to[$id]))            # First time
      $replies_to[$id] = "";

    if ($parent == "")                       # Top-level post
      $starts_thread[$id] = 1;
    elseif (isset($replies_to[$parent]))     # Only for found posts
      $replies_to[$parent] .= $id.",";

    $posted_at[$id] = getUnixTimestamp($entry->getDate(),$entry->getTime());
  }

  # Get time differences
  foreach ($replies_to as $id => $reply_list){
    if ($reply_list == ""){                  # No replies
      if (isset($starts_thread[$id]))
        $without_reply++;
      continue;
    }

    $asked_at = $posted_at[$id];
    $reply_ids = explode(",",$reply_list);
    array_pop($reply_ids);                   # Empty piece after the last comma
    foreach ($reply_ids as $reply_id)
      $delays[] = $posted_at[$reply_id] - $asked_at;
  }

  return array($delays,$without_reply);
}
// function get_answers(&$text,$cached_posts,$extra_times)
//
// Determine which posts have answers and how long the answers took. The
// time of the answered post is taken from the posts passed (main search
// window) or, when it is not among them, from $extra_times (extended
// window). Answers to posts found in neither are logged and skipped.
//
//   $text         - log, passed by reference; every decision is appended
//   $cached_posts - posts of the main window, read by position 0..count-1;
//                   each must provide getMessageID(), getReference(),
//                   getDate() and getTime()
//   $extra_times  - message id => post time for the extended window
//
// Returns array(list of answer delays,
//               number of unanswered top-level posts,
//               0 when at least one unanswered top-level post is reported
//               by is_before_today(), otherwise -1 (Bug # 115497)).
function get_answers(&$text,$cached_posts,$extra_times){
  $answers = array();      // message id => "reply id,reply id,..."
  $times = array();        // message id => post time (main window)
  $timediff = array();
  $top_level = array();
  $unanswered = 0;
  $old_unanswered = -1;    // Bug # 115497

  $text .= "Getting top-level and reply posts\r\n\r\n";

  # Populate data structures
  $total = count($cached_posts);
  for($i=0;$i<$total;$i++){
    $post = $cached_posts[$i];
    $id = $post->getMessageID();
    $ref = $post->getReference();

    if (!isset($answers[$id]))               # First time
      $answers[$id] = "";

    if ($ref != ""){                         # Not for top-level
      $text .= "Reply post: ".$id." to post: ";
      $text .= $ref."\r\n";
      # The answered post may be outside the main window and therefore
      # unknown so far; create its entry before appending so no
      # undefined index/key error is raised.
      if (!isset($answers[$ref]))
        $answers[$ref] = "";
      $answers[$ref] .= $id.",";
    }else{
      $text .= "Top level post: ".$id."\r\n";
      $top_level[$id] = 1;
    }

    $times[$id] = getUnixTimestamp($post->getDate(),$post->getTime());
  }

  $text .= "\r\nGetting time differences\r\n\r\n";

  # Get time differences
  foreach ($answers as $key => $replies){
    if ($replies == ""){                     # No replies
      if (isset($top_level[$key])){
        $text .= "Unanswered top level post: ".$key."\r\n";
        $unanswered++;
        if (is_before_today($times[$key])){
          $old_unanswered = 0;
        }
      }
      continue;
    }

    $ans = substr($replies,0,-1);            # Reply list without last comma
    if (isset($times[$key])){
      $text .= "Answer ".$ans." found on main search window\r\n";
      $original_post_time = $times[$key];
    }elseif (isset($extra_times[$key])){
      $text .= "Answer ".$ans." found on extended search ";
      $text .= "window\r\n";
      $original_post_time = $extra_times[$key];
    }else{
      $text .= "Answer ".$ans." for post $key not found on ";
      $text .= "allowed window\r\n";
      continue;                              # Not found in the extra time window
    }

    $reply_ids = explode(",",$replies);
    array_pop($reply_ids);                   # Empty piece after the last comma
    foreach ($reply_ids as $reply_id)
      $timediff[] = $times[$reply_id] - $original_post_time;
  }

  $text .= "\r\n";
  return array($timediff,$unanswered,$old_unanswered);
}
// function is_before_today($date)
//
// Tell whether the timestamp passed is older than today: returns 1 when
// the timestamp plus ONE_DAY still lies before the coming midnight,
// 0 otherwise.
function is_before_today($date){
  // Day + 1 at 00:00:00, i.e. the midnight that ends today
  $coming_midnight = mktime(0, 0, 0, date("m"), date("d")+1, date("Y"));

  return (($date + ONE_DAY) < $coming_midnight) ? 1 : 0;
}
// function get_stats($cached_posts)
//
// Obtain the mean and the standard deviation of the reply delays that
// get_timediffs() finds in the posts passed. The mean is rounded before
// the deviation is computed, and the deviation is divided by count-1
// (by 1 when there is a single delay). Both are 0 when there are no delays.
//
// Returns array(mean, standard deviation).
function get_stats($cached_posts){
  list($delays) = get_timediffs($cached_posts);

  $n = count($delays);
  if ($n == 0)
    return array(0,0);

  $mean = round(array_sum($delays) / $n);

  $squares = 0;
  foreach ($delays as $delay)
    $squares += pow(($delay - $mean),2);

  $divisor = ($n != 1) ? $n - 1 : $n;
  $std = round(sqrt($squares/$divisor));

  return array($mean,$std);
}
// function answered_and_unanswered(&$text,$cached_posts,$times,$mean,$std_dev)
//
// Compute the average answer time of the posts passed, only counting the
// answer delays that lie within mean +/- (TIMES_STD * standard deviation);
// the lower limit is never below 0.
//
//   $text         - log, passed by reference; the limits and the number of
//                   delays outside them are appended
//   $cached_posts - posts handed to get_answers()
//   $times        - extended window post times handed to get_answers()
//   $mean         - mean reply delay
//   $std_dev      - standard deviation of the reply delay
//
// Returns array(average answer time, number of unanswered posts). When no
// delay lies within the limits the average is the third value returned by
// get_answers() (Bug 115497).
function answered_and_unanswered(&$text,$cached_posts,$times,$mean,$std_dev){
  $average_ttr = 0;
  $skip_count = 0;
  $count = 0;
  $sum = 0;

  # Get ranges
  $low = $mean - (TIMES_STD * $std_dev);
  $low = ($low < 0)?0:$low;
  $high = $mean + (TIMES_STD * $std_dev);
  $text .= "Comparison ranges: mean +/- (".TIMES_STD." * standard deviation)";
  $text .= "\r\n";
  $text .= "low: ".$low."\r\n";
  $text .= "high: ".$high."\r\n\r\n";

  list($timediffs,$unanswered,$old) = get_answers($text,$cached_posts,$times);

  foreach ($timediffs as $diff){
    if ($diff >= $low && $diff <= $high){
      $sum += $diff;
      $count++;
    }else{
      // Same counter that is initialised above and logged below; a second,
      // misspelled variable used to be incremented here, which left the
      // logged number empty whenever nothing was skipped.
      $skip_count++;
    }
  }
  $text .= $skip_count." articles not within comparison range\r\n\r\n";

  if ($count != 0)
    $average_ttr = round($sum / $count);
  else                          // Bug 115497
    $average_ttr = $old;

  return array($average_ttr,$unanswered);
}
// function flush_print($text)
//
// Echo $text followed by "<br>" and a newline, then push the output out
// immediately so the browser does not have to wait for more input.
function flush_print($text){
  echo $text."<br>\n";
  // ob_flush() raises a notice when no output buffer is active, so only
  // call it when there is one to flush.
  if (ob_get_level() > 0)
    ob_flush();
  flush();
}
?>