| <?php |
| /******************************************************************************* |
| * Copyright (c) 2019 Eclipse Foundation and others. |
| * This program and the accompanying materials |
| * are made available under the terms of the Eclipse Public License 2.0 |
| * which accompanies this distribution, and is available at |
| * https://www.eclipse.org/legal/epl-2.0/ |
| * |
| * SPDX-License-Identifier: EPL-2.0 |
| *******************************************************************************/ |
| /* |
| * This file is a prototype for executing a license scan of a list of |
| * content ids. The list can be provided in a few different forms: |
| * |
| * - When a "request" field is provided in the POST data, its value is |
| * parsed as a JSON structure that contains a "dependencies" field that is |
| * a JSON array of strings, with each string representing one unit of content, |
| * along with a "project" field that is a string containing a project id; |
| * |
| * - When a "content" field is provided in the POST data, its value is |
| * parsed as plain text with each line containing one unit of content |
| * (blank lines and comment lines are skipped; comment lines start with |
| * "#" or "//"); |
| * |
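| * - When a "package-lock" field is provided in the POST data, its value is |
| * parsed as JSON in the style of an NPM package-lock file, and every entry |
| * found under a "dependencies" field (including nested ones) becomes one |
| * unit of content; |
| * |
| * - When a "json" field is provided in the POST data, its value is parsed |
| * as a JSON array of strings, with each string representing one unit of |
| * content; or |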
| * |
| * - When an "id" field is provided in the GET data, it is parsed as a |
| * single unit of content (i.e., a list of one item) |
| * |
| * The id for a unit of content may be expressed as Maven coordinates of the form |
| * "groupid:artifactid[:packaging]:version", as NPMJS coordinates of the form |
| * "[namespace/]name@version", or as ClearlyDefined coordinates of the |
| * form "type/source/namespace/name/version". Mixing formats is supported. |
| * |
| * Output is expressed as ClearlyDefined ids. |
| * |
| * Two sources of information are used to map ids to license information and other |
| * metadata: first, the Eclipse Foundation data is consulted and then |
| * the ClearlyDefined services are called. Future versions of this script |
| * may consult other sources of data. |
| * |
| * usage examples: |
| * |
| * curl -X POST http://localhost/projects/services/license_check.php \ |
| * --data-urlencode content@maven.deps |
| * |
| * curl -X POST http://localhost/projects/services/license_check.php \ |
| * -d $'content=npm/npmjs/@theia/variable-resolver/0.3.19\nnpm/npmjs/@theia/outline-view/0.3.19' |
| * |
| * curl -X POST http://www.eclipse.org/projects/services/license_check.php \ |
| * -d $'json=["npm/npmjs/@theia/variable-resolver/0.3.19","npm/npmjs/@theia/outline-view/0.3.19"]' |
| * |
| * curl -X POST http://localhost/projects/services/license_check.php \ |
| * -d $'request={"project":"ecd.theia", "dependencies": ["npm/npmjs/@theia/variable-resolver/0.3.19","npm/npmjs/@theia/outline-view/0.3.19"]}' |
| * |
| * curl -X POST "http://localhost/projects/services/license_check.php" \ |
| * -d "content=`mvn dependency:list -DskipTests -Dmaven.javadoc.skip=true | grep -Poh '\S+(?=:compile)' | sort | uniq`" | jsonpp | less |
| * |
| * yarn list | grep -Poh "(?:([^\/\s]+)\/)?([^\/\s]+)@\D*(\d+(?:\.\d+)*)" \ |
| * | curl -X POST "http://localhost/projects/services/license_check.php?XDEBUG_SESSION_START=ECLIPSE_DBGP" \ |
| * --data-urlencode content@- | jsonpp | less |
| * |
| * Note that this works with an instance running on localhost. |
| * |
| * TODO Validate that we can scale to ~4K lines |
| * TODO support file upload. |
| * TODO Customize license list based on project (e.g., science.* can use LGPL) |
| * TODO Parameters for default type and provider. |
| */ |
| |
| require_once dirname(__FILE__) . '/../classes/common.php'; |
| require_once dirname(__FILE__) . '/../classes/Project.class.php'; |
| require_once dirname(__FILE__) . '/../classes/License.class.inc'; |
| require_once dirname(__FILE__) . '/../classes/ProjectContentIdMapper.class.inc'; |
| |
/**
 * Try to massage a content identifier into ClearlyDefined coordinates, i.e.
 * <em>type/provider/namespace/name/revision</em>.
 *
 * The following forms are recognized, and are tested in this order:
 *
 * <ul>
 * <li>Five slash-separated segments are taken to already be ClearlyDefined
 * coordinates; the matching portion of the input is answered unchanged;</li>
 * <li>Maven coordinates, <em>groupid:artifactid[:packaging]:version</em>, are
 * converted to <em>maven/mavencentral/groupid/artifactid/version</em>, unless the
 * group id is "p2.eclipse-plugin" or "p2.eclipse-feature", in which case
 * <em>p2/orbit/groupid/artifactid/version</em> is answered;</li>
 * <li>p2 coordinates that point at a jar in a "lib/" folder are converted
 * to Maven Central coordinates;</li>
 * <li>NPMJS coordinates, <em>[namespace/]name@version</em>, are converted to
 * <em>npm/npmjs/namespace/name/version</em>, with "-" standing in for a
 * missing namespace.</li>
 * </ul>
 *
 * For the Maven, p2 and NPMJS forms only the leading dotted-numeric portion
 * of the version is retained (e.g. "1.10.5.v20190526-1402" becomes "1.10.5").
 *
 * Answers <code>null</code> when the id is not recognized.
 *
 * @param string $id
 * @return NULL|string
 */
function normalizeId($id) {
	$matches = null;

	// Just pass through anything that's already in a ClearlyDefined coordinate form.
	if (preg_match('/([\w@\-.]+)(?:\/[\w@\-.]+){4}/', $id, $matches)) {
		return $matches[0];
	}

	/*
	 * Deal with Maven coordinates. There is a special case that we need to deal with
	 * because of Tycho using p2 repositories. When the coordinates start with
	 * "p2.eclipse-plugin" or "p2.eclipse-feature", we generate an id for p2/orbit.
	 */
	if (preg_match('/([\w@\-.]+):([\w@\-.]+)(?::[\w@\-.]+)?:(\d+(?:\.\d+)*)/', $id, $matches)) {
		// The dot is escaped so that only a literal "p2.eclipse-..." group id
		// is treated as p2 content.
		$type = preg_match('/^p2\.eclipse-(?:plugin|feature)$/', $matches[1])
			? 'p2/orbit'
			: 'maven/mavencentral';
		return "{$type}/{$matches[1]}/{$matches[2]}/{$matches[3]}";
	}

	/*
	 * Some p2 cases (org.apache.ant only, I think) provide all of the information that
	 * we need to match against Maven directly.
	 *
	 * e.g. "p2.eclipse-plugin:org.apache.ant:jar:lib/ant-jsch.jar:1.10.5.v20190526-1402"
	 * maps to "maven/mavencentral/org.apache.ant/ant-jsch/1.10.5"
	 */
	if (preg_match('/p2\.eclipse-plugin:([\w@\-.]+)(?::[\w@\-.]+)?:lib\/([\w@\-.]+)\.jar:(\d+(?:\.\d+)*)/', $id, $matches)) {
		return "maven/mavencentral/{$matches[1]}/{$matches[2]}/{$matches[3]}";
	}

	/*
	 * Deal with NPMJS coordinates. These take the form "namespace/name@version",
	 * e.g., "@webassemblyjs/wast-printer@1.7.8". The namespace is optional; when
	 * absent, we use a dash ("-").
	 *
	 * Versions provided by yarn (<code>yarn list</code>) may carry range
	 * qualifiers (e.g. "^1.2.3"). Any non-digit characters between the "@" and
	 * the first digit are skipped.
	 *
	 * FIXME Sort out what to do with the range qualifiers
	 *
	 * FIXME Don't assume that values provided in NPM format are NPM.
	 */
	if (preg_match('/(?:([^\/\s]+)\/)?([^\/\s]+)@\D*(\d+(?:\.\d+)*)/', $id, $matches)) {
		$namespace = empty($matches[1]) ? '-' : $matches[1];
		return "npm/npmjs/{$namespace}/{$matches[2]}/{$matches[3]}";
	}

	return null;
}
| |
/**
 * Parse plain text containing one content id per line.
 *
 * Blank lines and comment lines (lines that start with "#" or "//" once
 * surrounding whitespace is removed) are skipped. Every other line is
 * passed to normalizeId(): ids that are recognized are added as keys of
 * the "unmatched" entry of the result (each mapped to an empty array);
 * lines that are not recognized are appended to the "invalid" entry.
 * Either entry is only present when it has at least one member.
 *
 * @param string $content
 * @return array
 */
function loadFromString($content) {
	$results = array();

	// Splitting up front (rather than testing the truthiness of each line
	// read) ensures that a blank line, or a line consisting of "0", does
	// not end processing early.
	foreach (preg_split('/\r\n|\r|\n/', (string) $content) as $rawLine) {
		$line = trim($rawLine);

		if ($line === '') continue;
		if (preg_match('/^(?:#|\/\/)/', $line)) continue;

		if ($id = normalizeId($line)) {
			$results['unmatched'][$id] = array();
		} else {
			$results['invalid'][] = $line;
		}
	}

	return $results;
}
| |
/**
 * Decode the given JSON text and collect an id for every entry that
 * visitPackages() reports. Each reported id becomes a key of the
 * "unmatched" entry of the result, mapped to an empty array. When
 * nothing is reported, an empty array is answered.
 *
 * @param string $content JSON text
 * @return array
 */
function loadFromPackageLockString($content) {
	$collected = array();

	$collect = function($packageId) use (&$collected) {
		$collected['unmatched'][$packageId] = array();
	};

	visitPackages(json_decode($content, true), $collect);

	return $collected;
}
| |
/**
 * Walk the "dependencies" entries of a decoded structure, depth first,
 * and invoke the callback with an id of the form
 * "npm/npmjs/{namespace}/{name}/{version}" for each entry.
 *
 * A key that contains a slash is split at its last slash into namespace
 * and name; otherwise the namespace is "-". After the callback has been
 * invoked for an entry, the entry itself is visited so that nested
 * "dependencies" are reported as well.
 *
 * @param mixed $root decoded JSON; nothing happens when it has no "dependencies"
 * @param callable $callback invoked with one argument, the id
 */
function visitPackages($root, $callback) {
	if (isset($root['dependencies'])) {
		foreach ($root['dependencies'] as $packageName => $entry) {
			$parts = null;
			$isScoped = preg_match('/^(.+)\/(.+)$/', $packageName, $parts);

			$namespace = $isScoped ? $parts[1] : '-';
			$shortName = $isScoped ? $parts[2] : $packageName;

			$id = "npm/npmjs/{$namespace}/{$shortName}/{$entry['version']}";
			call_user_func($callback, $id);

			// Nested dependencies are reported after their parent.
			visitPackages($entry, $callback);
		}
	}
}
| |
/**
 * Match unmatched ids against Eclipse projects.
 *
 * For each id in the "unmatched" set, ProjectContentIdMapper is asked for
 * the Eclipse project that the ClearlyDefined coordinates belong to. When
 * a project is identified, the id is removed from the "unmatched" set and
 * an entry is added to the "approved" set carrying the SPDX expression for
 * the project's licenses, with the project id as the authority.
 *
 * This function modifies the parameter. When there is no "unmatched" set
 * (e.g. because every line of the input was invalid), nothing happens.
 *
 * @param array $results
 */
function matchAgainstEclipseProjects(&$results) {
	if (empty($results['unmatched']) || !is_array($results['unmatched'])) return;

	foreach (array_keys($results['unmatched']) as $id) {
		$projectId = ProjectContentIdMapper::getEclipseProjectFor($id);
		if (!$projectId) continue;

		$licenses = License::getLicensesForProject($projectId);
		$spdx = License::getSPDXExpression($licenses);

		unset($results['unmatched'][$id]);
		$results['approved'][$id] = array(
			'id' => $id,
			'license' => $spdx,
			'status' => 'approved',
			'sourceUrl' => '',
			'definitionUrl' => '',
			'authority' => $projectId,
			'confidence' => 100
		);
	}
}
| |
/**
 * Private function.
 *
 * Match against the consolidated data from Eclipse Foundation
 * sources. The consolidated data is stored in the dashboard
 * database by a script that runs periodically.
 *
 * This function modifies the parameter. As we find matches, the id
 * is removed from the "unmatched" set and the metadata that we do
 * find is added to an array keyed by the status of the match. When there
 * is no "unmatched" set, nothing happens.
 *
 * <p>Ids that contain "p2.eclipse.plugin" or "p2.eclipse.feature" are looked
 * up as "p2.eclipse-plugin" / "p2.eclipse-feature". When that rewrite
 * changes the id, the id that was actually used is recorded in the "actual"
 * field of the match and a line is added to the "log" entry of the results.
 *
 * <p>The <code>ThirdPartyLicenseException</code> table is created and maintained by the
 * <code>import_third_party_license_data.php</code> "project service" script.
 *
 * @param array $results
 * @param string|NULL $project a project id, used to look up exceptions.
 */
function matchAgainstFoundationData(&$results, $project=null) {
	if (empty($results['unmatched']) || !is_array($results['unmatched'])) return;

	foreach (array_keys($results['unmatched']) as $id) {
		$real = preg_replace('/p2\.eclipse\.(plugin|feature)/', 'p2.eclipse-$1', $id);

		$match = findBestMatch($real);

		// If we don't find something, or the something that we do find
		// is restricted, then look for an exception.
		if (!$match || $match['status'] == 'restricted') {
			if ($exception = findBoardException($real, $project)) {
				$match = $exception;
			}
		}

		if (!$match) continue;

		if (strcmp($id, $real) != 0) {
			$match['actual'] = $real;
			$results['log'][] = "{$id} => {$real}";
		}
		$match['id'] = $id;

		unset($results['unmatched'][$id]);
		$results[$match['status']][$id] = $match;
	}
}
| |
/**
 * Private function.
 *
 * Attempt to find a match for a particular artifact in the Eclipse Foundation
 * intellectual property database (the <code>ThirdPartyLicense</code> table).
 *
 * An exact match on the id is always a candidate. According to the IP Policy,
 * service releases should match either a minor release or another service
 * release at the same minor level, so when the id ends in a version of the
 * form <em>major.minor[.service]</em>, rows for any service release at the same
 * minor level are candidates as well.
 *
 * Only one row is answered: 'approved' rows are preferred, and among
 * service-release candidates the one whose service number is closest to
 * the requested one is preferred. The "id" field of the answered row is
 * set to the id that was asked for.
 *
 * @param string $id an id in ClearlyDefined format
 * @return mixed|NULL the matching row or NULL if there is no match.
 */
function findBestMatch($id) {
	$where = array('id=":id"');
	$order = array('if(status="approved",0,1)');
	$args = array(':id' => $id);

	/*
	 * Map the id into a LIKE pattern and a regular expression capable of
	 * matching service releases at the same minor level.
	 *
	 * e.g., for "maven/mavencentral/com.github.jnr/jnr-posix/3.0.29" the
	 * LIKE pattern is "maven/mavencentral/com.github.jnr/jnr-posix/3.0.%"
	 * and the regular expression is the quoted form of
	 * "maven/mavencentral/com.github.jnr/jnr-posix/3.0" followed by "\.[0-9]+".
	 */
	$matches = null;
	if (preg_match('/^((?:[^\/]+\/){4}v?(?:\d+\.\d+))(?:\.(\d+))?/', $id, $matches)) {
		// Both values are derived from the caller-supplied id, which may
		// contain quotes, so they are handed over as arguments (like ":id")
		// rather than being spliced into the SQL text.
		// NOTE(review): this relies on query() escaping substituted values,
		// as the existing ":id" / ":pattern" placeholders already assume — confirm.
		$args[':like'] = $matches[1] . '.%';
		$args[':regexp'] = preg_quote($matches[1]) . '\.[0-9]+';

		// Digits only (guaranteed by the pattern above), so this is safe to inline.
		$version = isset($matches[2]) ? $matches[2] : '0';

		// regexp is expensive, so test first with like and then regexp.
		// This improves performance by about five-fold.
		$where[] = "(id like ':like' and id regexp ':regexp')";

		// Order results so that the one with the service release number that's closest
		// to the one we want is at the top of the list.
		$order[] = "abs(substring_index(substring_index(id,'/',-1),'.',-1) - {$version})";
	}

	$whereClause = implode(' OR ', $where);
	$orderList = implode(', ', $order);

	// There may be multiple hits for a particular id. The query only
	// returns one row, preferring rows that represent 'approved'
	// content.
	$sql = "
		select
			id, license, status, sourceUrl, definitionUrl, authority, confidence
		from ThirdPartyLicense
		where $whereClause
		order by $orderList
		limit 1";

	$rows = array();
	query('dashboard', $sql, $args, function($row) use (&$rows, $id) {
		$row['id'] = $id;
		$rows[] = $row;
	});

	return $rows ? $rows[0] : null;
}
| |
/**
 * Private function.
 *
 * Attempt to find an exception granted by the Eclipse Foundation Board of
 * Directors that permits the project to use a particular bit of content.
 *
 * <p>There are several ways that
 * an exception can be granted. The different ways are represented as the "rule"
 * in the <code>ThirdPartyLicenseException</code> table. The rules are handled
 * as follows:
 *
 * <ul>
 * <li><em>project</em> - Allow a single specific project to use a specific
 * version of a specific item;</li>
 * <li><em>project_all</em> - Allow a single specific project to use all versions
 * of a specific item;</li>
 * <li><em>project_all_future</em> - Allow a single specific project to use a specific
 * version, and all future versions, of a specific item;</li>
 * <li><em>all</em> - Allow all projects to use all versions of a specific item;</li>
 * </ul>
 *
 * <p>Note that the query currently treats <em>project_all</em>,
 * <em>project_all_future</em> and <em>workswith</em> identically: the project
 * must match and any version of the item is accepted.
 *
 * <p>Additional rules may be added in the future.
 *
 * <p>The <code>ThirdPartyLicenseException</code> table is created and maintained by the
 * <code>import_third_party_license_data.php</code> "project service" script.
 *
 * @param string $id a ClearlyDefined Id.
 * @param string $project a project id in the standard format (e.g., "technology.dash").
 * @return mixed|NULL the matching row (with its status set to 'approved' and its
 * id set to the id that was asked for), or NULL if there is no match, no project
 * is given, or the id does not have at least four slash-terminated segments.
 */
function findBoardException($id, $project) {
	if (!$project) return null;

	// Everything up to (and including) the slash before the revision
	// identifies the item regardless of version.
	$matches = null;
	if (!preg_match('/^((?:[^\/]+\/){4})/', $id, $matches)) return null;
	$pattern = $matches[1] . '%';

	// TODO Confirm that this orders such that the best candidate is at the top.
	// For now, we put an exact match at the top when one exists and do a secondary
	// sort that should put the most recent version at the top.
	$sql = "
		select
			l.id, l.license,
			'approved' as status,
			l.sourceUrl, l.definitionUrl,
			l.authority, l.confidence,
			e.project, e.rule
		from ThirdPartyLicense as l, ThirdPartyLicenseException as e
		where
			(e.rule = 'project' and e.project=':project' and e.id=':id' and e.id=l.id)
			or (e.rule in ('project_all','project_all_future','workswith') and e.project=':project' and e.id like ':pattern' and e.id=l.id)
			or (e.rule='all' and e.id like ':pattern' and e.id=l.id)
		order by if(e.id=':id',0,1) asc, id desc
		limit 1";
	$args = array(':id' => $id, ':project' => $project, ':pattern' => $pattern);

	$rows = array();
	query('dashboard', $sql, $args, function($row) use (&$rows, $id) {
		$row['id'] = $id;
		$rows[] = $row;
	});

	// Answer NULL (as documented) rather than FALSE when nothing was found.
	return $rows ? $rows[0] : null;
}
| |
| // Everything above this line could (and probably should) be factored out. |
| |
// The response body is produced by json_encode() below, so describe it as JSON.
header('Content-Type: application/json');
header('Content-Disposition: inline; filename="licenses.json"');

require_once (dirname ( __FILE__ ) . "/../../eclipse.org-common/system/app.class.php");
$App = new App ();

require_once dirname(__FILE__) . "/../classes/database.inc";

// Answer the named request field when it is a non-empty string; NULL otherwise.
$readField = function($source, $name) {
	if (!isset($source[$name]) || !is_string($source[$name]) || $source[$name] === '') return null;
	return $source[$name];
};

$projectId = isset($_POST['project']) ? $_POST['project'] : (isset($_GET['project']) ? $_GET['project'] : null);

$results = null;

// When run from the command line, an argument of the form "id=..." provides
// a single unit of content. $argv is not necessarily defined when the script
// is invoked by a web server.
if (isset($argv) && is_array($argv)) {
	foreach ($argv as $arg) {
		$matches = null;
		if (preg_match('/^id=(?<id>.*)$/', $arg, $matches)) {
			$results = array('unmatched' => array($matches['id'] => array()));
		}
	}
}

// Only the first form of input that is present is used.
if (($json = $readField($_POST, 'request')) !== null) {
	$results = array();
	$request = json_decode($json, true);
	if (is_array($request) && !empty($request['project'])) {
		$projectId = $request['project'];
	}
	if (is_array($request) && isset($request['dependencies']) && is_array($request['dependencies'])) {
		$results['unmatched'] = array_fill_keys($request['dependencies'], array());
	} else {
		$results['errors'][] = array("errorMalformedRequest" => "The request is not a JSON object with a dependencies array");
	}
} elseif (($content = $readField($_GET, 'id')) !== null) {
	$results = array('unmatched' => array($content => array()));
} elseif (($content = $readField($_POST, 'content')) !== null) {
	$results = loadFromString($content);
} elseif (($content = $readField($_POST, 'package-lock')) !== null) {
	$results = loadFromPackageLockString($content);
} elseif (($content = $readField($_POST, 'json')) !== null) {
	$ids = json_decode($content, true);
	if (is_array($ids)) {
		$results = array('unmatched' => array_fill_keys($ids, array()));
	} else {
		$results = array('errors' => array(array("errorMalformedJson" => "The json field is not a JSON array")));
	}
}

if (!is_array($results)) {
	$results = array();
}

if ($projectId) {
	if (is_string($projectId) && isValidProjectId($projectId)) {
		if ($project = Project::getProject($projectId)) {
			$results['project'] = array(
				"id" => $project->getId(),
				"name" => $project->getFormalName(),
				"license" => License::getSPDXExpression(License::getLicensesForProject($projectId))
			);
		} else {
			$results['errors'][] = array("errorInvalidProjectId" => "The project id does not match an existing project");
		}
	} else {
		$results['errors'][] = array("errorMalformedProjectId" => "The project id is not well-formed");
	}
}

// Only consult the data sources when there is something left to match; the
// results may legitimately hold only "invalid", "project" or "errors" entries.
if (!empty($results['unmatched']) && is_array($results['unmatched'])) {
	matchAgainstEclipseProjects($results);
	matchAgainstFoundationData($results, $projectId);
}

echo json_encode($results);
| |
| ?> |