| <?php |
| /******************************************************************************* |
| * Copyright (c) 2019 Eclipse Foundation and others. |
| * This program and the accompanying materials |
| * are made available under the terms of the Eclipse Public License 2.0 |
| * which accompanies this distribution, and is available at |
| * https://www.eclipse.org/legal/epl-2.0/ |
| * |
| * SPDX-License-Identifier: EPL-2.0 |
| *******************************************************************************/ |
| /* |
| * This file is a prototype for executing a license scan of a bill of |
| * materials. The bill of materials is provided as plain text with each |
| * line containing one unit of content (i.e., a library). Blank lines |
| * and comment lines are skipped (comment lines start with "#" or "//"). |
| * |
| * The id for a unit of content may be expressed as Maven coordinates of the form |
| * "groupid:artifactid[:packaging]:version", as abridged Purl coordinates of the form |
| * "type/namespace/name@version", or as ClearlyDefined coordinates of the |
| * form "type/source/namespace/name/version". Mixing formats is supported. |
| * |
| * Output is expressed as ClearlyDefined ids. |
| * |
| * Two sources of information are used to map ids to license information and other |
| * metadata: first, the Eclipse Foundation data is consulted and then |
| * the ClearlyDefined services are called. Future versions of this script |
| * may consult other sources of data. |
| * |
| * usage example: |
| * |
| * curl -X POST http://localhost/projects/services/license_check.php \ |
| * --data-urlencode content@maven.deps |
| * |
| * curl -X POST http://localhost/projects/services/license_check.php \ |
| * -d $'content=npm/npmjs/@theia/variable-resolver/0.3.19\nnpm/npmjs/@theia/outline-view/0.3.19' |
| * |
| * curl -X POST "http://localhost/projects/services/license_check.php" \ |
| * -d "content=`mvn dependency:list -DskipTests -Dmaven.javadoc.skip=true | grep -Poh '\S+(?=:compile)' | sort | uniq`" | jsonpp | less |
| * |
| * yarn list | grep -Poh "(?:([^\/\s]+)\/)?([^\/\s]+)@\D*(\d+(?:\.\d+)*)" \ |
| * | curl -X POST "http://localhost/projects/services/license_check.php?XDEBUG_SESSION_START=ECLIPSE_DBGP" \ |
| * --data-urlencode content@- | jsonpp | less |
| * |
| * Note that this works with an instance running on localhost. |
| * |
| * TODO Validate that we can scale to ~4K lines |
| * TODO support file upload. |
| * TODO Customize license list based on project (e.g., science.* can use LGPL) |
| * TODO Parameters for default type and provider. |
| */ |
| |
/**
 * Try to massage the content identifier into ClearlyDefined coordinates. That is,
 * for example, recognize Maven coordinates of the form <em>groupid:artifactid:version</em>, and
 * convert them into the ClearlyDefined equivalent, <em>maven/mavencentral/groupid/artifactid/version</em>.
 *
 * The patterns are not anchored: the first recognizable coordinate found anywhere
 * in the text is used. The forms are tried in this order:
 *
 * 1. ClearlyDefined coordinates (five slash-separated segments), passed through as-is;
 * 2. Maven coordinates "groupid:artifactid[:packaging]:version";
 * 3. the p2 "lib/<name>.jar" form that maps directly onto Maven Central;
 * 4. pURL-style "[namespace/]name@version", always reported as npm/npmjs.
 *
 * Only the leading dotted-numeric portion of a version is retained (e.g.
 * "1.10.5.v20190526-1402" becomes "1.10.5").
 *
 * Answers <code>null</code> when the id is not recognized.
 *
 * @param string $id
 * @return NULL|string
 */
function normalizeId($id) {
	$matches = null;

	// Just pass through anything that's already in a ClearlyDefined coordinate form.
	if (preg_match('/([\w@\-.]+)(?:\/[\w@\-.]+){4}/', $id, $matches)) {
		return $matches[0];
	}

	/*
	 * Deal with Maven coordinates. There is a special case that we need to deal with
	 * because of Tycho using p2 repositories. When the group id is exactly
	 * "p2.eclipse-plugin" or "p2.eclipse-feature", we generate an id for p2/orbit.
	 */
	if (preg_match('/([\w@\-.]+):([\w@\-.]+)(?::[\w@\-.]+)?:(\d+(?:\.\d+)*)/', $id, $matches)) {
		// The dot is escaped so that only the literal "p2." prefix qualifies.
		if (preg_match('/^p2\.eclipse-(?:plugin|feature)$/', $matches[1])) {
			return "p2/orbit/{$matches[1]}/{$matches[2]}/{$matches[3]}";
		}
		return "maven/mavencentral/{$matches[1]}/{$matches[2]}/{$matches[3]}";
	}

	/*
	 * Some p2 cases (org.apache.ant only, I think) provide all of the information that
	 * we need to match against Maven directly.
	 *
	 * e.g. "p2.eclipse-plugin:org.apache.ant:jar:lib/ant-jsch.jar:1.10.5.v20190526-1402"
	 * maps to "maven/mavencentral/org.apache.ant/ant-jsch/1.10.5"
	 *
	 * The Maven pattern above cannot match this form because its segments
	 * do not admit the "/" in "lib/...".
	 */
	if (preg_match('/p2\.eclipse-plugin:([\w@\-.]+)(?::[\w@\-.]+)?:lib\/([\w@\-.]+)\.jar:(\d+(?:\.\d+)*)/', $id, $matches)) {
		return "maven/mavencentral/{$matches[1]}/{$matches[2]}/{$matches[3]}";
	}

	/*
	 * Deal with pURL coordinates. These take the form "namespace/name@version",
	 * e.g., "@webassemblyjs/wast-printer@1.7.8". The namespace is optional; when
	 * absent, we use a dash ("-").
	 *
	 * Range qualifiers that precede the version (as produced by
	 * <code>yarn list</code>) are skipped by the "\D*" in the pattern.
	 *
	 * FIXME Sort out what to do with the range qualifiers
	 *
	 * FIXME Don't assume that values provided in pURL format are NPM.
	 */
	if (preg_match('/(?:([^\/\s]+)\/)?([^\/\s]+)@\D*(\d+(?:\.\d+)*)/', $id, $matches)) {
		// An unmatched optional group is reported as ''. Compare explicitly
		// rather than with empty(), which would also discard a namespace of "0".
		$namespace = ($matches[1] === '') ? '-' : $matches[1];
		return "npm/npmjs/{$namespace}/{$matches[2]}/{$matches[3]}";
	}

	return null;
}
| |
/**
 * Parse a plain-text bill of materials, one unit of content per line, into
 * the results structure used by the rest of this script.
 *
 * Each line is trimmed. Blank lines and comment lines (starting with "#" or
 * "//") are skipped. Every remaining line is handed to normalizeId(): when
 * it is recognized, the resulting id becomes a key under "unmatched" (with
 * an empty array as value); otherwise the trimmed line is appended to
 * "invalid". Either key is only present when it has at least one entry.
 *
 * @param string $content
 * @return array
 */
function loadFromString($content) {
	$results = array();

	// "\R" matches any newline sequence, so Unix and Windows line endings both work.
	foreach (preg_split('/\R/', $content) as $line) {
		$line = trim($line);

		// Skip (rather than stop at) blank lines and comment lines.
		if ($line === '') continue;
		if (preg_match('/^(?:#|\/\/)/', $line)) continue;

		$id = normalizeId($line);
		if ($id !== null) {
			$results['unmatched'][$id] = array();
		} else {
			$results['invalid'][] = $line;
		}
	}

	return $results;
}
| |
/**
 * Build the results structure from the text of a package-lock file.
 *
 * The text is decoded as JSON (objects become associative arrays) and walked
 * with visitPackages(). Every id reported by the walk becomes a key under
 * "unmatched" with an empty array as its value. When nothing is reported,
 * an empty array is answered.
 *
 * @param string $content
 * @return array
 */
function loadFromPackageLockString($content) {
	$collected = array();

	$collect = function($id) use (&$collected) {
		$collected['unmatched'][$id] = array();
	};

	$decoded = json_decode($content, true);
	visitPackages($decoded, $collect);

	return $collected;
}
| |
/**
 * Walk the nested "dependencies" maps of a decoded package-lock structure
 * and report each package to the callback as an "npm/npmjs" ClearlyDefined id.
 *
 * A dependency key of the form "namespace/name" is split at its last slash;
 * a key without a slash gets the namespace "-". A package is reported before
 * its own nested dependencies are visited.
 *
 * Malformed input is tolerated: a root or "dependencies" value that is not
 * an array is ignored, entries that are not arrays are skipped, and entries
 * without a scalar "version" are not reported (their nested dependencies
 * are still visited).
 *
 * @param mixed $root decoded JSON (normally an array)
 * @param callable $callback called with one argument, the id
 */
function visitPackages($root, Callable $callback) {
	if (!is_array($root)) return;
	if (!isset($root['dependencies']) || !is_array($root['dependencies'])) return;

	foreach ($root['dependencies'] as $key => $data) {
		// e.g. a package.json-style "name": "^1.0.0" entry carries no usable data.
		if (!is_array($data)) continue;

		if (isset($data['version']) && is_scalar($data['version'])) {
			$matches = null;
			if (preg_match('/^(.+)\/(.+)$/', (string) $key, $matches)) {
				$namespace = $matches[1];
				$name = $matches[2];
			} else {
				$namespace = '-';
				$name = $key;
			}
			$callback("npm/npmjs/{$namespace}/{$name}/{$data['version']}");
		}

		visitPackages($data, $callback);
	}
}
| |
/**
 * Approve content that comes from an Eclipse project, judging by its id alone.
 *
 * Each id under "unmatched" is split on "/". The id is treated as Eclipse
 * content when its third segment is accepted by matchesEclipseNamespace(),
 * or when the third segment is "p2.eclipse-plugin"/"p2.eclipse-feature" and
 * the fourth segment starts with "org.eclipse", "org.polarsys" or
 * "org.locationtech".
 *
 * This function modifies the parameter: matching ids are removed from
 * "unmatched" and a record is stored under "approved". Ids with too few
 * segments are left where they are, and nothing happens when there is no
 * "unmatched" set.
 *
 * @param array $results
 */
function matchAgainstEclipseProjects(&$results) {
	if (empty($results['unmatched']) || !is_array($results['unmatched'])) return;

	foreach (array_keys($results['unmatched']) as $id) {
		$parts = explode('/', $id);
		if (!isset($parts[2])) continue;

		$approved = matchesEclipseNamespace($parts[2]);
		if (!$approved && isset($parts[3]) && preg_match('/^p2\.eclipse-(?:plugin|feature)$/', $parts[2])) {
			// For p2 ids the third segment is the p2 type, so test the fourth one.
			$approved = (bool) preg_match('/^org\.(?:eclipse|polarsys|locationtech)/', $parts[3]);
		}
		if (!$approved) continue;

		unset($results['unmatched'][$id]);
		$results['approved'][$id] = array(
			'id' => $id,
			'license' => '',
			'status' => 'approved',
			'sourceUrl' => '',
			'definitionUrl' => '',
			'authority' => 'eclipse',
			'confidence' => 90
		);
	}
}
| |
/**
 * Answer whether the namespace matches one of the hard-coded patterns
 * listed below.
 *
 * @param string $namespace
 * @return bool
 */
function matchesEclipseNamespace($namespace) {
	// TODO Make this data-driven
	$patterns = array(
		'/^eclipse\b/',
		'/^org\.eclipse\b/',
		'/^org\.polarsys\b/',
		'/^org\.locationtech\b/',
		'/^jakarta\b/',
		'/^org\.aspectj/',
		'/^@theia$/',
	);

	foreach ($patterns as $pattern) {
		if (preg_match($pattern, $namespace)) {
			return true;
		}
	}

	return false;
}
| |
/*
 * Match against the consolidated data from Eclipse Foundation
 * sources. The consolidated data is stored in the dashboard
 * database by a script that runs periodically.
 *
 * This function modifies the parameter. As we find matches, the id
 * is removed from the "unmatched" set and the metadata that we do
 * find is added to an array by status. Nothing happens when there is
 * no "unmatched" set.
 *
 * For each id, at most one row of the ThirdPartyLicense table is used:
 * either the row with exactly that id, or a row for another service
 * release at the same minor level (see below). "approved" rows are
 * preferred. The "id" of the recorded row is always the id that was asked for.
 *
 * @see project-services/capture/php/import_third_party_license_data.php
 */
function matchAgainstFoundationData(&$results) {
	if (empty($results['unmatched']) || !is_array($results['unmatched'])) return;

	foreach (array_keys($results['unmatched']) as $id) {
		$where = array('id=":id"');
		$order = array('if(status="approved",0,1)');

		/*
		 * According to the IP Policy, service releases should match either a minor
		 * release or another service release at the same minor level.
		 *
		 * Map an id, expressed as Clearly Defined coordinates, into a pattern
		 * capable of matching against service releases.
		 *
		 * e.g., for "maven/mavencentral/com.github.jnr/jnr-posix/3.0.29" look for
		 * ids that start with "maven/mavencentral/com.github.jnr/jnr-posix/3.0."
		 * followed by a service release number.
		 *
		 * The prefix is interpolated into the SQL text, and ids originate from the
		 * request, so the clause is only added when the prefix consists solely of
		 * characters that cannot terminate or escape an SQL string literal. Any
		 * other id is still looked up by exact match through the ":id" argument.
		 *
		 * NOTE(review): with MySQL's default SQL mode, backslashes inside a string
		 * literal are consumed by the SQL parser, so the escaping added by
		 * preg_quote() may not reach the regexp engine - confirm.
		 */
		$matches = null;
		if (preg_match('/^((?:[^\/]+\/){4}(?:\d+\.\d+))(?:\.(\d+))?/', $id, $matches)
				&& preg_match('/\A[A-Za-z0-9_@\-.\/]+\z/', $matches[1])) {
			$like = $matches[1] . '.%';
			$regexp = preg_quote($matches[1]) . '\.[0-9]+';
			$version = isset($matches[2]) ? $matches[2] : '0';
			// regexp is expensive, so test first with like and then regexp.
			// This improves performance by about five-fold.
			$where[] = "(id like '{$like}' and id regexp '{$regexp}')";
			// Order results so that the one with the service release number that's closest
			// to the one we want is at the top of the list.
			$order[] = "abs(substring_index(substring_index(id,'/',-1),'.',-1) - {$version})";
		}

		$whereClause = implode(' OR ', $where);
		$orderList = implode(', ', $order);

		// There may be multiple hits for a particular id. The query only
		// returns one row, preferring rows that represent 'approved'
		// content.
		$sql = "
			select
				id, license, status, sourceUrl, definitionUrl, authority, confidence
			from ThirdPartyLicense
			where $whereClause
			order by $orderList
			limit 1";
		$args = array(':id' => $id);

		query('dashboard', $sql, $args, function($row) use (&$results, $id) {
			// Report the row under the id that was asked for, even when the
			// row that matched belongs to a different service release.
			$row['id'] = $id;
			unset($results['unmatched'][$id]);
			$results[$row['status']][$id] = $row;
		});
	}
}
| |
| // Everything above this line could (and probably should) be factored out. |
| |
/*
 * Request handling. Exactly one of these POST fields is used, tried in
 * this order:
 *
 * - "content": plain text, one id per line (see loadFromString);
 * - "package-lock": the text of a package-lock file (see loadFromPackageLockString);
 * - "json": a JSON array of ids, which are used as given.
 *
 * The response body is the JSON encoding of the results; it is an empty
 * JSON array when there was no usable input.
 */

// The body is JSON, so say so.
header('Content-Type: application/json');
header('Content-Disposition: inline; filename="licenses.json"');

require_once (dirname ( __FILE__ ) . "/../../eclipse.org-common/system/app.class.php");
$App = new App ();

require_once dirname(__FILE__) . "/../classes/database.inc";

// Answer the named POST field when it is a non-empty string, '' otherwise
// (a field posted as "name[]=..." arrives as an array and is ignored).
$postedText = function($name) {
	return (isset($_POST[$name]) && is_string($_POST[$name])) ? $_POST[$name] : '';
};

$results = array();

if ($content = $postedText('content')) {
	$results = loadFromString($content);
} elseif ($content = $postedText('package-lock')) {
	$results = loadFromPackageLockString($content);
} elseif ($content = $postedText('json')) {
	$ids = json_decode($content, true);
	if (is_array($ids)) {
		// Only strings and integers can serve as array keys.
		$ids = array_filter($ids, function($id) {
			return is_string($id) || is_int($id);
		});
		$results = array('unmatched' => array_fill_keys($ids, array()));
	}
}

// There is nothing to look up unless at least one id was recognized.
if (!empty($results['unmatched'])) {
	matchAgainstEclipseProjects($results);
	matchAgainstFoundationData($results);
}

echo json_encode($results);