Drupal 8. Find new path aliases by old ones
<?php
// List of old URLs to resolve. The file returns a plain PHP array, so the
// caller receives the list as the result of its `include` expression.
// Add one quoted URL per array entry.
return [
'http://example.com/en-gb/who-we-are'
];
<?php
/**
* @file
* findNewUrls.drush
*
* This script tries to find the new URL for each old URL.
* The old URLs must be listed in a file named url-list.php placed in the
* Drupal root directory (DRUPAL_ROOT), which is where this script loads it
* from. The file returns a PHP array; add more old URLs as array entries.
*
* Lookup logic:
*
* 1. Take the last part of the URL and clean it up.
* 2. Search the database for a nid whose title matches the result of
*    step 1, using the LIKE operator.
* 3. Try to get the new URL for that nid from the path alias manager.
*
* This PHP script should be run with drush on a bootstrapped Drupal.
* @code
* drush php-script findNewUrls.drush
* @endcode
*
* @return
* Creates two lists of URLs.
*
* First 'url-list-found.map', can be used with nginx map module.
* @see https://serverfault.com/questions/599372/mapping-old-to-new-urls-with-external-file-configuration-invalid
* format: `old_url new_url;`
*
* Second 'url-list-notfound.map' can be used directly by nginx too,
* but these redirects go to the front page,
* format: `old_url /;`
*
* @tips
*
* Nginx
* Don't forget to test your config file using `sudo nginx -t`
*
* Apache
* You can also use file with a list of redirects as in nginx,
* see https://serverfault.com/questions/414225/best-way-to-handle-thousands-of-permanent-redirects
*
* Drupal
* If neither nginx nor Apache is an option for processing redirects,
* but you're using the Redirect Drupal module,
* then you can programmatically import these redirects using
* https://drupal.stackexchange.com/questions/227978/how-to-create-a-redirection-programmatically
*
* Bulk testing
* You can also adapt the command below to test the redirects
* `xargs -n1 -P 10 curl -o /dev/null --silent --head --write-out '%{url_effective};%{http_code};%{time_total};%{time_namelookup};%{time_connect};%{size_download};%{speed_download}\n' < urls_with_500_error.lst | tee results500.csv`
* like so
* `xargs -n1 -P 10 curl -o /dev/null --silent --head --write-out '%{http_code};%{url_effective};%{redirect_url};%{time_total};%{time_namelookup};%{time_connect};%{size_download};%{speed_download}\n' < url-list-found-testing.map | tee url-list-found-testing-done.csv`
*/
use Drupal\Core\Entity\EntityInterface;
use Drupal\node\Entity\Node;
// Load the list of old URLs. The included file is expected to `return` a
// PHP array of absolute URLs.
$urls = include DRUPAL_ROOT . '/url-list.php';
if (!is_array($urls)) {
  // `include` evaluates to FALSE for a missing file and to 1 for a file
  // without a return statement. Fall back to an empty list so the loop is
  // skipped and both output files are still written.
  $urls = [];
}

$language_manager = \Drupal::languageManager();
$drupal_langs = array_keys($language_manager->getLanguages());
// Language used for URLs that carry no known language prefix.
$default_lang = $language_manager->getDefaultLanguage()->getId();
// Fetch the service once instead of on every loop iteration.
$alias_manager = \Drupal::service('path.alias_manager');

// Initialise both result lists so implode() below always receives an array,
// even when one of the lists stays empty.
$results_found = [];
$results_notfound = [];

foreach ($urls as $url) {
  $path = str_replace('http://example.com', '', $url);
  $url_parts = explode('/', $path);

  // Reset the language for every URL so a value detected for a previous URL
  // never leaks into this one. Only accept the first path segment as a
  // language when it is a langcode known to the system.
  $lang = $default_lang;
  if (isset($url_parts[1]) && in_array($url_parts[1], $drupal_langs, TRUE)) {
    $lang = $url_parts[1];
  }

  // The last URL segment, with dashes turned into spaces, is treated either
  // as a node id (when it is an integer) or as the beginning of a node title.
  $title = str_replace('-', ' ', basename($url));

  // If we are dealing with a node nid, try to get the path alias directly.
  if (filter_var($title, FILTER_VALIDATE_INT)) {
    $alias = FALSE;
    if (Node::load($title) instanceof EntityInterface) {
      $source = '/node/' . $title;
      $alias = $alias_manager->getAliasByPath($source, $lang);
      // getAliasByPath() hands back the source path itself when no alias
      // exists, so that case has to be detected before the language prefix
      // is prepended; afterwards the string can never be empty.
      if ($alias === $source) {
        $alias = FALSE;
      }
    }

    if ($alias !== FALSE) {
      $results_found[] = $path . ' /' . $lang . $alias . ';';
    }
    else {
      $results_notfound[] = $path . ' /' . $lang . ';';
    }
  }
  // Otherwise look up the node nid by title before asking for the alias.
  else {
    $nid = FALSE;
    // An empty title would produce the pattern '%', which matches every
    // node and would map the URL to an arbitrary one; skip the query then.
    if ($title !== '') {
      $nid = db_select('node_field_data', 'n')
        ->fields('n', ['nid'])
        ->condition('title', db_like($title) . '%', 'LIKE')
        ->range(0, 1)
        ->execute()
        ->fetchField();
    }

    if ($nid) {
      $alias = $alias_manager->getAliasByPath('/node/' . $nid, $lang);
      $results_found[] = $path . ' /' . $lang . $alias . ';';
    }
    else {
      $results_notfound[] = $path . ' /' . $lang . ';';
    }
  }
}

// Write both maps next to the Drupal root, one `old_url new_url;` per line.
file_put_contents(DRUPAL_ROOT . '/url-list-found.map', implode("\n", $results_found));
file_put_contents(DRUPAL_ROOT . '/url-list-notfound.map', implode("\n", $results_notfound));