vladdancer
8/21/2017 - 3:06 PM

Drupal 8. Find new path aliases by old ones

Drupal 8. Find new path aliases by old ones

<?php

// Old absolute URLs to look up. The value returned here is what the script
// receives from `include`; add one string entry per old URL.
return [
  'http://example.com/en-gb/who-we-are'
];
<?php
/**
 * @file
 * findNewUrls.drush
 *
 * This script tries to find the new URL for each old URL.
 * Old URLs are read from url-list.php in the Drupal root (DRUPAL_ROOT);
 * the file must return a PHP array, so add more old URLs as array entries.
 *
 * Here is the lookup logic.
 *
 * 1. Take the last part of the URL and clean it up.
 * 2. Search the database for a nid whose title matches the result of step 1,
 *    using the LIKE operator.
 * 3. Try to get the new URL for that nid using the path alias manager.
 *
 * This PHP script should be run using drush on a bootstrapped Drupal site.
 * @code
 *   drush php-script findNewUrls.drush
 * @endcode
 *
 * @return
 * Creates two lists of URLs.
 *
 * First 'url-list-found.map', can be used with nginx map module.
 * @see https://serverfault.com/questions/599372/mapping-old-to-new-urls-with-external-file-configuration-invalid
 * format: `old_url  new_url;`
 *
 * Second, 'url-list-notfound.map' can be used directly by nginx too,
 * but these redirects go to the front page,
 * format: `old_url  /;`
 *
 * @tips
 *
 * Nginx
 * Don't forget to test your config file using `sudo nginx -t`
 *
 * Apache
 * You can also use file with a list of redirects as in nginx,
 * see https://serverfault.com/questions/414225/best-way-to-handle-thousands-of-permanent-redirects
 *
 * Drupal
 * Or, if neither nginx nor Apache is an option for processing redirects
 * but you're using the Redirect Drupal module,
 * then you can programmatically import these redirects using
 * https://drupal.stackexchange.com/questions/227978/how-to-create-a-redirection-programmatically
 *
 * Bulk testing
 * You can adapt the command below to test redirects too
 * `xargs -n1 -P 10 curl -o /dev/null --silent --head --write-out '%{url_effective};%{http_code};%{time_total};%{time_namelookup};%{time_connect};%{size_download};%{speed_download}\n' < urls_with_500_error.lst | tee results500.csv`
 * like so
 * `xargs -n1 -P 10 curl -o /dev/null --silent --head --write-out '%{http_code};%{url_effective};%{redirect_url};%{time_total};%{time_namelookup};%{time_connect};%{size_download};%{speed_download}\n' < url-list-found-testing.map | tee url-list-found-testing-done.csv`
 */

use Drupal\Core\Entity\EntityInterface;
use Drupal\node\Entity\Node;

// Main script: maps every old URL from url-list.php to a new path alias and
// writes two nginx-map style files into the Drupal root:
// - url-list-found.map:    `old_path  new_path;`
// - url-list-notfound.map: `old_path  /<lang>;` (front page fallback).

$urls = include DRUPAL_ROOT . '/url-list.php';
if (!is_array($urls)) {
  // include yields FALSE for a missing file and 1 for a file without a
  // `return`; treat both as an empty list instead of iterating a non-array.
  $urls = [];
}

$drupal_langs = array_keys(\Drupal::languageManager()->getLanguages());

// Loop-invariant lookups are done once up front.
$alias_manager = \Drupal::service('path.alias_manager');
$database = \Drupal::database();

// Always start from empty lists so implode() below never receives NULL.
$results_found = [];
$results_notfound = [];

foreach ($urls as $url) {
  $path = str_replace('http://example.com', '', $url);
  $url_parts = explode('/', $path);

  // Detect the language from the first path segment. It is reset for every
  // URL so a prefix found for a previous URL never leaks into this one.
  $lang = NULL;
  if (isset($url_parts[1]) && in_array($url_parts[1], $drupal_langs, TRUE)) {
    $lang = $url_parts[1];
  }
  // Without a language there is no prefix; this avoids targets like '//alias'.
  $lang_prefix = $lang === NULL ? '' : '/' . $lang;
  $frontpage = $lang === NULL ? '/' : '/' . $lang;

  // The last URL segment, with dashes turned into spaces, is the search key.
  $title = str_replace('-', ' ', basename($url));

  $nid = FALSE;
  $numeric = filter_var($title, FILTER_VALIDATE_INT);
  if ($numeric !== FALSE) {
    // The last segment is a number: treat it as a nid and only accept it if
    // that node really exists.
    if (Node::load($numeric) instanceof EntityInterface) {
      $nid = $numeric;
    }
  }
  elseif ($title !== '') {
    // Otherwise look up the first node whose title starts with the search
    // key. An empty key is skipped because LIKE '%' would match any node.
    $nid = $database->select('node_field_data', 'n')
      ->fields('n', ['nid'])
      ->condition('title', $database->escapeLike($title) . '%', 'LIKE')
      ->range(0, 1)
      ->execute()
      ->fetchField();
  }

  if (!empty($nid)) {
    // A NULL langcode makes the alias manager fall back to the current
    // language.
    $alias = $alias_manager->getAliasByPath('/node/' . $nid, $lang);
    $results_found[] = $path . '  ' . $lang_prefix . $alias . ';';
  }
  else {
    $results_notfound[] = $path . '  ' . $frontpage . ';';
  }
}

file_put_contents(DRUPAL_ROOT . '/url-list-found.map', implode("\n", $results_found));
file_put_contents(DRUPAL_ROOT . '/url-list-notfound.map', implode("\n", $results_notfound));