Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

First pass at getting data for XML sitemaps from indexables #17580

Closed
wants to merge 27 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
ed7f3a6
First pass at getting data for XML sitemaps from indexables
Oct 31, 2021
0170779
remove more unused code
Oct 31, 2021
56bc0ac
Reinstate min steps
Oct 31, 2021
ff147e9
Bugfixes
Nov 1, 2021
c7e33f0
Intermediate
Nov 2, 2021
cf87f37
Move image inclusion to helper
Nov 2, 2021
c4b0e68
More documentation and cleanup
Nov 2, 2021
3d82a90
Author sitemaps on indexables
Nov 2, 2021
4380b4f
CS
Nov 2, 2021
5bcc14d
Make sure get_index_links uses same parameters as get_sitemap_links
Nov 4, 2021
701582e
Make get_index_links use indexables table as well
Nov 4, 2021
017a9ec
Remove filter out empty terms filters
Nov 4, 2021
91e32b6
Reinstate exclude_taxonomy filter, remove empty authors
Nov 4, 2021
7c2c4a5
Multiple optimizations
Nov 4, 2021
b5d14c4
Make wpseo_sitemap_entry filter consistent
Nov 5, 2021
ce36e4f
Remove no longer used code
Nov 5, 2021
fe461b2
Remove caching mechanism and image parser as we no longer need it
Nov 5, 2021
5480a4f
Fix CS
Nov 5, 2021
0eb2865
Clean up tests
Nov 5, 2021
7263167
Remove more remnants of sitemaps cache
Nov 5, 2021
6265cdf
Merge branch 'trunk' of github.com:Yoast/wordpress-seo into jdv/xml-s…
Nov 5, 2021
7502d9d
Remove query output when debug is on
Nov 5, 2021
9161e74
Early return when there are no indexables to check for images
Nov 5, 2021
53b02cb
Fix CS
Nov 5, 2021
04a9e74
Further improvements
Nov 23, 2021
a22baa7
add is-protected check, change filter for entries
Nov 30, 2021
45d6d77
Merge branch 'trunk' of https://github.com/Yoast/wordpress-seo into j…
diedexx Dec 15, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 0 additions & 17 deletions admin/class-admin-user-profile.php
Original file line number Diff line number Diff line change
Expand Up @@ -19,23 +19,6 @@ public function __construct() {
add_action( 'edit_user_profile', [ $this, 'user_profile' ] );
add_action( 'personal_options_update', [ $this, 'process_user_option_update' ] );
add_action( 'edit_user_profile_update', [ $this, 'process_user_option_update' ] );

add_action( 'update_user_meta', [ $this, 'clear_author_sitemap_cache' ], 10, 3 );
}

/**
 * Flushes the author sitemap cache when a user's profile-updated meta changes.
 *
 * Registered on the `update_user_meta` action with three accepted arguments.
 * Only the meta key is inspected; the other two arguments are unused.
 *
 * @since 3.1
 *
 * @param int    $meta_id   The ID of the meta option changed.
 * @param int    $object_id The ID of the user.
 * @param string $meta_key  The key of the meta field changed.
 */
public function clear_author_sitemap_cache( $meta_id, $object_id, $meta_key ) {
	// Every other meta key leaves the author sitemap cache untouched.
	if ( $meta_key !== '_yoast_wpseo_profile_updated' ) {
		return;
	}

	WPSEO_Sitemaps_Cache::clear( [ 'author' ] );
}

/**
Expand Down
3 changes: 0 additions & 3 deletions admin/class-admin.php
Original file line number Diff line number Diff line change
Expand Up @@ -70,9 +70,6 @@ public function __construct() {

add_action( 'admin_init', [ $this, 'map_manage_options_cap' ] );

WPSEO_Sitemaps_Cache::register_clear_on_option_update( 'wpseo' );
WPSEO_Sitemaps_Cache::register_clear_on_option_update( 'home' );

if ( YoastSEO()->helpers->current_page->is_yoast_seo_page() ) {
add_action( 'admin_enqueue_scripts', [ $this, 'enqueue_assets' ] );
}
Expand Down
6 changes: 0 additions & 6 deletions inc/class-upgrade.php
Original file line number Diff line number Diff line change
Expand Up @@ -144,9 +144,6 @@ protected function finish_up( $previous_version = null ) {
// Just flush rewrites, always, to at least make them work after an upgrade.
add_action( 'shutdown', 'flush_rewrite_rules' );

// Flush the sitemap cache.
WPSEO_Sitemaps_Cache::clear();

// Make sure all our options always exist - issue #1245.
WPSEO_Options::ensure_options_exist();
}
Expand Down Expand Up @@ -570,9 +567,6 @@ private function upgrade_772() {
private function upgrade_90() {
global $wpdb;

// Invalidate all sitemap cache transients.
WPSEO_Sitemaps_Cache_Validator::cleanup_database();

// Removes all scheduled tasks for hitting the sitemap index.
wp_clear_scheduled_hook( 'wpseo_hit_sitemap_index' );

Expand Down
216 changes: 46 additions & 170 deletions inc/sitemaps/class-author-sitemap-provider.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,28 @@
* @package WPSEO\XML_Sitemaps
*/

use Yoast\WP\SEO\Helpers\Author_Archive_Helper;
use Yoast\WP\SEO\Repositories\Indexable_Repository;
use Yoast\WP\SEO\Helpers\Wordpress_Helper;

/**
* Sitemap provider for author archives.
*/
class WPSEO_Author_Sitemap_Provider implements WPSEO_Sitemap_Provider {

/**
* The indexable repository.
*
* @var Indexable_Repository
*/
private $repository;

/**
 * Looks up the indexable repository through `YoastSEO()->classes` and stores
 * it on the provider so later queries can reuse it.
 */
public function __construct() {
	$classes          = YoastSEO()->classes;
	$this->repository = $classes->get( 'Yoast\WP\SEO\Repositories\Indexable_Repository' );
}

/**
* Check if provider supports given item type.
*
Expand Down Expand Up @@ -42,43 +56,31 @@ public function get_index_links( $max_entries ) {
return [];
}

// @todo Consider doing this less often / when necessary. R.
$this->update_user_meta();
$query = $this->repository
->query()
->select_many( 'id', 'permalink', 'object_last_modified' )
->where( 'object_type', 'user' )
->where_raw( '( is_robots_noindex = 0 OR is_robots_noindex IS NULL )' )
->order_by_desc( 'object_last_modified' );

$has_exclude_filter = has_filter( 'wpseo_sitemap_exclude_author' );

$query_arguments = [];

if ( ! $has_exclude_filter ) { // We only need full users if legacy filter(s) hooked to exclusion logic. R.
$query_arguments['fields'] = 'ID';
}

$users = $this->get_users( $query_arguments );

if ( $has_exclude_filter ) {
$users = $this->exclude_users( $users );
$users = wp_list_pluck( $users, 'ID' );
}

if ( empty( $users ) ) {
return [];
$users_to_exclude = $this->exclude_users( [] );
if ( count( $users_to_exclude ) > 0 ) {
$query->where_not_in( 'object_id', $users_to_exclude );
}

$index = [];
$page = 1;
$user_pages = array_chunk( $users, $max_entries );
$indexables = $query->find_many();
$user_pages = array_chunk( $indexables, $max_entries );

if ( count( $user_pages ) === 1 ) {
$page = '';
}

foreach ( $user_pages as $users_page ) {
array_shift( $users_page );
Copy link
Member

@diedexx diedexx Dec 14, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

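@jdevalk this line should be removed, right? The list has already been shifted once here, so `$users_page[0]` further down points at the second entry of the page. As a result, each page shows the second most recent last-modified date instead of the most recent one. Or is there something I'm missing?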


$user_id = array_shift( $users_page ); // Time descending, first user on page is most recently updated.
$user = get_user_by( 'id', $user_id );
$index[] = [
'loc' => WPSEO_Sitemaps_Router::get_base_url( 'author-sitemap' . $page . '.xml' ),
'lastmod' => ( $user->_yoast_wpseo_profile_updated ) ? YoastSEO()->helpers->date->format_timestamp( $user->_yoast_wpseo_profile_updated ) : null,
'lastmod' => $users_page[0]->object_last_modified,
];

++$page;
Expand All @@ -87,65 +89,6 @@ public function get_index_links( $max_entries ) {
return $index;
}

/**
 * Fetch users via `get_users()`, using the sitemap's default query arguments.
 *
 * The defaults sort on the `_yoast_wpseo_profile_updated` meta key in
 * descending order and add two meta-query conditions: the blog-prefixed
 * `user_level` must not be '0', and `wpseo_noindex_author` must either be
 * absent or differ from 'on'. Caller-supplied arguments are merged over the
 * defaults, so they win on key collisions.
 *
 * @param array $arguments Arguments to add.
 *
 * @return array
 */
protected function get_users( $arguments = [] ) {

	global $wpdb;

	$user_level_clause = [
		'key'     => $wpdb->get_blog_prefix() . 'user_level',
		'value'   => '0',
		'compare' => '!=',
	];

	// Matches users whose noindex flag is either not 'on' or not stored at all.
	$noindex_clause = [
		'relation' => 'OR',
		[
			'key'     => 'wpseo_noindex_author',
			'value'   => 'on',
			'compare' => '!=',
		],
		[
			'key'     => 'wpseo_noindex_author',
			'compare' => 'NOT EXISTS',
		],
	];

	$query_args = [
		'capability' => [ 'edit_posts' ],
		'meta_key'   => '_yoast_wpseo_profile_updated',
		'orderby'    => 'meta_value_num',
		'order'      => 'DESC',
		'meta_query' => [
			'relation' => 'AND',
			$user_level_clause,
			$noindex_clause,
		],
	];

	$version_helper = new Wordpress_Helper();
	$is_before_59   = version_compare( $version_helper->get_wordpress_version(), '5.9-alpha', '<' );

	// Capability queries were only introduced in WP 5.9.
	if ( $is_before_59 ) {
		$query_args['who'] = 'authors';
		unset( $query_args['capability'] );
	}

	if ( WPSEO_Options::get( 'noindex-author-noposts-wpseo', true ) ) {
		unset( $query_args['capability'] );
		if ( $is_before_59 ) {
			$query_args['who'] = ''; // Otherwise it cancels out next argument.
		}
		$archive_helper                    = new Author_Archive_Helper();
		$query_args['has_published_posts'] = $archive_helper->get_author_archive_post_types();
	}

	return get_users( array_merge( $query_args, $arguments ) );
}

/**
* Get set of sitemap link data.
*
Expand All @@ -159,100 +102,34 @@ protected function get_users( $arguments = [] ) {
*/
public function get_sitemap_links( $type, $max_entries, $current_page ) {

$links = [];

if ( ! $this->handles_type( 'author' ) ) {
return $links;
}

$user_criteria = [
'offset' => ( ( $current_page - 1 ) * $max_entries ),
'number' => $max_entries,
];

$users = $this->get_users( $user_criteria );

// Throw an exception when there are no users in the sitemap.
if ( count( $users ) === 0 ) {
throw new OutOfBoundsException( 'Invalid sitemap page requested' );
}

$users = $this->exclude_users( $users );
if ( empty( $users ) ) {
$users = [];
}

$time = time();

foreach ( $users as $user ) {

$author_link = get_author_posts_url( $user->ID );

if ( empty( $author_link ) ) {
continue;
}

$mod = $time;

if ( isset( $user->_yoast_wpseo_profile_updated ) ) {
$mod = $user->_yoast_wpseo_profile_updated;
}

$url = [
'loc' => $author_link,
'mod' => date( DATE_W3C, $mod ),

// Deprecated, kept for backwards data compat. R.
'chf' => 'daily',
'pri' => 1,
];

/** This filter is documented at inc/sitemaps/class-post-type-sitemap-provider.php */
$url = apply_filters( 'wpseo_sitemap_entry', $url, 'user', $user );

if ( ! empty( $url ) ) {
$links[] = $url;
}
return [];
}

return $links;
}
$offset = ( ( $current_page - 1 ) * $max_entries );

/**
* Update any users that don't have last profile update timestamp.
*
* @return int Count of users updated.
*/
protected function update_user_meta() {

$user_criteria = [
'capability' => [ 'edit_posts' ],
'meta_query' => [
[
'key' => '_yoast_wpseo_profile_updated',
'compare' => 'NOT EXISTS',
],
],
];
$query = $this->repository
->query()
->select_many( 'id', 'object_id', 'permalink', 'object_last_modified' )
->where( 'object_type', 'user' )
->where_raw( '( is_robots_noindex = 0 OR is_robots_noindex IS NULL )' )
->order_by_desc( 'object_last_modified' )
->offset( $offset )
->limit( $max_entries );

$wordpress_helper = new Wordpress_Helper();
$wordpress_version = $wordpress_helper->get_wordpress_version();

// Capability queries were only introduced in WP 5.9.
if ( version_compare( $wordpress_version, '5.9-alpha', '<' ) ) {
$user_criteria['who'] = 'authors';
unset( $user_criteria['capability'] );
$users_to_exclude = $this->exclude_users( [] );
if ( count( $users_to_exclude ) > 0 ) {
$query->where_not_in( 'object_id', $users_to_exclude );
}

$users = get_users( $user_criteria );

$time = time();
$indexables = $query->find_many();

foreach ( $users as $user ) {
update_user_meta( $user->ID, '_yoast_wpseo_profile_updated', $time );
// Throw an exception when there are no users in the sitemap.
if ( count( $indexables ) === 0 ) {
throw new OutOfBoundsException( 'Invalid sitemap page requested' );
}

return count( $users );
return YoastSEO()->helpers->xml_sitemap->convert_indexables_to_sitemap_links( $indexables, 'user' );
}

/**
Expand All @@ -263,7 +140,6 @@ protected function update_user_meta() {
* @return array
*/
protected function exclude_users( $users ) {

/**
* Filter the authors, included in XML sitemap.
*
Expand Down
Loading