[devdocsgjs/main: 1339/1867] Cleanup, version, and improve Relay scraper
- From: Andy Holmes <andyholmes src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [devdocsgjs/main: 1339/1867] Cleanup, version, and improve Relay scraper
- Date: Fri, 19 Nov 2021 23:47:54 +0000 (UTC)
commit 7c01b590f07b2bb8bb6ee649e82314844804c8fd
Author: Phil Scherer <pnscher evoforge org>
Date: Sat Dec 5 06:34:00 2020 +0000
Cleanup, version, and improve Relay scraper
lib/docs/filters/relay/clean_html.rb | 31 ++++---------------
lib/docs/filters/relay/entries.rb | 58 +++++++++++++++++-------------------
lib/docs/scrapers/relay.rb | 34 +++++++++++----------
3 files changed, 50 insertions(+), 73 deletions(-)
---
diff --git a/lib/docs/filters/relay/clean_html.rb b/lib/docs/filters/relay/clean_html.rb
index f18d30ba..e3e7c3a1 100644
--- a/lib/docs/filters/relay/clean_html.rb
+++ b/lib/docs/filters/relay/clean_html.rb
@@ -2,38 +2,17 @@ module Docs
class Relay
class CleanHtmlFilter < Filter
def call
+ @doc = at_css('.post')
- if slug == 'index'
- css('img').remove
+ header = at_css('h1')
+ header.parent.before(header).remove
- css('.projectTitle').each do |node|
- node.name = 'h1'
- node.content = 'Relay'
- end
-
- css('pre').remove
-
- end
-
- css('.docLastUpdate').remove
-
- css('.docs-prevnext').remove
-
- css('.edit-page-link').remove
+ css('footer').remove
css('h2, h3').each do |node|
- node.css('a').remove
- node['id'] = node.content.gsub(/\s/, '-').downcase
+ node['id'] = node.at_css('a.anchor')['id']
end
- css('.onPageNav').remove
-
- css('#docsNav').remove
-
- css('.fixedHeaderContainer').remove
-
- css('footer').remove
-
# syntax highlight
css('pre').each do |node|
node['data-language'] = 'javascript'
diff --git a/lib/docs/filters/relay/entries.rb b/lib/docs/filters/relay/entries.rb
index 7f7c6859..99f33543 100644
--- a/lib/docs/filters/relay/entries.rb
+++ b/lib/docs/filters/relay/entries.rb
@@ -1,51 +1,47 @@
module Docs
class Relay
class EntriesFilter < Docs::EntriesFilter
-
- def get_name
- if slug == 'index'
- return 'Relay'
+ ONLY_SECTIONS = ['API Reference', 'Principles & Architecture']
+ ONLY_SLUGS = []
+
+ def call
+ if root_page?
+ css('.navGroup > h3').each do |node|
+ next if not ONLY_SECTIONS.include? node.content
+ node.next_element.css('a').each do |anchor|
+ ONLY_SLUGS << anchor['href'].split('/').last.strip
+ end
+ end
end
+ super
+ end
+ def get_name
at_css('h1').content
end
def get_type
- if slug == 'index'
- return 'Relay'
- end
-
at_css('h1').content
end
- def additional_entries
- entries = []
-
- if slug == 'index'
- return entries
- end
-
- ## avoid adding non-desired entries removing tags
- # remove header which contains a <h2> tag
- css('.fixedHeaderContainer').remove
+ def include_default_entry?
+ ONLY_SLUGS.include? slug
+ end
- # remove table of content whose title is an <h2> tag
- css('.toc').remove
- ##
+ def additional_entries
+ return [] if not include_default_entry?
- css('h2, h3').each do |node|
- next if node.content.include?('Argument')
- entry_name = node.content
+ css('article h2, article h3').each_with_object [] do |node, entries|
+ next if node.content.include?('Argument') ||
+ node.content.starts_with?('Example')
- if entry_name.include?('(')
- entry_name = entry_name.match(/.*\(/)[0] + ')'
+ name = node.content
+ if name.include?('(')
+ name = name.match(/.*\(/)[0] + ')'
end
-
- entry_id = node.content.gsub(/\s/, '-').downcase
- entries << [entry_name, entry_id]
+ id = node.at_css('a.anchor')['id']
+ entries << [name, id]
end
-
- entries
end
end
diff --git a/lib/docs/scrapers/relay.rb b/lib/docs/scrapers/relay.rb
index 8d01b3bc..0b3f6b8a 100644
--- a/lib/docs/scrapers/relay.rb
+++ b/lib/docs/scrapers/relay.rb
@@ -1,9 +1,7 @@
module Docs
class Relay < UrlScraper
self.type = 'simple'
- self.release = '10.1.0'
- self.base_url = 'https://relay.dev'
- self.root_path = 'index.html'
+ self.root_path = 'introduction-to-relay'
self.links = {
home: 'https://relay.dev/',
code: 'https://github.com/facebook/relay'
@@ -11,19 +9,7 @@ module Docs
html_filters.push 'relay/entries', 'relay/clean_html'
- options[:only] = [
- '/docs/en/graphql-in-relay',
- '/docs/en//relay-environment',
- '/docs/en/network-layer',
- '/docs/en/query-renderer',
- '/docs/en/fragment-container',
- '/docs/en/refetch-container',
- '/docs/en/pagination-container',
- '/docs/en/mutations',
- '/docs/en/subscriptions',
- '/docs/en/relay-store',
- '/docs/en/fetch-query'
- ]
+ options[:skip] = %w(videos)
options[:attribution] = <<-HTML
© 2020–present Facebook Inc.<br>
@@ -34,5 +20,21 @@ module Docs
get_latest_github_release('facebook', 'relay', opts)
end
+ version '10' do
+ self.release = '10.1.0'
+ self.base_url = "https://relay.dev/docs/en/"
+ # For some reason, the most-recent version isn't available at a versioned URL
+ end
+
+ version '9' do
+ self.release = '9.1.0'
+ self.base_url = "https://relay.dev/docs/en/v#{self.release}/"
+ end
+
+ version '8' do
+ self.release = '8.0.0'
+ self.base_url = "https://relay.dev/docs/en/v#{self.release}/"
+ end
+
end
end
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]