Skip to content

Commit

Permalink
starts implementing #95
Browse files Browse the repository at this point in the history
  • Loading branch information
WolfgangFahl committed Feb 24, 2024
1 parent b3fe9ec commit 0a82f9f
Show file tree
Hide file tree
Showing 10 changed files with 215 additions and 1,609 deletions.
45 changes: 29 additions & 16 deletions ceurws/ceur_ws.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from lodstorage.storageconfig import StorageConfig
from tqdm import tqdm

from ceurws.indexparser import IndexHtmlParser
from ceurws.indexparser import IndexHtmlParser, ParserConfig
from ceurws.loctime import LoctimeParser
from ceurws.papertocparser import PaperTocParser
from ceurws.utils.download import Download
Expand Down Expand Up @@ -374,23 +374,33 @@ def loadFromBackup(self):
load from the SQLITE Cache file
"""
self.fromStore(cacheFile=CEURWS.CACHE_FILE)

def recreate(self, progress: bool = False, limit=None):

def update(self,parser_config:ParserConfig) -> None:
    """
    update me by checking for recently added volumes

    Delegates all work to update_or_recreate.

    Args:
        parser_config(ParserConfig): the parser configuration that is
            passed on unchanged to update_or_recreate
    """
    # NOTE(review): recreate issues exactly the same call, so both entry
    # points currently behave alike - presumably parser_config is meant to
    # carry the difference between an update and a full recreate; confirm
    self.update_or_recreate(parser_config)

def recreate(self,parser_config:ParserConfig) -> None:
    """
    recreate me by a full parse of all volume files

    Delegates all work to update_or_recreate.

    Args:
        parser_config(ParserConfig): the parser configuration that is
            passed on unchanged to update_or_recreate
    """
    # NOTE(review): update issues exactly the same call, so both entry
    # points currently behave alike - presumably parser_config is meant to
    # carry the difference between a full recreate and an update; confirm
    self.update_or_recreate(parser_config)

def update_or_recreate(self,parser_config:ParserConfig):
"""
recreate or update me by parsing the index.html file
Args:
progress(bool): if True show progress
down_to_volume(int): the volume number to parse down to
"""
progress_bar=parser_config.progress_bar
loctime_parser = LoctimeParser()
pm = PaperManager()
paper_list = pm.getList()
# first reload me from the main index
self.loadFromIndexHtml(force=True)
if progress:
t = tqdm(total=len(self.volumes))
else:
t = None
invalid = 0
for volume in self.volumes:
_volume_record, soup = volume.extractValuesFromVolumePage()
Expand All @@ -412,14 +422,15 @@ def recreate(self, progress: bool = False, limit=None):
setattr(volume, attr, value)
volume.resolveLoctime()
# update progress bar
if t is not None and volume.valid:
# print(f"{volume.url}:{volume.acronym}:{volume.desc}:{volume.h1}:{volume.title}")
if volume.acronym:
description = volume.acronym[:20]
else:
description = "?"
t.set_description(f"{description}")
t.update()
if progress_bar:
if volume.valid:
# print(f"{volume.url}:{volume.acronym}:{volume.desc}:{volume.h1}:{volume.title}")
if volume.acronym:
description = volume.acronym[:20]
else:
description = "?"
progress_bar.set_description(f"{description}")
progress_bar.update()
print(
f"storing recreated volume table for {len(self.volumes)} volumes ({invalid} invalid)"
)
Expand All @@ -432,7 +443,9 @@ def loadFromIndexHtml(self, force: bool = False):
load my content from the index.html file
Args:
force(bool): if TRUE fetch index.html from internet else read locally cached version
force(bool): if TRUE fetch index.html
from ceur-ws.org internet homepage
else read locally cached version
"""
htmlText = self.getIndexHtml(force=force)
indexParser = IndexHtmlParser(htmlText, debug=self.debug)
Expand Down
215 changes: 0 additions & 215 deletions ceurws/ceur_ws_cmd.py

This file was deleted.

40 changes: 25 additions & 15 deletions ceurws/ceur_ws_web_cmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,21 @@ def getArgParser(self, description: str, version_msg) -> ArgumentParser:
override the default argparser call
"""
parser = super().getArgParser(description, version_msg)
parser.add_argument(
"-dbu", "--dblp_update", action="store_true", help="update dblp cache"
)
parser.add_argument(
"-nq",
"--namedqueries",
action="store_true",
help="generate named queries [default: %(default)s]",
)
parser.add_argument(
"-den",
"--dblp_endpoint_name",
help="name of dblp endpoint to use %(default)s",
default="qlever-dblp",
)
parser.add_argument(
"-f",
"--force",
Expand All @@ -53,10 +68,10 @@ def getArgParser(self, description: str, version_msg) -> ArgumentParser:
help="recreate caches e.g. volume table",
)
parser.add_argument(
"-den",
"--dblp_endpoint_name",
help="name of dblp endpoint to use %(default)s",
default="qlever-dblp",
"-uv",
"--update",
action="store_true",
help="update volumes by parsing index.html adding recently published volumes",
)
parser.add_argument(
"-wen",
Expand All @@ -70,15 +85,6 @@ def getArgParser(self, description: str, version_msg) -> ArgumentParser:
action="store_true",
help="update tables from wikidata",
)
parser.add_argument(
"-dbu", "--dblp_update", action="store_true", help="update dblp cache"
)
parser.add_argument(
"-nq",
"--namedqueries",
action="store_true",
help="generate named queries [default: %(default)s]",
)
return parser

def handle_args(self) -> bool:
Expand All @@ -95,9 +101,13 @@ def handle_args(self) -> bool:
manager.loadFromBackup()
for volume in manager.getList():
print(volume)
if args.recreate:
if args.recreate or args.update:
manager = VolumeManager()
manager.recreate(progress=True)
progress_bar=tqdm(total=len(manager.volumes))
if args.recreate:
manager.recreate(progress_bar=progress_bar)
else:
manager.update(progress_bar=progress_bar)
if args.wikidata_update:
wdsync = WikidataSync.from_args(args)
wdsync.update(withStore=True)
Expand Down
Loading

0 comments on commit 0a82f9f

Please sign in to comment.