Skip to content

Commit

Permalink
chore: merge
Browse files Browse the repository at this point in the history
  • Loading branch information
nsantacruz committed Dec 16, 2024
2 parents 6386f4f + e86933c commit bef0c0b
Show file tree
Hide file tree
Showing 29 changed files with 349 additions and 189 deletions.
24 changes: 17 additions & 7 deletions .github/workflows/weekly.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ name: Weekly Tasks
on:
schedule:
- cron: '0 0 * 6 *'
workflow_dispatch:

jobs:
build-mongo:
Expand All @@ -11,11 +12,21 @@ jobs:
contents: 'read'
id-token: 'write'
steps:
- name: Maximize build space
uses: AdityaGarg8/remove-unwanted-software@v4.1
with:
remove-android: 'true'
remove-dotnet: 'true'
remove-haskell: 'true'
remove-codeql: 'true'
remove-docker-images: 'true'
- uses: actions/checkout@v2
- name: Set up QEMU
uses: docker/setup-qemu-action@v1
uses: docker/setup-qemu-action@v3
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v1
uses: docker/setup-buildx-action@v3
with:
buildkitd-config: ./build/standalone-db/buildkit.toml
- id: auth
name: Authenticate to Google Cloud
uses: google-github-actions/auth@v1
Expand All @@ -37,27 +48,26 @@ jobs:
password: '${{ steps.auth.outputs.access_token }}'
- name: Get current date
id: date
run: echo "date$(date +'%Y%m%d%H%M')" >> $GITHUB_OUTPUT
run: echo "date=$(date +'%Y%m%d%H%M')" >> $GITHUB_OUTPUT
- name: Generate image metadata
id: meta
uses: docker/metadata-action@v3
with:
images: |
gcr.io/${{ secrets.DEV_PROJECT }}/sefaria-mongo
us-east1-docker.pkg.dev/${{ secrets.DEV_PROJECT }}/containers/sefaria-${{ matrix.app }}-${{ steps.branch-name.outputs.current_branch }}
us-east1-docker.pkg.dev/${{ secrets.DEV_PROJECT }}/sefaria-public/sefaria-mongo
# generate Docker tags based on the following events/attributes
tags: |
type=sha,enable=true,priority=100,prefix=sha-,suffix=-${{ steps.date.outputs.date }},format=short
type=sha
flavor: |
latest=true
- name: build and push
uses: docker/build-push-action@v2
uses: docker/build-push-action@v6
with:
cache-from: type=registry, ref=sefaria-mongo/cache
cache-to: type=registry, ref=sefaria-mongo/cache, mode=max
context: .
push: true
platforms: linux/amd64,linux/arm64
file: ./build/standalone-db/Dockerfile
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
Expand Down
2 changes: 2 additions & 0 deletions build/standalone-db/buildkit.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[worker.oci]
max-parallelism = 1
5 changes: 3 additions & 2 deletions django_topics/models/topic.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from django.db import models
from django.db.models.query import QuerySet
import random
from django_topics.models.pool import TopicPool

Expand All @@ -14,10 +15,10 @@ def sample_topic_slugs(self, order, pool: str = None, limit=10) -> list[str]:
else:
raise Exception("Invalid order: '{}'".format(order))

def get_pools_by_topic_slug(self, topic_slug: str) -> list[str]:
def get_pools_by_topic_slug(self, topic_slug: str) -> QuerySet:
return self.filter(slug=topic_slug).values_list("pools__name", flat=True)

def get_topic_slugs_by_pool(self, pool: str) -> list[str]:
def get_topic_slugs_by_pool(self, pool: str) -> QuerySet:
return self.filter(pools__name=pool).values_list("slug", flat=True)


Expand Down
5 changes: 5 additions & 0 deletions helm-chart/sefaria-project/templates/configmap/gunicorn.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ data:
{{- end }}
loglevel = "warning"
preload_app = True
{{- if .Values.instrumentation.enabled }}
def post_fork(server, worker):
Expand All @@ -33,6 +34,10 @@ data:
{{- end }}
def on_starting(server):
from reader.startup import init_library_cache
init_library_cache()
def combined_logformat(logger, name, event_dict):
if event_dict.get('logger') == "gunicorn.access":
message = event_dict['event']
Expand Down
2 changes: 1 addition & 1 deletion helm-chart/sefaria-project/templates/rollout/task.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ spec:
- name: SLACK_URL
valueFrom:
secretKeyRef:
name: { { template "sefaria.secrets.slackWebhook" . } }
name: {{ template "sefaria.secrets.slackWebhook" . }}
key: slack-webhook
envFrom:
{{- if .Values.tasks.enabled }}
Expand Down
2 changes: 1 addition & 1 deletion helm-chart/sefaria-project/templates/rollout/web.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ spec:
- name: web
image: "{{ .Values.web.containerImage.imageRegistry }}:{{ .Values.web.containerImage.tag }}"
imagePullPolicy: Always
args: [ "python manage.py migrate && gunicorn sefaria.wsgi --access-logfile - --error-logfile - --timeout 300 --threads {{ .Values.web.resources.web.gunicornThreadCount }} --worker-tmp-dir /dev/shm -b 0.0.0.0:80" ]
args: [ "python manage.py migrate && gunicorn sefaria.wsgi --access-logfile - --error-logfile - --timeout 420 --threads {{ .Values.web.resources.web.gunicornThreadCount }} --worker-tmp-dir /dev/shm -b 0.0.0.0:80" ]
env:
# WEB_CONCURRENCY is used for determining the number of server workers
- name: WEB_CONCURRENCY
Expand Down
Empty file.
14 changes: 14 additions & 0 deletions reader/management/commands/runserver.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Using staticfiles as the base class in order to not overwrite its custom runserver logic
from django.contrib.staticfiles.management.commands.runserver import Command as RunserverCommand
from reader.startup import init_library_cache
import structlog
logger = structlog.get_logger(__name__)


class Command(RunserverCommand):
    """`runserver` command that also initialises the library cache.

    Subclasses the staticfiles `runserver` command so that command's own
    logic is kept rather than overwritten.
    """

    def get_handler(self, *args, **options):
        """Return the parent command's handler after initialising the library cache.

        All positional and keyword arguments are forwarded unchanged to the
        parent implementation.
        """
        parent_handler = super().get_handler(*args, **options)
        logger.info("Starting reader application")
        init_library_cache()
        return parent_handler
3 changes: 3 additions & 0 deletions reader/startup.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@


def init_library_cache():
import django
django.setup()
import structlog
logger = structlog.get_logger(__name__)

Expand Down Expand Up @@ -32,3 +34,4 @@ def init_library_cache():

if server_coordinator:
server_coordinator.connect()
logger.info("Initialization Complete")
37 changes: 17 additions & 20 deletions reader/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -3671,36 +3671,33 @@ def profile_follow_api(request, ftype, slug):
return jsonResponse(response)
return jsonResponse({"error": "Unsupported HTTP method."})


@staff_member_required
def topic_upload_photo(request, topic):
from io import BytesIO
def topic_upload_photo(request, slug, secondary=False):
from sefaria.helper.topic import add_image_to_topic, delete_image_from_topic, add_secondary_image_to_topic, delete_secondary_image_from_topic
import uuid
import base64
if request.method == "DELETE":
old_filename = request.GET.get("old_filename")
if old_filename is None:
return jsonResponse({"error": "You cannot remove an image as you haven't selected one yet."})
old_filename = f"topics/{old_filename.split('/')[-1]}"
GoogleStorageManager.delete_filename(old_filename, GoogleStorageManager.TOPICS_BUCKET)
topic = Topic.init(topic)
if hasattr(topic, "image"):
del topic.image
topic.save()
if secondary:
delete_secondary_image_from_topic(slug)
else:
delete_image_from_topic(slug)
return jsonResponse({"success": "You have successfully removed the image."})
elif request.method == "POST":
file = request.POST.get('file')
old_filename = request.POST.get('old_filename') # delete file from google storage if there is one there
if old_filename:
old_filename = f"topics/{old_filename.split('/')[-1]}"
img_file_in_mem = BytesIO(base64.b64decode(file))
img_url = GoogleStorageManager.upload_file(img_file_in_mem, f"topics/{request.user.id}-{uuid.uuid1()}.gif",
GoogleStorageManager.TOPICS_BUCKET, old_filename=old_filename)
topic = Topic.init(topic)
if not hasattr(topic, "image"):
topic.image = {"image_uri": img_url, "image_caption": {"en": "", "he": ""}}

to_filename = f"topics/{slug}-{'secondary-' if secondary else ''}{uuid.uuid1()}.png"
img_url = GoogleStorageManager.upload_file(request.FILES.get('file'), to_filename, GoogleStorageManager.TOPICS_BUCKET, old_filename=old_filename)
if secondary:
add_secondary_image_to_topic(slug, img_url)
else:
topic.image["image_uri"] = img_url
topic.save()
add_image_to_topic(slug, img_url)
return jsonResponse({"url": img_url})
return jsonResponse({"error": "Unsupported HTTP method."})

Expand Down Expand Up @@ -4664,9 +4661,9 @@ def android_asset_links_json(request):
}]
)

def application_health_api(request):
def rollout_health_api(request):
"""
Defines the /healthz and /health-check API endpoints which responds with
Defines the /healthz-rollout API endpoint which responds with
200 if the application is ready for requests,
500 if the application is not ready for requests
"""
Expand All @@ -4680,9 +4677,9 @@ def application_health_api_nonlibrary(request):
return http.HttpResponse("Healthy", status="200")


def rollout_health_api(request):
def application_health_api(request):
"""
Defines the /healthz-rollout API endpoint which responds with
Defines the /healthz API endpoint which responds with
200 if the services Django depends on, Redis, Multiserver, and NodeJs
are available.
500 if any of the aforementioned services are not available
Expand Down
63 changes: 45 additions & 18 deletions sefaria/helper/topic.py
Original file line number Diff line number Diff line change
Expand Up @@ -729,15 +729,16 @@ def calculate_other_ref_scores(ref_topic_map):
return num_datasource_map, langs_available, comp_date_map, order_id_map


def update_ref_topic_link_orders(sheet_source_links, sheet_topic_links):
other_ref_topic_links = list(RefTopicLinkSet({"is_sheet": False, "generatedBy": {"$ne": TopicLinkHelper.generated_by_sheets}}))
ref_topic_links = other_ref_topic_links + sheet_source_links
def update_ref_topic_link_orders(source_links, sheet_topic_links):
"""
topic_tref_score_map, ref_topic_map = calculate_mean_tfidf(ref_topic_links)
@param source_links: Links between sources and topics (as opposed to sheets and topics)
@param sheet_topic_links: Links between sheets and topics
"""
topic_tref_score_map, ref_topic_map = calculate_mean_tfidf(source_links)
num_datasource_map, langs_available, comp_date_map, order_id_map = calculate_other_ref_scores(ref_topic_map)
pr_map, pr_seg_map = calculate_pagerank_scores(ref_topic_map)
sheet_cache = {}
intra_topic_link_cache = {}

def get_sheet_order(topic_slug, sheet_id):
if sheet_id in sheet_cache:
Expand Down Expand Up @@ -797,7 +798,7 @@ def get_sheet_order(topic_slug, sheet_id):
}

all_ref_topic_links_updated = []
all_ref_topic_links = sheet_topic_links + ref_topic_links
all_ref_topic_links = sheet_topic_links + source_links
for l in tqdm(all_ref_topic_links, desc='update link orders'):
if l.is_sheet:
setattr(l, 'order', get_sheet_order(l.toTopic, int(l.ref.replace("Sheet ", ""))))
Expand Down Expand Up @@ -962,15 +963,18 @@ def calculate_popular_writings_for_authors(top_n, min_pr):
}).save()

def recalculate_secondary_topic_data():
sheet_source_links = RefTopicLinkSet({'pools': 'textual'})
sheet_topic_links = RefTopicLinkSet({'pools': 'sheets'})
sheet_related_links = IntraTopicLinkSet()
source_links = RefTopicLinkSet({'is_sheet': False})
sheet_links = [RefTopicLink(l) for l in generate_sheet_topic_links()]

related_links = update_intra_topic_link_orders(sheet_related_links)
all_ref_links = update_ref_topic_link_orders(sheet_source_links.array(), sheet_topic_links.array())
related_links = update_intra_topic_link_orders(IntraTopicLinkSet())
all_ref_links = update_ref_topic_link_orders(source_links.array(), sheet_links)

RefTopicLinkSet({"is_sheet": True}).delete()

db.topic_links.bulk_write([
UpdateOne({"_id": l._id}, {"$set": {"order": l.order}})
if getattr(l, "_id", False) else
InsertOne(l.contents(for_db=True))
for l in (all_ref_links + related_links)
])

Expand Down Expand Up @@ -1314,7 +1318,7 @@ def delete_ref_topic_link(tref, to_topic, link_type, lang):
return {"error": f"Cannot delete link between {tref} and {to_topic}."}


def add_image_to_topic(topic_slug, image_uri, en_caption, he_caption):
def add_image_to_topic(topic_slug: str, image_uri: str, en_caption: str = "", he_caption: str =""):
"""
A function to add an image to a Topic in the database. Helper for data migration.
This function queries the desired Topic, adds the image data, and then saves.
Expand All @@ -1325,9 +1329,32 @@ def add_image_to_topic(topic_slug, image_uri, en_caption, he_caption):
:param he_caption String: The Hebrew caption for a Topic image
"""
topic = Topic.init(topic_slug)
topic.image = {"image_uri": image_uri,
"image_caption": {
"en": en_caption,
"he": he_caption
}}
topic.save()
if not hasattr(topic, "image"):
topic.image = {"image_uri": image_uri, "image_caption": {"en": en_caption, "he": he_caption}}
else:
topic.image["image_uri"] = image_uri
if en_caption:
topic.image["image_caption"]["en"] = en_caption
if he_caption:
topic.image["image_caption"]["he"] = he_caption
topic.save()


def add_secondary_image_to_topic(topic_slug: str, image_uri: str):
    """
    Store `image_uri` as the secondary image of a Topic, then save the Topic.

    :param topic_slug String: The slug of the Topic to update
    :param image_uri String: The URI recorded in the Topic's `secondary_image_uri` attribute
    """
    # NOTE(review): presumably Topic.init gives back nothing usable for an unknown
    # slug, in which case the assignment below would fail -- confirm against callers.
    secondary_topic = Topic.init(topic_slug)
    setattr(secondary_topic, "secondary_image_uri", image_uri)
    secondary_topic.save()


# Remove the `image` attribute from the Topic identified by `topic_slug`, if it has one.
def delete_image_from_topic(topic_slug: str):
topic = Topic.init(topic_slug)
# Nothing to delete unless the Topic currently carries an `image` attribute.
if hasattr(topic, "image"):
del topic.image
# NOTE(review): indentation is not visible in this view -- presumably the save runs
# only after an image was actually removed; confirm before relying on it.
topic.save()


# Remove the `secondary_image_uri` attribute from the Topic identified by `topic_slug`, if set.
def delete_secondary_image_from_topic(topic_slug: str):
topic = Topic.init(topic_slug)
# Nothing to delete unless the Topic currently carries a `secondary_image_uri` attribute.
if hasattr(topic, "secondary_image_uri"):
del topic.secondary_image_uri
# NOTE(review): indentation is not visible in this view -- presumably the save runs
# only after the attribute was actually removed; confirm before relying on it.
topic.save()
2 changes: 1 addition & 1 deletion sefaria/model/dependencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def process_version_title_change_in_search(ver, **kwargs):
text_index = library.get_index(ver.title)
delete_version(text_index, kwargs.get("old"), ver.language)
for ref in text_index.all_segment_refs():
TextIndexer.index_ref(search_index_name, ref, kwargs.get("new"), ver.language, False)
TextIndexer.index_ref(search_index_name, ref, kwargs.get("new"), ver.language, ver.languageFamilyName, ver.isPrimary)


# Version Title Change
Expand Down
8 changes: 4 additions & 4 deletions sefaria/model/tests/topic_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,17 +180,17 @@ def test_change_title(self, topic_graph):
def test_pools(self, topic_graph, topic_pool):
ts = topic_graph['topics']
t1 = ts['1']
assert len(t1.pools) == 0
assert len(t1.get_pools()) == 0
t1.add_pool(topic_pool.name)
assert t1.pools == [topic_pool.name]
assert t1.get_pools() == [topic_pool.name]

# don't add duplicates
t1.add_pool(topic_pool.name)
assert t1.pools == [topic_pool.name]
assert t1.get_pools() == [topic_pool.name]

assert t1.has_pool(topic_pool.name)
t1.remove_pool(topic_pool.name)
assert len(t1.pools) == 0
assert len(t1.get_pools()) == 0
# don't error when removing a non-existent pool
t1.remove_pool(topic_pool.name)

Expand Down
6 changes: 4 additions & 2 deletions sefaria/model/topic.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,7 @@ class Topic(abst.SluggedAbstractMongoRecord, AbstractTitledObject):
'isAmbiguous', # True if topic primary title can refer to multiple other topics
"data_source", #any topic edited manually should display automatically in the TOC and this flag ensures this
'image',
'secondary_image_uri',
"portal_slug", # slug to relevant Portal object
]

Expand Down Expand Up @@ -187,7 +188,7 @@ def load(self, query, proj=None):
def _set_derived_attributes(self):
self.set_titles(getattr(self, "titles", None))
slug = getattr(self, "slug", None)
self.pools = list(DjangoTopic.objects.get_pools_by_topic_slug(str(slug))) if slug is not None else []
self.pools = list(DjangoTopic.objects.get_pools_by_topic_slug(slug)) if slug is not None else []
if self.__class__ != Topic and not getattr(self, "subclass", False):
# in a subclass. set appropriate "subclass" attribute
setattr(self, "subclass", self.reverse_subclass_map[self.__class__.__name__])
Expand Down Expand Up @@ -229,7 +230,8 @@ def _sanitize(self):
setattr(self, attr, p)

def get_pools(self) -> list[str]:
return getattr(self, 'pools', [])
slug = getattr(self, "slug", None)
return list(DjangoTopic.objects.get_pools_by_topic_slug(str(slug))) if slug is not None else []

def has_pool(self, pool: str) -> bool:
    """Report whether `pool` is among the names returned by `get_pools()`."""
    current_pools = self.get_pools()
    return pool in current_pools
Expand Down
Loading

0 comments on commit bef0c0b

Please sign in to comment.