-
Notifications
You must be signed in to change notification settings - Fork 24
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
GH action to generate report #199
base: main
Are you sure you want to change the base?
Changes from 67 commits
d16abb2
713d64c
519360c
e6f4814
72496f4
8428d3a
e170b59
bfce046
0993129
87027d2
686f686
ff52971
ca6db89
d228f9d
68f707f
ad6b589
f18e8b7
387cfc1
04b4193
a443081
99ac264
6ee89b2
a8f6ed3
664853b
e35c974
a8af5f2
024cf6e
49c346e
0191c85
3eb9157
676a00e
c085751
3e18a37
0fa5ece
e1ecbc3
082d3cc
d46ea44
965a81e
7366d2d
747f0a4
6156e21
588892c
958630b
e24a666
0e58f10
5c28c0e
21811dd
97de713
644f8c3
e176592
a101f18
615baf2
bb8f25a
a8a615a
8157a12
d3f6f52
f1f687f
a10bc2a
7fd340d
1649b35
30aa60c
cc845d4
7854124
9fbad37
4fc9dde
86e645e
c614004
5a207bc
8ce97ee
f7fe412
e9904c8
7da2aae
41a65ed
8eb0f06
40947ef
0e9c065
e4794de
ee2c3b1
e1dcd63
541f1f3
ac364fb
05609a1
7560db2
502ff76
639f279
96490e5
64d69a7
6f3dae5
b38be7d
f0b0709
326bb55
c881287
a3505f9
fcd9531
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
# One-shot Kubernetes Job that runs the dandihub report-generator image with
# the shared home volume mounted at /home.
apiVersion: batch/v1
kind: Job
metadata:
  name: disk-usage-report-job
  namespace: jupyterhub
spec:
  backoffLimit: 0  # No retry on failure
  template:
    metadata:
      labels:
        app: disk-usage-report
    spec:
      containers:
        - name: disk-usage-report
          image: dandiarchive/dandihub-report-generator:latest
          # The only argument handed to the image's entrypoint.
          args:
            - "/home/"
          volumeMounts:
            # Expose the "home" sub-directory of the claim as /home.
            - name: persistent-storage
              mountPath: "/home"
              subPath: "home"
      restartPolicy: Never
      # Schedule onto nodes labelled for user workloads ...
      nodeSelector:
        NodeGroupType: default
        NodePool: default
        hub.jupyter.org/node-purpose: user
      # ... and tolerate the taint that reserves those nodes.
      tolerations:
        - key: "hub.jupyter.org/dedicated"
          operator: "Equal"
          value: "user"
          effect: "NoSchedule"
      volumes:
        - name: persistent-storage
          persistentVolumeClaim:
            claimName: efs-persist
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
# manifests/hello-world-pod.yaml
# Minimal Pod that prints a greeting, sleeps 30 seconds and exits; it uses the
# same node selector and toleration as the user-node workloads.
apiVersion: v1
kind: Pod
metadata:
  name: hello-world-pod
spec:
  containers:
    - name: hello
      image: busybox
      command: ['sh', '-c', 'echo Hello, World! && sleep 30']
  nodeSelector:
    NodeGroupType: default
    NodePool: default
    hub.jupyter.org/node-purpose: user
  tolerations:
    - key: "hub.jupyter.org/dedicated"
      operator: "Equal"
      value: "user"
      effect: "NoSchedule"
|
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
@@ -0,0 +1,63 @@ | ||||||
#!/usr/bin/env bash | ||||||
|
||||||
set -e | ||||||
|
||||||
# Load environment variables from the file if they are not already set | ||||||
ENV_FILE=".ec2-session.env" | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
# Tear down the EC2 session described by $ENV_FILE: terminate the instance,
# release its Elastic IP and, only if both steps succeeded, delete the file.
# Diagnostics go to stderr; progress messages stay on stdout.

# Pull INSTANCE_ID / ALLOC_ID (and anything else recorded) from the session file.
if [[ -f "$ENV_FILE" ]]; then
  echo "Loading environment variables from $ENV_FILE..."
  # shellcheck disable=SC1090  # path is only known at run time
  source "$ENV_FILE"
else
  echo "Warning: Environment file $ENV_FILE not found." >&2
fi

# Ensure required environment variables are set
for required_var in INSTANCE_ID ALLOC_ID; do
  if [[ -z "${!required_var:-}" ]]; then
    echo "Error: $required_var is not set. Cannot proceed with cleanup." >&2
    exit 1
  fi
done

# Check for AWS CLI and credentials
if ! command -v aws &>/dev/null; then
  echo "Error: AWS CLI is not installed. Please install it and configure your credentials." >&2
  exit 1
fi

if ! aws sts get-caller-identity &>/dev/null; then
  echo "Error: Unable to access AWS. Ensure your credentials are configured correctly." >&2
  exit 1
fi

# Tracks whether every teardown step succeeded; decides the fate of $ENV_FILE.
cleanup_ok=true

# Terminate EC2 instance
echo "Terminating EC2 instance with ID: $INSTANCE_ID..."
if aws ec2 terminate-instances --instance-ids "$INSTANCE_ID" --no-cli-pager; then
  echo "Instance termination initiated. Waiting for the instance to terminate..."
  if aws ec2 wait instance-terminated --instance-ids "$INSTANCE_ID"; then
    echo "Instance $INSTANCE_ID has been successfully terminated."
  else
    echo "Warning: Instance $INSTANCE_ID may not have terminated correctly." >&2
    cleanup_ok=false
  fi
else
  echo "Warning: Failed to terminate instance $INSTANCE_ID. It may already be terminated." >&2
  cleanup_ok=false
fi

# Release Elastic IP (attempted even if termination failed, as before)
echo "Releasing Elastic IP with Allocation ID: $ALLOC_ID..."
if aws ec2 release-address --allocation-id "$ALLOC_ID"; then
  echo "Elastic IP with Allocation ID $ALLOC_ID has been successfully released."
else
  echo "Warning: Failed to release Elastic IP with Allocation ID $ALLOC_ID. It may already be released." >&2
  cleanup_ok=false
fi

# Cleanup environment file -- but only when nothing above failed. The file is
# the only record of the resource IDs, so deleting it after a failed step
# would make a leaked instance or Elastic IP hard to find again.
if [[ "$cleanup_ok" == true ]]; then
  if [[ -f "$ENV_FILE" ]]; then
    echo "Removing environment file $ENV_FILE..."
    rm -f -- "$ENV_FILE"
  fi
  echo "Cleanup complete."
else
  echo "Warning: Cleanup finished with warnings; keeping $ENV_FILE so the resource IDs are not lost." >&2
fi
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
#!/usr/bin/env python3 | ||
|
||
import os | ||
import time | ||
import json | ||
import sys | ||
import gzip | ||
from datetime import datetime | ||
|
||
def list_files_with_metadata(directory, output_file):
    """Index every file under ``directory`` and write the result as gzipped JSON.

    Args:
        directory: Root of the tree to walk. Recorded paths are relative to it.
        output_file: Destination path of the gzip-compressed JSON document.

    The document has three keys: ``index_timestamp`` (local time the run
    started, ``YYYY-MM-DD HH:MM:SS``), ``duration_seconds`` (time spent walking
    the tree) and ``files``, a list of ``{"path", "size", "modified",
    "created"}`` entries.

    Files that cannot be stat'ed (dangling symlinks, permission problems,
    files removed mid-walk, symlink loops, ...) are reported on stdout and
    skipped rather than aborting the whole index.
    """
    # Record the start time
    start_time = time.time()

    # Get the current date and time for indexing
    index_timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    files_metadata = []

    # os.walk does not descend into symlinked directories by default, so each
    # real directory is visited once. Only files are recorded; empty
    # directories therefore leave no trace in the index.
    for root, _dirs, files in os.walk(directory):
        for name in files:
            filepath = os.path.join(root, name)
            relative_path = os.path.relpath(filepath, directory)

            try:
                # One stat call yields size and both timestamps (and follows
                # symlinks, exactly like os.path.getsize/getmtime/getctime).
                info = os.stat(filepath)
            except OSError as e:
                print(f"Skipping {filepath}: {e}")
                continue

            files_metadata.append({
                "path": relative_path,
                "size": info.st_size,
                "modified": time.ctime(info.st_mtime),
                # NOTE: st_ctime is the inode-change time on Unix, not a true
                # creation time; the key name is kept for output compatibility.
                "created": time.ctime(info.st_ctime),
            })

    # Record the end time and calculate the duration
    end_time = time.time()
    duration = end_time - start_time

    # Prepare the output data with additional metadata
    output_data = {
        "index_timestamp": index_timestamp,
        "duration_seconds": duration,
        "files": files_metadata
    }

    # Compress and write the output data to a .json.gz file
    with gzip.open(output_file, "wt", encoding="utf-8") as gz_file:
        json.dump(output_data, gz_file, indent=4)

    print(f"Indexing completed. Compressed results written to {output_file}")
|
||
# Ensure the script is called with the required arguments | ||
if __name__ == "__main__":
    # Exactly two positional arguments are expected after the script name:
    # the directory to index and the output path.
    cli_args = sys.argv[1:]
    if len(cli_args) != 2:
        print("Usage: python script.py <directory_to_index> <output_json_gz_file>")
        sys.exit(1)

    directory_to_index, output_json_gz_file = cli_args

    # Append the .json.gz suffix when the caller left it off, so the file
    # name reflects its format.
    if not output_json_gz_file.endswith(".json.gz"):
        output_json_gz_file = output_json_gz_file + ".json.gz"

    list_files_with_metadata(directory_to_index, output_json_gz_file)
|
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
@@ -0,0 +1,133 @@ | ||||||
#!/usr/bin/env bash | ||||||
|
||||||
set -e | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
|
||||||
# Check for AWS CLI and credentials | ||||||
if ! command -v aws &>/dev/null; then | ||||||
echo "Error: AWS CLI is not installed. Please install it and configure your credentials." | ||||||
exit 1 | ||||||
fi | ||||||
|
||||||
if ! aws sts get-caller-identity &>/dev/null; then | ||||||
echo "Error: Unable to access AWS. Ensure your credentials are configured correctly." | ||||||
exit 1 | ||||||
fi | ||||||
|
||||||
# Set variables | ||||||
AWS_REGION="us-east-2" | ||||||
# TODO document that this key needs to be created | ||||||
KEY_NAME="dandihub-gh-actions" | ||||||
# TODO create if DNE | ||||||
# allow gh-actions to ssh into ec2 job instance from anywhere | ||||||
SECURITY_GROUP_ID="sg-0bf2dc1c2ff9c122e" | ||||||
# TODO retrieve subnet id (public, created by dandi-hub eks-dandihub-public-us-east-2a) | ||||||
SUBNET_ID="subnet-0f544cca61ccd2804" | ||||||
AMI_ID="ami-088d38b423bff245f" | ||||||
EFS_ID="fs-02aac16c4c6c2dc27" | ||||||
LOCAL_SCRIPTS_DIR=".github/scripts" | ||||||
REMOTE_SCRIPTS_DIR="/home/ec2-user/scripts" | ||||||
MOUNT_POINT="/mnt/efs" | ||||||
ENV_FILE=".ec2-session.env" | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I dislike the fact that it would just dump into some hidden file in my current directory.
Suggested change
but then we might want to add logic to react if the file already exists -- since that would likely mean that cleanup did not remove it and the instance might still be running, etc. |
||||||
|
||||||
# Launch a temporary EC2 instance, attach an Elastic IP, upload the report
# scripts and mount EFS. Every resource ID is appended to $ENV_FILE as soon as
# it is known so the cleanup script can find it even if a later step fails.
# Requires: the configuration variables set above and EC2_SSH_KEY (path to the
# SSH private key) in the environment.

# Print an error message to stderr and abort the launch.
die() {
  echo "Error: $*" >&2
  exit 1
}

# True when an AWS CLI text result is empty or the literal "None", which is
# what `--output text` prints when the --query expression matches nothing.
is_blank() {
  [[ -z "$1" || "$1" == "None" ]]
}

# Fail fast, before any billable resource exists, if the SSH key is unusable.
[[ -n "${EC2_SSH_KEY:-}" ]] || die "EC2_SSH_KEY is not set; it must be the path to the SSH private key."
[[ -r "$EC2_SSH_KEY" ]] || die "SSH key file $EC2_SSH_KEY is not readable."
ssh_opts=(-i "$EC2_SSH_KEY" -o "StrictHostKeyChecking=no")

# Start a fresh session file (this overwrites any previous one)
echo "# Environment variables for EC2 session" > "$ENV_FILE"
echo "# Auto-generated by launch script on $(date)" >> "$ENV_FILE"

# Run EC2 instance
# Assignment is kept separate from `export` so a failing aws call is detected
# instead of being masked by export's own exit status.
echo "Launching EC2 instance..."
if ! INSTANCE_ID=$(aws ec2 run-instances \
    --image-id "$AMI_ID" \
    --count 1 \
    --instance-type t3.micro \
    --key-name "$KEY_NAME" \
    --security-group-ids "$SECURITY_GROUP_ID" \
    --subnet-id "$SUBNET_ID" \
    --tag-specifications "ResourceType=instance,Tags=[{Key=Name,Value=dandihub-gh-actions}]" \
    --query 'Instances[0].InstanceId' \
    --output text) || is_blank "$INSTANCE_ID"; then
  die "Failed to launch EC2 instance."
fi
export INSTANCE_ID
echo "Instance ID: $INSTANCE_ID"
echo "export INSTANCE_ID=$INSTANCE_ID" >> "$ENV_FILE"

# Wait for instance to initialize
echo "Waiting for instance to reach status OK..."
aws ec2 wait instance-status-ok --instance-ids "$INSTANCE_ID" \
  || die "Instance $INSTANCE_ID did not reach status OK."

# Allocate Elastic IP
echo "Allocating Elastic IP..."
if ! ALLOC_ID=$(aws ec2 allocate-address \
    --tag-specifications "ResourceType=elastic-ip,Tags=[{Key=Name,Value=dandihub-gh-actions-eip}]" \
    --query 'AllocationId' \
    --output text) || is_blank "$ALLOC_ID"; then
  die "Failed to allocate Elastic IP."
fi
export ALLOC_ID
echo "Elastic IP Allocation ID: $ALLOC_ID"
echo "export ALLOC_ID=$ALLOC_ID" >> "$ENV_FILE"

# Associate Elastic IP with instance
echo "Associating Elastic IP with instance..."
if ! EIP_ASSOC=$(aws ec2 associate-address \
    --instance-id "$INSTANCE_ID" \
    --allocation-id "$ALLOC_ID" \
    --query 'AssociationId' \
    --output text) || is_blank "$EIP_ASSOC"; then
  die "Failed to associate Elastic IP."
fi
export EIP_ASSOC

# Get Elastic IP address
if ! PUBLIC_IP=$(aws ec2 describe-addresses \
    --allocation-ids "$ALLOC_ID" \
    --query 'Addresses[0].PublicIp' \
    --output text) || is_blank "$PUBLIC_IP"; then
  die "Failed to look up the Elastic IP address."
fi
export PUBLIC_IP

echo "Elastic IP Address: $PUBLIC_IP"
echo "export PUBLIC_IP=$PUBLIC_IP" >> "$ENV_FILE"

remote="ec2-user@$PUBLIC_IP"

# Upload scripts to EC2 instance
echo "Uploading scripts to EC2 instance..."
# scp needs the target directory to exist; create it first (no-op if present).
ssh "${ssh_opts[@]}" "$remote" "mkdir -p '$REMOTE_SCRIPTS_DIR'" \
  || die "Failed to create $REMOTE_SCRIPTS_DIR on the instance."

# Test scp directly: with `set -e` active a separate `$?` check after the
# command would never see a failure.
if scp "${ssh_opts[@]}" \
    "$LOCAL_SCRIPTS_DIR/produce-report.py" "$LOCAL_SCRIPTS_DIR/create-file-index.py" \
    "$remote:$REMOTE_SCRIPTS_DIR/"; then
  echo "Scripts uploaded successfully to $REMOTE_SCRIPTS_DIR on the instance."
else
  die "Failed to upload scripts to the instance."
fi

# TODO automate
# eks-dandihub-efs sg is created by dandi-hub install
# this sg needs to accept incoming 2049 from the sg created for this ec2
# sg-061d875722e569724 - eks-dandihub-efs
# aws ec2 authorize-security-group-ingress \
#   --group-id sg-061d875722e569724 \
#   --protocol tcp \
#   --port 2049 \
#   --source-group $SECURITY_GROUP_ID

# Mount EFS on the EC2 instance
# $EFS_ID and $MOUNT_POINT are expanded locally, before the command is sent.
echo "Mounting EFS on the EC2 instance..."
ssh "${ssh_opts[@]}" "$remote" \
  "sudo yum install -y amazon-efs-utils && \
   sudo mkdir -p $MOUNT_POINT && \
   sudo mount -t efs $EFS_ID:/ $MOUNT_POINT && \
   echo '$EFS_ID:/ $MOUNT_POINT efs defaults,_netdev 0 0' | sudo tee -a /etc/fstab && \
   echo 'EFS mounted at $MOUNT_POINT'" \
  || die "Failed to mount EFS on the instance."

# Output SSH command for convenience
echo "To connect to your instance, use:"
echo "ssh -i \$EC2_SSH_KEY ec2-user@$PUBLIC_IP"

echo "Environment variables saved to $ENV_FILE."
echo "Run 'source $ENV_FILE' to restore the environment variables."
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.