121 add use cases semiprime factors ratio detection #126

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open · wants to merge 9 commits into base: main
18 changes: 12 additions & 6 deletions .github/workflows/automerge.yml
@@ -5,7 +5,7 @@ name: Python application
 
 on:
   push:
-    branches: [ "main", "109-spelling-errors-in-license" ]
+    branches: [ "main", "121-add-use-cases-semiprime-factors-ratio-detection" ]
 
 permissions:
   contents: read
@@ -24,6 +24,7 @@ jobs:
           python -m pip install --upgrade pip
           pip install flake8 pytest
           if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
+          pip3 install -r cicd-requirements.txt
       - name: Lint with flake8
         run: |
           # stop the build if there are Python syntax errors or undefined names
@@ -43,12 +44,17 @@ jobs:
         run: python3 regression-example-ames-no-preproc.py
       - name: Test distributed random search Ames by running - Val set
         run: python3 regression-example-ames-no-preproc-val-set.py
-      - name: Test text classifier - random search - ham-spam
-        run: python3 text-class-ham-or-spam.py
-        timeout-minutes: 90
+      # - name: Test text classifier - random search - ham-spam
+      #   run: python3 text-class-ham-or-spam.py
+      #   timeout-minutes: 90
       # - name: Test image classifier - small subset of CIFAR10
       #   timeout-minutes: 90
       #   run: python3 cifar10-example.py
-      - name: Test image classifier EfficientNetv2S - small subset of CIFAR10
+      # - name: Test image classifier EfficientNetv2S - small subset of CIFAR10
+      #   timeout-minutes: 240
+      #   run: python3 cifar-10-efficientnetv2s.py
+      - name: Test semiprime factor ratio class
         timeout-minutes: 240
-        run: python3 cifar-10-efficientnetv2s.py
+        run: python3 semiprime_factor_ratio_class.py
+
+
3 changes: 3 additions & 0 deletions cicd-requirements.txt
@@ -0,0 +1,3 @@
+tensorflow-datasets==4.9.3
+gmpy2==2.1.5
+scikit-learn==1.3.2
6,676 changes: 6,676 additions & 0 deletions semiprime_factor_ratio_class.ipynb

Large diffs are not rendered by default.

127 changes: 127 additions & 0 deletions semiprime_factor_ratio_class.py
@@ -0,0 +1,127 @@
#
# This test is based on Sam Blake's preprint "Integer Factorisation, Fermat &
# Machine Learning on a Classical Computer", arXiv:2308.12290
#
# Detecting the ratio of a semiprime's factors may, in theory, help improve
# the classical Lawrence algorithm for semiprime factorisation.
#

import numpy as np
import gmpy2
from gmpy2 import mpz, mpq, mpfr
from cerebros.simplecerebrosrandomsearch.simple_cerebros_random_search\
import SimpleCerebrosRandomSearch
import pendulum
import pandas as pd
import tensorflow as tf
from cerebros.units.units import DenseUnit
from cerebros.denseautomlstructuralcomponent.dense_automl_structural_component\
import zero_7_exp_decay, zero_95_exp_decay, simple_sigmoid
from ast import literal_eval
from sklearn.metrics import confusion_matrix
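
# ---------------------------------------------------------------------------
# Aside (illustrative only, not part of the training pipeline): why the factor
# ratio matters. Lawrence's generalisation of Fermat's method exploits a
# rational approximation a/b ~ p/q. If N = p*q, then
# (b*p + a*q)^2 - (b*p - a*q)^2 == 4*a*b*N, so a plain difference-of-squares
# search on 4*a*b*N terminates after very few steps once a/b is accurate.
# The helper name and interface below are ours, a minimal sketch.
def fermat_with_ratio(N, a, b, max_steps=1_000_000):
    """Try to factor N = p*q given a guess a/b for the ratio p/q."""
    M = 4 * mpz(a) * b * N
    t = gmpy2.isqrt(M)
    if t * t < M:
        t += 1  # start at ceil(sqrt(M))
    for _ in range(max_steps):
        d = t * t - M
        if gmpy2.is_square(d):
            f = gmpy2.gcd(t - gmpy2.isqrt(d), N)  # recover a factor of N
            if 1 < f < N:
                return int(f), int(N // f)
        t += 1
    return None
# e.g. fermat_with_ratio(mpz(2003) * 3001, 2, 3) returns (3001, 2003)
# ---------------------------------------------------------------------------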

TIME = pendulum.now().__str__()[:16]\
.replace('T', '_')\
.replace(':', '_')\
.replace('-', '_')
PROJECT_NAME = f'{TIME}_cerebros_auto_ml_test_semiprimes'

# Column 0 holds the semiprime N, column 1 the ratio-class label.
raw_data = pd.read_csv("semiprime_ratio_training_data_128_ratio_2_3.csv")
raw_data.iloc[:, 0] = raw_data.iloc[:, 0].apply(mpz)  # N exceeds int64, so parse with gmpy2
raw_data = raw_data.values
# Features are the binary digits of N (128 bits, per the dataset name).
X = np.array([[int(bit) for bit in gmpy2.digits(n, 2)] for n in raw_data[:, 0]], dtype=np.int32)
y = np.array(raw_data[:, 1], dtype=np.int32)

num_data_points = 200_000  # first 200k rows train; the rest are held out for testing
X_train, y_train = X[:num_data_points,:], y[:num_data_points]
X_test, y_test = X[num_data_points:,:], y[num_data_points:]
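
# Illustrative sketch of how one labelled row could be generated. The CSV
# schema (big-integer N in column 0, label in column 1) is inferred from the
# loading code above; the 2:3 ratio and the positive-label convention are
# assumptions based on the dataset filename, and the helper name is ours.
def make_positive_example(bits=128, ratio=(2, 3)):
    import random
    a, b = ratio
    # sample a prime q of about bits/2 bits (top bit forced for fixed width)
    q = gmpy2.next_prime(random.getrandbits(bits // 2) | (1 << (bits // 2 - 1)))
    p = gmpy2.next_prime((q * a) // b)  # p/q lands close to a/b
    return gmpy2.digits(p * q, 2), 1    # (binary string of N, positive label)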

tensor_x = tf.constant(X_train)
training_x = [tensor_x]  # Cerebros expects a list of input tensors
INPUT_SHAPES = [training_x[i].shape[1] for i in np.arange(len(training_x))]
train_labels = [y_train]
OUTPUT_SHAPES = [1]  # single sigmoid unit for the binary ratio class

meta_trial_number = 42 # in distributed training set this to a random number

activation = "relu"
predecessor_level_connection_affinity_factor_first = 19.613
predecessor_level_connection_affinity_factor_main = 0.5518
max_consecutive_lateral_connections = 34
p_lateral_connection = 0.36014
num_lateral_connection_tries_per_unit = 11
learning_rate = 0.095
epochs = 10
batch_size = 634
maximum_levels = 5
maximum_units_per_level = 5
maximum_neurons_per_unit = 25

cerebros_automl =\
SimpleCerebrosRandomSearch(
unit_type=DenseUnit,
input_shapes=INPUT_SHAPES,
output_shapes=OUTPUT_SHAPES,
training_data=training_x,
labels=train_labels,
validation_split=0.35,
direction='maximize',
metric_to_rank_by='val_binary_accuracy',
minimum_levels=1,
maximum_levels=maximum_levels,
minimum_units_per_level=1,
maximum_units_per_level=maximum_units_per_level,
minimum_neurons_per_unit=1,
maximum_neurons_per_unit=maximum_neurons_per_unit,
activation=activation,
final_activation='sigmoid',
number_of_architecture_moities_to_try=3,
number_of_tries_per_architecture_moity=2,
number_of_generations=3,
minimum_skip_connection_depth=1,
maximum_skip_connection_depth=7,
predecessor_level_connection_affinity_factor_first=predecessor_level_connection_affinity_factor_first,
predecessor_level_connection_affinity_factor_first_rounding_rule='ceil',
predecessor_level_connection_affinity_factor_main=predecessor_level_connection_affinity_factor_main,
predecessor_level_connection_affinity_factor_main_rounding_rule='ceil',
predecessor_level_connection_affinity_factor_decay_main=zero_7_exp_decay,
seed=8675309,
max_consecutive_lateral_connections=max_consecutive_lateral_connections,
gate_after_n_lateral_connections=3,
gate_activation_function=simple_sigmoid,
p_lateral_connection=p_lateral_connection,
p_lateral_connection_decay=zero_95_exp_decay,
num_lateral_connection_tries_per_unit=num_lateral_connection_tries_per_unit,
learning_rate=learning_rate,
loss=tf.keras.losses.BinaryCrossentropy(),
metrics=[tf.keras.metrics.BinaryAccuracy(), tf.keras.metrics.TrueNegatives(),
tf.keras.metrics.FalseNegatives(), tf.keras.metrics.FalsePositives(),
tf.keras.metrics.TruePositives()],
epochs=epochs,
patience=7,
project_name=f"{PROJECT_NAME}_meta_{meta_trial_number}",
model_graphs='model_graphs',
batch_size=batch_size,
meta_trial_number=meta_trial_number)
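
# A reading of the search budget (our interpretation of the parameter names,
# not stated in this file): 3 architecture moities x 2 tries each, repeated
# over 3 generations, so on the order of 18 candidate networks are trained.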

result = cerebros_automl.run_random_search()
best_model_found = cerebros_automl.get_best_model()

trainable_params = np.sum([np.prod(w.get_shape()) for w in best_model_found.trainable_weights])
non_trainable_params = np.sum([np.prod(w.get_shape()) for w in best_model_found.non_trainable_weights])
total_params = trainable_params + non_trainable_params

print(f"Best model found: {total_params} total parameters ({trainable_params} trainable, {non_trainable_params} non-trainable)")

print(f"Best accuracy is ({cerebros_automl.metric_to_rank_by}): {result}")

# best_model_found.compile()
best_model_found.summary()

y_pred = best_model_found.predict(X_test)
threshold = y_pred.mean()
y_pred = (y_pred > threshold).astype(int)
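# Note: thresholding at the mean predicted score, rather than a fixed 0.5,
# pins the predicted-positive rate near the scores' base rate; tuning this
# cutoff on a held-out validation split would be a more principled choice.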

cm = confusion_matrix(y_test, y_pred, normalize='all')
print("Confusion matrix")
print(cm)
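
# Optional illustration: with normalize='all' the four cells of cm sum to 1
# and follow sklearn's (row = true, column = predicted) layout, so overall
# accuracy is the trace of the matrix.
tn, fp, fn, tp = cm.ravel()
print(f"Accuracy from confusion matrix: {tn + tp:.4f}")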