-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathplot3D.R
98 lines (79 loc) · 3.37 KB
/
plot3D.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
# plot3D.R creates a 3D scatterplot from data that was prepared using fit.R.
# Usage: Rscript plot3D.R input.txt output.png 40 0.1
# 40 is the rotation angle of the plot (passed to scatterplot3d as `angle`).
# 0.1 is the threshold value for the NBD parameters k and p: only rows with
# both k and p below the threshold are plotted.
# Try experimenting with the threshold to zoom in and out of the plot.
# The z-axis is Document Frequency (DF): the higher the value, the more
# texts in the corpus contain the word.
# Interesting threshold values are between 0.1 and 0.7, see graphs in the repo.
# Parse the four required positional command-line arguments (read once):
#   1. input_file  - path of the tab-separated table to read
#   2. output_file - path of the PNG file to write
#   3. angle       - numeric, passed to scatterplot3d() as `angle`
#   4. threshold   - numeric upper bound applied to both k and p
cli_args <- commandArgs(trailingOnly = TRUE)
if (length(cli_args) < 4) {
  stop("Please provide input.txt, output.png, rotation angle, threshold as command line arguments; recommended threshold is 0.2", call. = FALSE)
}

input_file <- cli_args[1]
output_file <- cli_args[2]

# as.numeric() turns unparsable text into NA with only a warning, and an NA
# threshold would later filter out every row. Convert quietly and fail here
# with a message that names the offending arguments instead.
angle <- suppressWarnings(as.numeric(cli_args[3]))
threshold <- suppressWarnings(as.numeric(cli_args[4]))
if (is.na(angle) || is.na(threshold)) {
  stop(
    "Rotation angle and threshold must be numeric; got '", cli_args[3],
    "' and '", cli_args[4], "'",
    call. = FALSE
  )
}
# Read the tab-separated input table; the first line is the header.
# Quoting is limited to double quotes and '#' is not a comment character
# (the settings read.delim() uses), so a field containing an apostrophe or
# '#' is read literally instead of corrupting the row.
data <- read.table(
  input_file,
  sep = "\t",
  header = TRUE,
  quote = "\"",
  comment.char = ""
)

# The plot reads these columns by name; fail early if any is absent.
required_cols <- c("k", "p", "DF", "sum")
missing_cols <- setdiff(required_cols, colnames(data))
if (length(missing_cols) > 0) {
  stop(
    "Input file '", input_file, "' is missing column(s): ",
    paste(missing_cols, collapse = ", "),
    call. = FALSE
  )
}

# Keep rows whose k and p are both below the threshold. k must also be
# positive because it is log-transformed below; rows that fail only that
# test are reported rather than silently producing -Inf/NaN coordinates.
# Rows where the comparison is NA are dropped.
in_range <- data[["k"]] < threshold & data[["p"]] < threshold
usable <- in_range & data[["k"]] > 0
n_dropped <- sum(in_range & !usable, na.rm = TRUE)
if (n_dropped > 0) {
  warning(
    n_dropped, " row(s) with k <= 0 were dropped because log(k) is not finite",
    call. = FALSE
  )
}
filtered_data <- data[which(usable), , drop = FALSE]

if (nrow(filtered_data) == 0) {
  stop(
    "No rows with 0 < k < ", threshold, " and p < ", threshold,
    "; try a larger threshold",
    call. = FALSE
  )
}

# Plot coordinates and the per-point frequency used for sizing.
x_values <- log(filtered_data[["k"]]) # natural log scale for the x-axis
y_values <- filtered_data[["p"]]
z_values <- filtered_data[["DF"]]
fr <- filtered_data[["sum"]]
# Load the plotting package before opening the graphics device, so that a
# missing package stops the script before any PNG file is created.
library(scatterplot3d)

# Open the PNG device: 6.25 x 6.25 inches, res = 1500 ppi, base point size 2.
png(
  output_file,
  width = 6.25,
  height = 6.25,
  units = "in",
  res = 1500,
  pointsize = 2
)

# Margins are c(bottom, left, top, right), in lines of text.
# NOTE(review): scatterplot3d() also takes its own `mar` argument; confirm
# whether this par() setting has any effect on the final plot.
par(mar = c(10, 10, 10, 10) + 0.1)

# Draw the base 3D scatter plot and keep the returned object, whose
# xyz.convert() projects 3D coordinates onto the 2D device for overlays.
# Axis labels are the names of the columns actually plotted (x is log(k),
# as stated in the title), rather than whatever sits at fixed column
# positions in the input file.
scatterplot <- scatterplot3d(
  x = x_values,
  y = y_values,
  z = z_values,
  main = paste("Input:", input_file, " Output:", output_file, " Threshold =", threshold, "Log scale for k-value."),
  xlab = "k",
  ylab = "p",
  zlab = "DF",
  pch = 20,
  angle = angle,
  grid = TRUE,
  box = FALSE
)
# Linearly map a numeric vector onto [0, 1] using its finite minimum and
# maximum. When there is no spread (a single value, all values equal, or no
# finite values) every element maps to 0, instead of dividing by zero and
# yielding NaN sizes.
rescale_unit <- function(v) {
  finite_v <- v[is.finite(v)]
  if (length(finite_v) == 0) {
    return(rep(0, length(v)))
  }
  lowest <- min(finite_v)
  spread <- max(finite_v) - lowest
  if (spread == 0) {
    return(rep(0, length(v)))
  }
  (v - lowest) / spread
}

# Point size grows with fr (the 'sum' column): range 1 to 11.
scaled_size <- 1 + 10 * rescale_unit(fr)

# Semi-transparent blue for the overlaid points
# (alpha: 0 = fully transparent, 1 = opaque).
alpha <- 0.2
point_colors <- rgb(0, 0, 1, alpha = alpha)

# Project the 3D coordinates onto the device once; both overlays reuse them.
projected <- scatterplot$xyz.convert(x_values, y_values, z_values)

# Overlay the points, sized by fr.
points(projected, pch = 20, col = point_colors, cex = scaled_size)

# Label each point with its 'word' value, placed above the point (pos = 3);
# label size grows with z_values: range 1 to 4.
normalized_z <- rescale_unit(z_values)
label_size <- 1 + 3 * normalized_z
text(projected, labels = filtered_data$word, pos = 3, col = "red", cex = label_size)

# Close the device, which writes the PNG file.
dev.off()