-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathTODO
executable file
·306 lines (250 loc) · 10.5 KB
/
TODO
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
CH_TODO list:
==========
sample:
__enum (state, 2, (
idle,
startup,
execute,
shutdown
));
__struct (pt_cart_t,(
(ch_bit8) x,
(ch_bit8) y
));
__struct (pt_polar_t,(
(ch_bit8) r,
(ch_bit8) theta
));
__union (point_t,(
(pt_cart_t) c,
(pt_polar_t) p
));
*** add support for any datatype for mem/rom template T
*** Understanding conditional assignment ***
ch_bit4 a;
if (x > 0) {
a[0-2] = 0;
a[1] = 1; // override a[1]
if (y > 0) {
a[0] = 1; // override a[0]
a[3] = 1;
}
} else {
a = 0;
}
*********************************************
X*** automatic dead code elimination
X*** add detection of duplicate assignments inside control block and outside (very important for bugs resolution)
X*** Assignment rule: assignment by constructor is ignored during check
*** prevent memory allocation inside conditionals
*** fix memport reassignment
*** assigning a vector from memory is broken (and reading the vector inside the branch)
*** the branch.owner concept is broken (a nested assignment to global would not prevent outer assignment) - use bitmask instead to check overrides
*** subscript use inside control statements is still broken
*** investigate alternative proxy implementation based on bitmask for simplicity
*** find a way to fix ch_reg such that it is initialized to itself by default (next -> this)
*** should consider getting rid of deferred initialization once auto dead code elimination is supported (will remove all nasty hacks in code where dst == nullptr is a worry)
*** address the issue why friend members still don't get implicit template constructor (e.g. ch_min, ch_sel, ...)
*** handling floating-point delays: (y = a * b) => ch_decoupled<ch_float32> object exporting ready/valid signals to be connected
*** complete float implementation (compare operations)
*** make ch_mem store the enclosed type like ch_mem<ch_bit32, 10>
*** use callstack api to save source file/line for undefined variables. check out GDB api (https://github.com/brasko/gdbwire) and libcwd (http://libcwd.sourceforge.net/index.html)
*** move all the math routines into the bitvector class, then accelerate the bitvector class using intel SSE/AVX intrinsics
*** add support for ch_reg in-place operators ++(), --(), >>=(), <<=(), &=(), |=(), ^=(), +=(), -=(), *=(), /=()
*** test ch_shuffle
*** printing struct, union, enum values using reflection
*** allow ch_println() to use __if and __switch
*** handshaking valid/ready signals
- implement ch_call(function, args...) using variadic template to generate modules binding
- implement bitbase.clone() to enable fork assignments
- prevent assignments to yourself (e.g. a = a, a[0] = a[1])
- implement ch_make_device() to use compiler time binding
- add C++ user-defined literals for _b, _h, _d, _o conversion to bit<N> and bus<N>
- create ../configure --prefix / make / make install scripts
- implement cash namespaces ch::logic, ch::system, ch::stl
- to_verilog() renaming module arguments
- investigate using std::bind and variadic functions for device creation
- add mutex and fifo primitives for TML modelling
- move memory functions into classes to support onetime initialization: ch_mem<N, W, AsyncR> mem(..); x = mem.read(addr); mem.write(en, addr, val);
- support ch_device_interface<type> for user to derive and implement emit()
- enforce type safety in CASH for explicit size casting
- experiment if/else using lambdas ch_when(a > 1, []{...}).when(b > 1, []{...}).otherwise([]{...})
- revisit the next() operator concept to make reg implicit
- check out useful AVX shuffle api to support in CASH
- CASH conditionals: ch_when(?, :=).when(?, :=).otherwise(:=)
- global Reset device->reset() => manually reset all nodes in device to zero.
- FSM designs integration
- should generate high-level verilog
- refactor ch_tristate class => move to function
- implement unit test framework like in MyHDL/chisel
- fix hierarchy support
- implement netlist export/import
- implement binary export/import
- implement AAL toolchain
- add direct initialization buffer support for ch_rom and ch_mem
- extend operators argument to support different bit size
- refactor ch_llrom
- add file initialization support to ch_llmem
- implement __union and __struct as part of aggregate.h
- implement ch_int and ch_uint classes
- implement plugin interface for attaching other simulators
- implement default simulator logging
- add source location information to user objects for debugging
- implement module functions generation and reuse
- implement new analysis api
- implement new netlist export/import
- implement GCC "#pragma cash module" to convert native functions to CASH module
- redesign schematic export to use new clock design
- accelerate simulation using a virtual machine
- support for high-level custom component plugins (with query cycle and gate count)
- support for persistence
Completed:
==========
- refactor ch_delay class => move to function
- refactor ch_wire class => all objects operate as wires before first assignment
- move ch_mem into llmem.h
- create cash.h header file
- adding native support for integers operations
- add CASH_ to all header files #define declarations
- implement ch::internal to cleanup api namespace
- keep all implementation specific headers private
- prepend ch_ to CASH public types and functions
- runtime assertion support for testing
- explicit clock implementation
- explicit clock simulation
- redesign concatenation support to use the comma operation
- add special assign operator '<=' for non-blocking assignments
- implement signal class
- implement efficient proxy classes for concatenate and slice operators
- headers cleanup/refactoring
- implement true assignment operator
- generic clock signal generation
- register enable/reset control
- optimize global states management (static objects => scoped objects)
- fix signals update during simulation (should only optimize read-only literals)
- redesign optimizer + remove all redundant loops
- redesign optimizer + optimize dead_code_removal calls
- optimize all loops to use auto& construct
- use C++ 11 std containers emplace api to reduce object construction overhead
- automatic device instantiation
- new vcd simulator class
- implement ch_sel
- implement ch_case
- update llrom
- update llram
- fix memory implementation
- replace abort() with runtime assertion macro
- implement ch_pushcd, ch_popcd api (check out the opengl matrix api)
- implement new verilog generator
notes:
======
// debugging environment variables
export CASH_DEBUG_NODE=32
export CASH_DEBUG_LEVEL=2
export CASH_DUMP_AST=1
// inferring multi-cycle operations
y = ch_pipeline(x, 2) = ch_delay(ch_delay(x));
z = ch_pipeline(x * y, 2); => z = ch_delay(mul1(x,y,ch_delay(mul0(x,y))));
// AOCL profiling report
aocl report jpeg_decoder.aocx profile.mon
// sync submodules
git submodule update --init --recursive
// Detecting leaks using valgrind
$ valgrind --leak-check=yes <app> <args...> > valgrind.log 2>&1
$ valgrind --leak-check=full --show-leak-kinds=all --track-origins=yes <app> <args...> > valgrind.log 2>&1
// CPU Profiler
$ valgrind --tool=callgrind <app> <args...>
$ kcachegrind
// google perf tools
$ CPUPROFILE_FREQUENCY=100000 CPUPROFILE=profiler.log ./aes.out -e
$ google-pprof --callgrind ./aes.out profiler.log > profiler.callgrind
$ kcachegrind profiler.callgrind
// cmake build
cmake -DCMAKE_BUILD_TYPE=Release .. && make -j`nproc` all test
cmake -DCMAKE_BUILD_TYPE=Debug .. && make -j`nproc` all test
cmake -DCMAKE_BUILD_TYPE=Release -DJIT=LIBJIT .. && make -j`nproc` all test
cmake -DCMAKE_BUILD_TYPE=Debug -DJIT=LIBJIT .. && make -j`nproc` all test
cmake -DCMAKE_BUILD_TYPE=Debug -DCODECOV=ON .. && make -j`nproc` all test && ../codecov.sh .
// dump assembly
objdump -d a.out > a.asm
// debugging C macros
g++ -std=c++17 -O0 -g -Wall -Wextra -Werror -E -dD dogfood.cpp > dogfood.log
clang-format-5.0 dogfood.log -i
// Detecting Cycles in CASH?
// a cycle only occurs when a combinatorial net's eval() function is entered a second time without having exited.
// Evaluates this NAND node for tick `t`, caching the result in cval_.
// While the two sources are being evaluated tick_ is parked at t + 1, so a
// re-entrant call with the same `t` sees tick_ > t and reports a cycle.
bool nandimpl::eval(ch_tick t) {
  // Already evaluated for this tick: reuse the cached value.
  if (tick_ == t) {
    return cval_;
  }

  if (tick_ > t) {
    CASH_ABBORT("found cycle");
  }

  // Mark the node as "evaluation in progress" for tick t.
  tick_ = t + 1;

  // Short-circuit is preserved: src(1) is only evaluated when src(0) is true.
  const bool first_operand = this->src(0).eval(t);
  cval_ = !(first_operand && this->src(1).eval(t));

  // Evaluation finished: record the tick the cached value belongs to.
  tick_ = t;
  return cval_;
}
//
// alternative CASH programming model
// enable realtime debugging in behavioral mode
//
// Sketch of a behavioral-mode counter: registers a callback with ontick()
// that increments `out` by one each time the callback is invoked.
template <unsigned N>
void Counter(ch_bit<N>& out) {
  // `out` is captured by reference so the callback updates the caller's object.
  auto increment = [&out]() {
    out = out + 0x1;
  };
  ontick(increment);
}
// user-defined literals for binary numbers
// Appends the characters of `s` to `x` as base-10 digits and returns the
// result, e.g. Scale(0, "16") == 16 and Scale(1, "23") == 123.
// Characters are not validated; an empty string returns `x` unchanged.
constexpr unsigned long long Scale(unsigned long long x, const char* s) {
  unsigned long long accumulated = x;
  for (const char* cursor = s; *cursor; ++cursor) {
    accumulated = 10 * accumulated
                + static_cast<unsigned long long>(*cursor)
                - static_cast<unsigned long long>('0');
  }
  return accumulated;
}
// Interprets `s` as a string of binary digits appended to `x`: each character
// doubles the running value and adds 1 when the character is '1' (any other
// character adds 0). On 'e' or 'E' the value accumulated so far is shifted
// left by the decimal number that follows (parsed with Scale) and returned.
// `scale` takes no part in the computation.
constexpr unsigned long long ToScaledBinary(unsigned long long x, unsigned long long scale, const char* s) {
  static_cast<void>(scale);
  for (; *s; ++s) {
    if (*s == 'e' || *s == 'E') {
      return x << Scale(0, s + 1); // put the digits in the right position
    }
    x = x + x + (*s == '1' ? 1 : 0);
  }
  return x;
}
std::cout << std::hex << "0x" << 1111111111_b << std::endl;
std::cout << std::hex << "0x" << 1111111111e16_b << std::endl;
//--
ch_device<Adder<2>> my_adder;
__tie(out, cout) = my_adder.bind(lhs, rhs, cin);
// Trait whose `value` is true when the expression
// sizeof(static_cast<To>(std::declval<From>())) is well-formed, i.e. when a
// From can be explicitly cast to To, and false otherwise.
template <typename From, typename To>
class is_explicitly_convertible {
private:
  // Primary template: chosen when the cast expression is ill-formed.
  template <typename F, typename T, typename = void>
  struct probe : std::false_type {};

  // Specialisation: only viable when the cast expression is well-formed.
  template <typename F, typename T>
  struct probe<F, T,
               std::void_t<decltype(sizeof(static_cast<T>(std::declval<F>())))>>
      : std::true_type {};

public:
  static constexpr bool value = probe<From, To>::value;
};
#include <iostream>
// CRTP example: Base<Child> dispatches statically to the method supplied by
// the class that derives from it, without any virtual call.
template <typename Child>
struct Base
{
  // Forwards to Child::implementation(); Child must derive from Base<Child>
  // for the static_cast below to be valid.
  void interface()
  {
    static_cast<Child*>(this)->implementation();
  }
};

// Concrete CRTP client providing the implementation() that Base calls.
struct Derived : Base<Derived>
{
  // Writes a fixed message to the standard error stream.
  void implementation()
  {
    // Qualified with std:: — the snippet has no using-declaration, so a bare
    // `cerr` does not name anything and fails to compile.
    std::cerr << "Derived implementation\n";
  }
};
int main()
{
Derived d;
d.interface(); // Prints "Derived implementation"
}
http://stackoverflow.com/questions/25513380/c-style-callbacks-in-c11
http://en.cppreference.com/w/cpp/thread/packaged_task
http://stackoverflow.com/questions/38561242/struct-to-from-stdtuple-conversion
clang++-9 -std=c++17 -I../include -lcash -L$CASH_HOME/build/lib -Xclang -load -Xclang lib/libcashpp.so -Xclang -add-plugin -Xclang cash-pp -Xclang -plugin-arg-cash-pp -Xclang debug -Xclang -plugin-arg-cash-pp -Xclang dump-ast ../examples/adder.cpp > test.log 2>&1
clang++-9 -std=c++17 -I../include -Xclang -load -Xclang lib/libcashpp.so -Xclang -add-plugin -Xclang cash-pp -Xclang -plugin-arg-cash-pp -Xclang debug -Xclang -plugin-arg-cash-pp -Xclang dump-ast ../clang/test.cpp > test.log 2>&1
bundle exec jekyll serve --trace