Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
241 commits
Select commit Hold shift + click to select a range
7265c00
Setting up GitHub Classroom Feedback
github-classroom[bot] Apr 6, 2022
1bb3dcf
Allow threading in CMakeLists.txt
drew-harris Apr 7, 2022
643816c
Install rapidjson
drew-harris Apr 7, 2022
e23972e
Parse single json file (test) (main.cpp)
drew-harris Apr 7, 2022
2359882
Multithreading superjson reads from folder
drew-harris Apr 7, 2022
b8d2409
Loop through every word
drew-harris Apr 7, 2022
bf137a8
Output seconds
drew-harris Apr 7, 2022
98eae49
created basic header definition for hash map class titled "HashOrdere…
adamcesco Apr 7, 2022
c5c4d5d
defined some basic methods for HashOrderedMap class. Created an in-cl…
adamcesco Apr 7, 2022
46d7a09
fixed minor bugs, added a definition for HashOrderedMap<T, U>& clear_…
adamcesco Apr 7, 2022
e8576cc
optimized method "int increase_max_cap(unsigned int)" and finished de…
adamcesco Apr 7, 2022
388bc7b
found and fixed memory mishandling error in "HashOrderedMap::clear()"
adamcesco Apr 7, 2022
80b8cd0
defined and tested copy constructor and assignment constructor for Ha…
adamcesco Apr 7, 2022
9cbf7a1
stress tested the custom HashOrderedMap class by creating a Hash…
adamcesco Apr 7, 2022
8b78901
Added more tests for HashOrderedMap class. Added "bool contains(const…
adamcesco Apr 7, 2022
8dce324
added more documentation for method use
adamcesco Apr 7, 2022
7d38c8f
cleared all valgrind messages by editing the order of boolean logic o…
adamcesco Apr 7, 2022
b466e54
fixed more boolean logic operations to potentially remove future valg…
adamcesco Apr 7, 2022
4b3c2f1
created a private constructor that takes an integer that will determi…
adamcesco Apr 7, 2022
3a36582
Updated/increased documentation. renamed files and classes to be more…
adamcesco Apr 7, 2022
9ab2da8
Add catch test for clean merge
drew-harris Apr 7, 2022
f0db797
Merge pull request #3 from Adam/dictionary-implementation
drew-harris Apr 7, 2022
6ed0e12
Merge branch 'main' into install-rapid-json-and-test
drew-harris Apr 7, 2022
af7d691
Merge pull request #2 from SMUCSE2341/install-rapid-json-and-test
drew-harris Apr 7, 2022
b18d275
added definitions for merge_with() and overlap_with() methods for cla…
adamcesco Apr 7, 2022
3357102
minor optimization edits for new methods
adamcesco Apr 7, 2022
f3ce936
fixed minor bug in "hash_ordered_map<T, U> &hash_ordered_map<T, U>::m…
adamcesco Apr 7, 2022
f2dbb5d
Formatting
drew-harris Apr 7, 2022
1c6d6ad
wrote tests for, and solved all presented bugs within, "hash_ordered_…
adamcesco Apr 7, 2022
9d25e0f
wrote tests for, and solved all presented bugs within, "hash_ordered_…
adamcesco Apr 7, 2022
a80dd39
updated documentation
adamcesco Apr 7, 2022
bcaca2d
Merge branch 'main' into dictionary-implementation
drew-harris Apr 7, 2022
87d47c2
Merge pull request #4 from SMUCSE2341/dictionary-implementation
drew-harris Apr 7, 2022
9cdfb5c
basic editing
adamcesco Apr 7, 2022
5b3c38f
added support for custom hash functions
adamcesco Apr 8, 2022
6148d50
created new AVL tree class and its header definition
adamcesco Apr 8, 2022
4e3cc72
Create search engine class and small dataset
drew-harris Apr 8, 2022
72b01d9
Link processor and search engine class
drew-harris Apr 8, 2022
e1c0092
Merge branch 'main' into class-organization
drew-harris Apr 8, 2022
c1ad3ca
Create process method and atomic count for progress bar
drew-harris Apr 8, 2022
b2b6a0d
Input file names in queue
drew-harris Apr 8, 2022
b87d59e
Count total files
drew-harris Apr 8, 2022
39532a4
Progress display
drew-harris Apr 8, 2022
bb569b0
Actually parse files
drew-harris Apr 8, 2022
0cc303c
Remove debug prints
drew-harris Apr 8, 2022
03c0318
Awesome progress bar
drew-harris Apr 8, 2022
714f6d4
Change progress bar style
drew-harris Apr 8, 2022
716164e
defined some basic AVL methods. No methods have been tested, and some…
adamcesco Apr 8, 2022
0553c33
defined the balance methods and the fixed bug within height updating …
adamcesco Apr 8, 2022
63607f0
Replace std map with our map
drew-harris Apr 9, 2022
0d4725c
Change spacing
drew-harris Apr 9, 2022
6fc1bd6
Bundle tables and locks
drew-harris Apr 9, 2022
ccf22ff
Color and center output
drew-harris Apr 9, 2022
74bc8ab
defined all methods needed for a working avl tree
adamcesco Apr 9, 2022
54626a8
created some basic tests and found bugs within the rotation methods
adamcesco Apr 9, 2022
8934de4
found a new bug when inserting new nodes in mass. There is miscommuni…
adamcesco Apr 9, 2022
f09a8d0
Merge: Class organization
drew-harris Apr 9, 2022
e5e3ff8
Merge branch 'main' into AVL-tree-implementation
drew-harris Apr 9, 2022
f4a32fe
Merge: AVL Tree
drew-harris Apr 9, 2022
129034e
Middle of stuff
drew-harris Apr 9, 2022
83ca3dc
Changes by escob
drew-harris Apr 9, 2022
5c909d8
changes to avl_tree node stitching during rotations
adamcesco Apr 9, 2022
f63a82c
Multithreaded tables
drew-harris Apr 9, 2022
bf70feb
Merge remote-tracking branch 'origin/main' into main
drew-harris Apr 9, 2022
f352f84
added nullptr handling
adamcesco Apr 9, 2022
bb9b786
Remove extra table call
drew-harris Apr 9, 2022
155cfc1
removing cout statements that were used for debugging
adamcesco Apr 9, 2022
5d035f7
added documentation
adamcesco Apr 9, 2022
3b5792c
Merge pull request #7 from SMUCSE2341/table-multithreading
drew-harris Apr 9, 2022
6f195b5
Merge pull request #8 from SMUCSE2341/avl_tree_stictch_editing
drew-harris Apr 9, 2022
06b39d5
Add words to tree
drew-harris Apr 9, 2022
1e145dc
Reduce progress bar refresh rate
drew-harris Apr 9, 2022
3da2548
Move into unordered map
drew-harris Apr 9, 2022
25f2009
minor avl tree optimizations and added new insert-type methods
adamcesco Apr 9, 2022
1e5ce57
Merge pull request #9 from SMUCSE2341/avl_tree_optimization
drew-harris Apr 9, 2022
1d04d28
Add progress bar for tree conversion
drew-harris Apr 9, 2022
dd00b62
Merge branch 'main' into map-strategy
drew-harris Apr 9, 2022
6237c9b
Noticed that the pivot node is the resulting highest node that has be…
adamcesco Apr 9, 2022
d0c6d62
Reduce the amount of times the height of the tree is updated to incre…
adamcesco Apr 9, 2022
247d3d9
Merge branch 'avl_tree_optimization' into map-strategy
drew-harris Apr 9, 2022
736a9dd
BELL!!!!!!
drew-harris Apr 9, 2022
55a0421
Merge pull request #10 from SMUCSE2341/map-strategy
drew-harris Apr 9, 2022
fc86f06
erased some unneeded data members from "void Processor::process()". M…
adamcesco Apr 9, 2022
184b94a
Added documentation for users of the avl_tree class
adamcesco Apr 10, 2022
3df8912
added more and better documentation
adamcesco Apr 10, 2022
0d0a93a
Set up TBB with CMakeLists.txt
drew-harris Apr 11, 2022
1cfb38e
Use tbb for map
drew-harris Apr 11, 2022
18e789b
Minor optimizations
drew-harris Apr 11, 2022
612d207
Change display update time
drew-harris Apr 11, 2022
d442f37
Change everything to tbb maps
drew-harris Apr 11, 2022
572696a
Merge branch 'main' into tbb
drew-harris Apr 11, 2022
c13b30b
Delete tbb for submodule
drew-harris Apr 11, 2022
24d6615
Add tbb submodule
drew-harris Apr 11, 2022
06e9cef
Merge remote-tracking branch 'origin/tbb' into tbb
drew-harris Apr 11, 2022
8adec06
Add submodule warning
drew-harris Apr 11, 2022
d3d2831
Merge pull request #12 from SMUCSE2341/tbb
drew-harris Apr 11, 2022
0d20d7b
Updated stopwords to not have punctuation, moved some string manipula…
adamcesco Apr 11, 2022
86b5243
Renamed some method parameters within avl_tree class and added delete…
adamcesco Apr 11, 2022
d64e837
removed unnecessary methods from avl_tree class, and improved word-fi…
adamcesco Apr 11, 2022
f8f187d
Made minor improvements to increase speed
adamcesco Apr 11, 2022
97e11db
revised the rotation methods to no longer require an O(n) operation t…
adamcesco Apr 11, 2022
33ef471
updated when the height of a node is updated, should produce a comple…
adamcesco Apr 11, 2022
62666b9
experimental edits to avl_tree inserting algorithm
adamcesco Apr 11, 2022
26acc2f
fixed all bugs within node height calculation. avl_tree class is now …
adamcesco Apr 11, 2022
b51ae2b
Merge pull request #13 from SMUCSE2341/avl_optimization
adamcesco Apr 11, 2022
9db4784
Implemented the searching method to be used within the speed test
adamcesco Apr 11, 2022
03bbbef
made main safe for word-query speed tests and just parsing/tree-building
adamcesco Apr 11, 2022
951e2f5
Segfault on tree conversion
drew-harris Apr 11, 2022
b071952
Merge remote-tracking branch 'origin/main' into main
drew-harris Apr 11, 2022
c84768f
uuid values are now pushed into the tree. The tree values are now pro…
adamcesco Apr 12, 2022
e61682a
Changed data transferring algorithm to implement pointer assignment f…
adamcesco Apr 12, 2022
6fc7eb1
Wrote warnings for Processor::wordTree and Processor::tbbMap
adamcesco Apr 12, 2022
815b155
Use TBB for filename queue
drew-harris Apr 12, 2022
5926d90
Revert "Wrote warnings for Processor::wordTree and Processor::tbbMap"…
drew-harris Apr 12, 2022
a286548
revert everything
drew-harris Apr 12, 2022
7a99098
Remove the special thing that we need that makes it faster
drew-harris Apr 12, 2022
b1e0291
filling tbbMap with string pointers instead of string. This saves tim…
adamcesco Apr 13, 2022
0f1c45e
added stemming for the searched word
adamcesco Apr 13, 2022
69827c3
Add extra check before json parsing due to sample data error
drew-harris Apr 13, 2022
367dc48
optimized logic of the avl_tree insertion methods
adamcesco Apr 13, 2022
7677f72
Remove submodule warning
drew-harris Apr 13, 2022
2ea04b2
printing uuids
adamcesco Apr 13, 2022
3386a89
Merge branch 'main' into UUID_saving_optimization
adamcesco Apr 13, 2022
eacddf1
Merge pull request #14 from SMUCSE2341/UUID_saving_optimization
adamcesco Apr 13, 2022
80b5ee0
implemented and slightly tested node deletion
adamcesco Apr 13, 2022
b3fddec
wrote more tests for avl_tree node deletion
adamcesco Apr 13, 2022
514c598
removed all "illegal" multi-threading techniques. Implemented massive…
adamcesco Apr 14, 2022
3258aa6
implemented custom hashing function for strings
adamcesco Apr 14, 2022
6da7e77
optimized if-statements/reduces number of if-statements
adamcesco Apr 14, 2022
80e6c08
optimized if-statements/reduces number of if-statements and made mino…
adamcesco Apr 14, 2022
4827773
now printing three articles that contain the searched word
adamcesco Apr 14, 2022
f7d31a8
minor optimizations
adamcesco Apr 14, 2022
9f6f6d7
minor optimizations
adamcesco Apr 14, 2022
2d40bd3
a combination of time saving changes that accumulates a second off of…
adamcesco Apr 15, 2022
5ffd14a
more testing
adamcesco Apr 15, 2022
7203097
Implemented stemming word to be searched for
adamcesco Apr 15, 2022
8efcd84
minor changes to avl_tree class
adamcesco Apr 15, 2022
f7f6ac2
defined and tested assignment operator overload and copy constructor …
adamcesco Apr 15, 2022
f4cb1f2
started testing avl_tree node deletion. Minor changes to hashmap class
adamcesco Apr 15, 2022
c20a2d0
started testing avl_tree deletion more thoroughly. Found bug
adamcesco Apr 15, 2022
98dadb1
started implementing a new algorithm for avl_tree deletion
adamcesco Apr 16, 2022
daf94fb
introduced some minor time saving techniques and implemented inverse-…
adamcesco Apr 17, 2022
3d1a257
fully implemented inverse stemming
adamcesco Apr 17, 2022
fb8d9f7
fully implemented inverse stemming
adamcesco Apr 17, 2022
4964aed
added support for words not found within the hashed-inverse-stemmed.txt
adamcesco Apr 17, 2022
f5e3059
updated avl_tree deletion
adamcesco Apr 19, 2022
af3247f
updated avl_tree deletion tests
adamcesco Apr 19, 2022
ce4bcc5
updated avl_tree deletion method to be 100% completed
adamcesco Apr 19, 2022
5fc4b9a
optimized avl tree deletion
adamcesco Apr 19, 2022
6c63445
changes parameters of methods used in avl_tree node deletion to allow…
adamcesco Apr 19, 2022
25832f2
Saving file directory string addresses and now ready for speed demo c…
adamcesco Apr 19, 2022
4a18a51
minor optimizations
adamcesco Apr 19, 2022
01a0813
successfully implemented avl_tree file saving and building
adamcesco Apr 20, 2022
d6c06e9
added documentation and increased readability
adamcesco Apr 20, 2022
75d8762
completed avl tree caching for tbb pointer based trees
adamcesco Apr 20, 2022
2f1b0ec
Reorder args and prevent fence problem for cache generation
drew-harris Apr 21, 2022
be0ea85
Return reference in avl_tree clear()
drew-harris Apr 23, 2022
cefa578
Add word score
drew-harris Apr 23, 2022
7e87f56
Save to archive
drew-harris Apr 23, 2022
d7e3f15
Merge remote-tracking branch 'origin/main' into main
adamcesco Apr 23, 2022
73c7af9
successfully implemented avl tree caching for the new avl tree
adamcesco Apr 24, 2022
87a0847
successfully implemented avl tree caching for the new avl tree
adamcesco Apr 24, 2022
b200e93
cleaned and removed unnecessary caching methods from classes
adamcesco Apr 24, 2022
58b5e36
removed avl_tree_io.h and transferred the io based methods into avl_t…
adamcesco Apr 24, 2022
6ee7743
Merge pull request #15
adamcesco Apr 24, 2022
e64b63a
successfully implemented caching for TableBundle struct and Articles
adamcesco Apr 26, 2022
75e16d0
edited json parsing to correctly include all organizations
adamcesco Apr 26, 2022
b237dbf
edited json parsing to correctly include all organizations. Removed u…
adamcesco Apr 27, 2022
89dd62d
Added a minor pipeline that all author names and organization names g…
adamcesco Apr 27, 2022
b9dc0ad
upgraded hash_table class to include the key data members
adamcesco Apr 27, 2022
6ccc3bf
set up framework of console interface
adamcesco Apr 27, 2022
ce56913
completed console interface
adamcesco Apr 27, 2022
de32741
completed console interface and added minor visual aids
adamcesco Apr 27, 2022
fa9a2d7
setting up the basis of a newer improved hash_table class
adamcesco Apr 27, 2022
6d921cd
improved speed of hashtable via load factor computation
adamcesco Apr 27, 2022
9b35295
added more caching features to console interface
adamcesco Apr 27, 2022
98caab4
Build query with singleword
drew-harris Apr 28, 2022
a223523
Working single word query tree
drew-harris Apr 28, 2022
a3b91da
Merge remote-tracking branch 'origin/main' into main
adamcesco Apr 28, 2022
5cf2d94
AND working
drew-harris Apr 28, 2022
6aa6ec1
Merge branch 'main' of github.com:/SMUCSE2341/22s-final-project-fair-…
drew-harris Apr 28, 2022
2e6f349
"NOT" working
drew-harris Apr 28, 2022
eafbeea
Convert and sort results into articles and display titles
drew-harris Apr 28, 2022
1a2b839
Remove mutex from query builder
drew-harris Apr 28, 2022
781471e
NEED to FIX ClearPropnoun pt1
drew-harris Apr 29, 2022
4ceab7f
NEED to FIX ClearPropnoun pt2
drew-harris Apr 29, 2022
0233c24
Merge pull request #17 from SMUCSE2341/query
drew-harris Apr 29, 2022
e683cf5
cleaned includes
adamcesco Apr 29, 2022
7a9a58b
cleaned includes
adamcesco Apr 29, 2022
d367786
fixed build multiple definition error
adamcesco Apr 29, 2022
2b8721a
Create run script
drew-harris Apr 30, 2022
919406d
Completed extra statistics pt.1
drew-harris Apr 30, 2022
b5adb90
Completed extra statistics pt.2
drew-harris Apr 30, 2022
ad1f32d
formatted console interface to be cleaner
adamcesco Apr 30, 2022
3b8a8fa
cleaned progress bar code by creating a templated struct ProgressBar.h
adamcesco May 1, 2022
c192023
cleaned progress bar code by creating a templated struct ProgressBar.h
adamcesco May 1, 2022
be055d0
added support for counting organizations and people while building fr…
adamcesco May 1, 2022
47d2754
changed console output logic and format
adamcesco May 1, 2022
5e944ba
cleaned code
adamcesco May 1, 2022
410c577
removed all valgrind messages
adamcesco May 1, 2022
42197ef
cleaned code
adamcesco May 1, 2022
58521fa
Merge pull request #18
adamcesco May 1, 2022
47cd66f
moved code to make the repo more visually organized
adamcesco May 1, 2022
dab8d29
cleaned code, improved OOP, and added another feature to SearchEngine…
adamcesco May 2, 2022
3749a88
fixed bug in input checker
adamcesco May 2, 2022
39b8fd7
updated CMake and filesystem
adamcesco May 2, 2022
fb37389
Convert to our own hash_table
drew-harris May 2, 2022
49ea6f3
RESET
drew-harris May 3, 2022
02f2203
parsing persons and all article now contains persons
adamcesco May 3, 2022
d6d1230
added documentation
adamcesco May 3, 2022
7d1e0f0
Fix person query
drew-harris May 3, 2022
3e99e94
updated article struct to archive people
adamcesco May 3, 2022
6ab6161
updated documentation and removed bug generated during parsing and bu…
adamcesco May 3, 2022
2750aea
added and edited documentation, fixed runtime bug, and cleaned consol…
adamcesco May 3, 2022
a5cc7a6
changed console output and edited code to follow good practice
adamcesco May 6, 2022
cda4d8e
cleaned code
adamcesco May 6, 2022
0c1ac8e
cleaned catch tests for avl-tree
adamcesco May 6, 2022
578d566
cleaned code
adamcesco May 6, 2022
6f3e82b
updated README.md
adamcesco May 6, 2022
1b75d31
removed need for useless command line argument
adamcesco May 16, 2022
e13b808
updated README.md and readability
adamcesco May 16, 2022
995365d
Update README.md
adamcesco May 16, 2022
70a185d
Update README.md
adamcesco May 17, 2022
bb35eaf
Update README.md
adamcesco May 18, 2022
4c18b6d
Update README.md
adamcesco May 18, 2022
b233138
Update README.md
adamcesco May 18, 2022
1c7866c
Update README.md
adamcesco May 18, 2022
dc852c1
Update README.md
adamcesco May 18, 2022
77a30b7
Update README.md
adamcesco May 18, 2022
bed6b06
Update README.md
adamcesco May 18, 2022
356cfde
Update README.md
adamcesco May 18, 2022
be914e6
Update README.md
adamcesco May 18, 2022
5eaf38a
Update README.md
adamcesco May 18, 2022
c866f93
Update README.md
adamcesco May 18, 2022
1cf8815
Update README.md
adamcesco May 18, 2022
e8acef8
Update README.md
adamcesco May 18, 2022
b6f2bbc
Update README.md
adamcesco May 18, 2022
9692785
all words within a query are now cleaned via pipeline::cleanStr()
adamcesco May 18, 2022
bfb6052
Update README.md
adamcesco May 18, 2022
8da2c48
Update README.md
adamcesco May 18, 2022
0ff3f88
Update README.md
adamcesco Jun 4, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 12 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,14 @@
data/
data/articles
data/*
cmake-build-debug/
cmake-build-release/
.idea/
.idea/
!data/small_articles
tree-cache.txt
cmake-build-*
/art-cache.txt
/author-cache.txt
/org-cache.txt
/article-cache.txt
/build
build
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[submodule "external/tbb"]
path = external/tbb
url = git@github.com:wjakob/tbb.git
37 changes: 34 additions & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,37 @@
# NOTE(review): this span is a rendered diff of CMakeLists.txt in which removed
# and added lines are interleaved without +/- markers. Three statements
# therefore appear twice (minimum version, C++ standard, add_executable).
# Presumably the first line of each pair is the removed one and the second is
# its replacement -- confirm against the raw file before reusing this text.

# Minimum CMake version: 3.20 in one revision, 3.12 in the other.
cmake_minimum_required(VERSION 3.20)
cmake_minimum_required(VERSION 3.12)
project(22s_final_proj)

# C++ standard: 14 in one revision, 17 in the other.
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD 17)
# Appends -std=c++17 and -pthread to the global compiler flags.
# NOTE(review): the -std flag repeats what CMAKE_CXX_STANDARD 17 already requests.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17 -pthread")
# Directory-scoped: links stdc++fs into every target added after this line
# (presumably for std::filesystem support -- confirm).
link_libraries(stdc++fs)

# Earlier executable definition: only main.cpp and catch_setup.cpp.
add_executable(22s_final_proj main.cpp catch_setup.cpp)
#TBB
# Install prefix for externally built dependencies, located inside the build
# tree; its lib/ and include/ subdirectories are added to the link and include
# search paths for this directory.
set(EXTERNAL_INSTALL_DIR ${CMAKE_BINARY_DIR}/external)
link_directories(${EXTERNAL_INSTALL_DIR}/lib)
include_directories(${EXTERNAL_INSTALL_DIR}/include)

#TBB
# Directory-scoped: links `tbb` into every target added after this line.
link_libraries(tbb)

# Later executable definition. It lists the application sources and headers
# together with files whose names suggest unit tests (catch_setup.cpp,
# hash_ordered_map_tests.cpp, avl_tests.cpp), so they all end up in the single
# 22s_final_proj binary.
add_executable(22s_final_proj main.cpp CatchTestUtils/catch_setup.cpp hash_table/hash_table.h hash_table/hash_ordered_map_tests.cpp SearchEngine/SearchEngine.cpp SearchEngine/SearchEngine.h Processor/Processor.cpp utilities/StopWords.h utilities/Article.h utilities/Pipelines.h avl_tree/avl_tests.cpp external/porter2_stemmer/porter2_stemmer.cpp external/porter2_stemmer/porter2_stemmer.h external/porter2_stemmer/util/hash.h external/porter2_stemmer/util/string_view.h utilities/typedefs.h QueryBuilder/QueryBuilder.cpp QueryBuilder/QueryBuilder.h utilities/Pipelines.cpp utilities/ProgressBar.h avl_tree/avl_friends.cpp)

######################################################################
# Build rules for tbb
######################################################################

# Builds tbb as an external project. The "repository" is the local checkout at
# external/tbb under the top-level source dir (the path the .gitmodules entry
# uses), taken at tag/branch `master`, and installed into EXTERNAL_INSTALL_DIR.
# <INSTALL_DIR> is an ExternalProject placeholder that expands to the
# INSTALL_DIR value given here. The -DTBB_* entries are forwarded to the tbb
# build as cache definitions; their exact meaning is defined by that project
# (by name: shared library on; static, tbbmalloc, proxy, tests, CI off).
include(ExternalProject)
ExternalProject_Add(
tbb
GIT_REPOSITORY ${CMAKE_SOURCE_DIR}/external/tbb
GIT_TAG master
INSTALL_DIR ${EXTERNAL_INSTALL_DIR}
CMAKE_ARGS
-DCMAKE_INSTALL_PREFIX:PATH=<INSTALL_DIR>
-DTBB_BUILD_SHARED=ON
-DTBB_BUILD_STATIC=OFF
-DTBB_BUILD_TBBMALLOC=OFF
-DTBB_BUILD_TBBMALLOC_PROXY=OFF
-DTBB_BUILD_TESTS=OFF
-DTBB_CI_BUILD=OFF
)
# Build ordering only: make sure the tbb external project is built before the
# executable. The actual linking comes from link_libraries(tbb) above.
add_dependencies(22s_final_proj tbb)
File renamed without changes.
File renamed without changes.
File renamed without changes.
Loading