diff --git a/mucc-logo.png b/mucc-logo.png new file mode 100644 index 0000000..fd8ffd5 Binary files /dev/null and b/mucc-logo.png differ diff --git a/src/app/Controller.java b/src/app/Controller.java index 47dedf3..1b8bda6 100644 --- a/src/app/Controller.java +++ b/src/app/Controller.java @@ -1,13 +1,15 @@ package app; +import app.routines.DeleteDuplicates; +import app.routines.FilePool; +import app.routines.RetrieveSubFiles; import javafx.concurrent.Task; import javafx.fxml.FXML; -import javafx.scene.Node; import javafx.scene.control.*; +import javafx.scene.paint.Color; import javafx.scene.text.Text; -import java.awt.event.ActionEvent; import java.io.File; import java.nio.file.Files; import java.nio.file.Path; @@ -23,9 +25,14 @@ import javafx.stage.Window; */ public class Controller { - Map doubles; + private Map doubles; + private Map dupblicate_base_pool; + private Map pdf_base_pool; + // Delete Duplicates + // ---------------------------------------------------------------------------------------------------------------- + @FXML protected Text loadDirState; @@ -42,17 +49,16 @@ public class Controller { protected Text delDuplicateState; @FXML - protected Text fileNr; + protected Text fileNrCount; @FXML - protected Text doubleNr; + protected Text doubleNrCount; @FXML protected TextField directoryField; - @FXML - protected void openDir(){ + protected void openDir() { Window stage = loadDirState.getScene().getWindow(); @@ -60,58 +66,58 @@ public class Controller { directoryChooser.setTitle("Select Directory."); directoryChooser.setInitialDirectory(new File(System.getProperty("user.home"))); - TextArea textArea = new TextArea(); - textArea.setMinHeight(70); - File dir = directoryChooser.showDialog(stage); - if (dir != null){ + if (dir != null) { directoryField.setText(dir.getAbsolutePath()); - }else{ - //textArea.setText(null); } } - @FXML - protected void loadDir() { + protected void loadDuplicateDir() { Task loadDirTask = new Task() { @Override public Void call() { - loadDirState.setText(""); - calcMd5State.setText(""); - sortFileState.setText(""); - findDuplicateState.setText(""); - delDuplicateState.setText(""); - fileNr.setText("Number of Files:"); - doubleNr.setText("Number of Duplicates:"); + loadDirState.setText("__"); + calcMd5State.setText("__"); + sortFileState.setText("__"); + findDuplicateState.setText("__"); + delDuplicateState.setText("__"); + fileNrCount.setText("__"); + doubleNrCount.setText("__"); + Path path = Paths.get(directoryField.getText()); if (!Files.isDirectory(path)) { + loadDirState.setFill(Color.RED); loadDirState.setText("ERROR."); } else { - Map pool = new Routines().loadPool(directoryField.getText(), "file"); + Map pool = new FilePool().loadPool(directoryField.getText(), "file"); new Write().textPool("pool", pool); + loadDirState.setFill(Color.GREEN); loadDirState.setText("OK."); - fileNr.setText("Number of Files: " + pool.size()); + fileNrCount.setText("" + pool.size()); - Map md5Pool = new Routines().md5Pool(pool); + Map md5Pool = new DeleteDuplicates().md5Pool(pool); new Write().textMd5Pool("md5Pool", md5Pool); + calcMd5State.setFill(Color.GREEN); calcMd5State.setText("OK."); Map qsMd5Pool = new QuicksortMd5().quicksortMd5(md5Pool); new Write().textMd5Pool("qsMd5Pool", qsMd5Pool); + sortFileState.setFill(Color.GREEN); sortFileState.setText("OK."); - doubles = new Routines().doubles(qsMd5Pool); + doubles = new DeleteDuplicates().doubles(qsMd5Pool); new Write().textMd5Pool("doubles", doubles); + findDuplicateState.setFill(Color.GREEN); findDuplicateState.setText("OK."); - doubleNr.setText("Number of Duplicates: " + doubles.size()); + doubleNrCount.setText("" + doubles.size()); } return null; @@ -120,17 +126,18 @@ public class Controller { new Thread(loadDirTask).start(); } + @FXML protected void deleteDuplicates() { Task delDuplicateTask = new Task() { @Override - public Void call() { + public Void call() { for (int i = 0; i < doubles.size(); i++) { new Execute().execute(new String[]{"rm", doubles.get(i).file.getAbsolutePath()}); - } + delDuplicateState.setFill(Color.GREEN); delDuplicateState.setText("OK."); return null; } @@ -138,5 +145,70 @@ public class Controller { new Thread(delDuplicateTask).start(); } + @FXML + protected void loadBaseFiles() { + } + + + // Retrieve Sub-Files + // ---------------------------------------------------------------------------------------------------------------- + + @FXML + protected Text loadPdfState; + + @FXML + protected Text splitPdfState; + + @FXML + protected Text baseFileCount; + + @FXML + protected Text subFileCount; + + @FXML + protected void loadBaseDir() { + Task loadDirTask = new Task() { + @Override + public Void call() { + + loadPdfState.setText("__"); + splitPdfState.setText("__"); + baseFileCount.setText("__"); + subFileCount.setText("__"); + + Path path = Paths.get(directoryField.getText()); + + if (!Files.isDirectory(path)) { + loadPdfState.setFill(Color.RED); + loadPdfState.setText("ERROR."); + } else { + + pdf_base_pool = new FilePool().loadPool(directoryField.getText(), "file"); + loadPdfState.setFill(Color.GREEN); + loadPdfState.setText("OK."); + baseFileCount.setText("" + pdf_base_pool.size()); + } + return null; + } + }; + new Thread(loadDirTask).start(); + } + + @FXML + protected void splitPdf() { + + Task splitPdfTask = new Task() { + @Override + public Void call() { + + int list_size = new RetrieveSubFiles().pdf_method(pdf_base_pool); + splitPdfState.setFill(Color.GREEN); + splitPdfState.setText("OK."); + subFileCount.setText("" + list_size); + return null; + } + }; + new Thread(splitPdfTask).start(); + } } diff --git a/src/app/Tools.java b/src/app/Tools.java index bde8c67..32e085b 100644 --- a/src/app/Tools.java +++ b/src/app/Tools.java @@ -2,8 +2,10 @@ package app; import java.io.BufferedReader; import java.io.IOException; +import java.util.regex.Matcher; +import java.util.regex.Pattern; -class Tools { +public class Tools { private final Execute x; @@ -27,4 +29,33 @@ class Tools { } return md5; } + + + public String brToString(BufferedReader br) { + StringBuilder sb = new StringBuilder(); + Object[] br_array = br.lines().toArray(); + for (int i = 0; i < br_array.length; i++) { + sb.append(br_array[i].toString() + "\n"); + } + return sb.toString(); + } + + /** + * + * @param input input String + * @param regex pattern String + * @return matches for pattern, separated by \n + */ + public String matchRegEx(String input, String regex){ + + Pattern pattern = Pattern.compile(regex); + Matcher m = pattern.matcher(input); + StringBuilder sb = new StringBuilder(); + while (m.find()){ + // + sb.append(m.group()+"\n"); + } + + return sb.toString(); + } } diff --git a/src/app/layout.fxml b/src/app/layout.fxml index 81708b1..c4d85ae 100644 --- a/src/app/layout.fxml +++ b/src/app/layout.fxml @@ -9,132 +9,169 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + diff --git a/src/app/Routines.java b/src/app/routines/DeleteDuplicates.java similarity index 57% rename from src/app/Routines.java rename to src/app/routines/DeleteDuplicates.java index 77a2c50..77fdc83 100644 --- a/src/app/Routines.java +++ b/src/app/routines/DeleteDuplicates.java @@ -1,4 +1,4 @@ -package app; +package app.routines; import java.io.File; import java.io.IOException; @@ -8,50 +8,21 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import app.Artifacts; import app.Artifacts.MFile; +import app.Execute; -class Routines { +public class DeleteDuplicates { private final Execute x; - public Routines() { + public DeleteDuplicates() { this.x = new Execute(); } - /** - * [1] Write output of find srcdir to /tmp/find .
- * [2] Read /tmp/find into List< String /> .
- * [3] Add List< String /> entries to Map>String,File> , where - * String is an int key.
- * - * @param srcdir String - * @param type String "file" OR "dir" , pick what type will be loaded. - * @return filepool - */ - public Map loadPool(String srcdir, String type) { - // [1] - x.execute(new String[]{System.getProperty("user.dir") + "/src/app/toFile.sh", "find", srcdir, "/tmp/find"}); - // [2] - List lines = null; - try { - lines = Files.readAllLines(Paths.get("/tmp/find")); - } catch (IOException e) { - e.printStackTrace(); - } - // [3] - Map filepool = new HashMap<>(); - int j = 0; - for (String line : lines) { - File file = new File(line); - if (type == "dir" && file.isDirectory() || type == "file" && file.isFile()) { - filepool.put(j, file); - j++; - } - } - return filepool; - } + /** * Calculate md5 for each file in pool . diff --git a/src/app/routines/FilePool.java b/src/app/routines/FilePool.java new file mode 100644 index 0000000..c00f99b --- /dev/null +++ b/src/app/routines/FilePool.java @@ -0,0 +1,49 @@ +package app.routines; + +import app.Execute; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +public class FilePool { + + private final Execute x = new Execute(); + + /** + * [1] Write output of find srcdir to /tmp/find .
+ * [2] Read /tmp/find into List< String /> .
+ * [3] Add List< String /> entries to Map>String,File> , where + * String is an int key.
+ * + * @param srcdir String + * @param type String "file" OR "dir" , pick what type will be loaded. + * @return filepool + */ + public Map loadPool(String srcdir, String type) { + // [1] + x.execute(new String[]{System.getProperty("user.dir") + "/src/app/shell/toFile.sh", "find", srcdir, "/tmp/find"}); + // [2] + List lines = null; + try { + lines = Files.readAllLines(Paths.get("/tmp/find")); + } catch (IOException e) { + e.printStackTrace(); + } + // [3] + Map filepool = new HashMap<>(); + int j = 0; + for (String line : lines) { + File file = new File(line); + if (type == "dir" && file.isDirectory() || type == "file" && file.isFile()) { + filepool.put(j, file); + j++; + } + } + return filepool; + } +} diff --git a/src/app/routines/RetrieveSubFiles.java b/src/app/routines/RetrieveSubFiles.java new file mode 100644 index 0000000..2483e78 --- /dev/null +++ b/src/app/routines/RetrieveSubFiles.java @@ -0,0 +1,114 @@ +package app.routines; + +import app.Execute; +import app.Tools; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +public class RetrieveSubFiles { + + private final Execute x; + private final Tools tools; + + + public RetrieveSubFiles() { + this.x = new Execute(); + this.tools = new Tools(); + } + + + + + + /** + * @param pool Map< Integer, File /> , a map containing files. + * @return create subfiles on disk. MFile /> of duplicates contained in md5Pool + */ + public int pdf_method(Map pool) { + + int files_created = 0; + + for (int i = 0; i < pool.size(); i++) { + + File file = pool.get(i); + String f_path = file.getAbsolutePath(); + if (f_path.endsWith("pdf")) { + // -n is for adding line numbers + // -a is for accepting binary input + // -T is for preserving tabs, this helps in some special cases. + String[] cmd = new String[]{System.getProperty("user.dir") + "/src/app/shell/pipe.sh", + "cat " + f_path, + "grep -naT %PDF"}; + String pdf = tools.brToString(x.execute(cmd).output); + // because -T was used, we now must use regex to extract the '1234:' tag + String pdf_lines = tools.matchRegEx(pdf, "[0-9]+:"); + + cmd = new String[]{System.getProperty("user.dir") + "/src/app/shell/pipe.sh", + "cat " + f_path, + "grep -naT %%EOF"}; + String eof = tools.brToString(x.execute(cmd).output); + String eof_lines = tools.matchRegEx(eof, "[0-9]+:"); + + // TODO because of PDF tags having 'error char' instad of line nums, the # of PDF tags < # EOF tags + // TODO fix this by maybe making a grep of grep + List pdf_list = pdf_list(pdf_lines, eof_lines); + int adf = 3; + + for (int j = 0; j < pdf_list.size(); j++) { + String sub_f_name = file.getName().split("\\.")[0] + "-sub" + j + ".pdf"; + String[] split = pdf_list.get(j).split(":"); + + cmd = new String[]{System.getProperty("user.dir") + "/src/app/shell/fileCut.sh", + f_path, + split[0], + split[1], + file.getParent() + "/" + sub_f_name}; + x.execute(cmd); + } + + files_created += pdf_list.size(); + // x.execute(new String[]{"rm", f_path}); + + } else { + // Do nothing. File is either not a PDF, + // or contains no usable %PDF and %%EOF sequences. + } + } + return files_created; + } + + + /** + * pdf_matrix contains rows for each subfile as hinted by params:
+ * [ %PDF line ; %%EOF line ; %PDF-version ; %%EOF ] + * + * @param pdf Record of lines containing the %PDF sequence. + * @param eof Record of lines containing the %%EOF sequence. + * @return pdf_matrix, see above. + */ + public List pdf_list(String pdf, String eof) { + List list = new ArrayList(); + + String[] pdf_list = pdf.split("\n"); + String[] eof_list = eof.split("\n"); + + if (pdf_list.length == eof_list.length) { + for (int j = 0; j < pdf_list.length; j++) { + String pdf_tag_line = pdf_list[j].split(":")[0]; + String eof_tag_line = eof_list[j].split(":")[0]; + list.add(pdf_tag_line + ":" + eof_tag_line); + } + } else { + throw new Error("Number of %PDF tags does not match the number %%EOF tags. Skipping this file."); + } + return list; + } + +} diff --git a/src/app/shell/fileCut.sh b/src/app/shell/fileCut.sh new file mode 100755 index 0000000..8fa99a8 --- /dev/null +++ b/src/app/shell/fileCut.sh @@ -0,0 +1,23 @@ +#!/bin/bash + +# ================================================================================ +# FILE: fileCut.sh +# +# USAGE: fileCut.sh [input file] [cut start] [cut end] [output file] +# +# DESCRIPTION: Cuts section between 'cut start' and 'cut end' from 'input file' +# and writes the contents of the cut to 'output file'. +# +# ================================================================================ + +input_file=$1 +start_cut=$2 +end_cut=$3 +output_file=$4 +let cut_size=$end_cut-$start_cut + + +head -n $start_cut $input_file | tail -n 1 | sed 's/.*%PDF/%PDF/g' > $output_file + + +head -n $end_cut $input_file | tail -n $cut_size >> $output_file diff --git a/src/app/shell/pipe.sh b/src/app/shell/pipe.sh new file mode 100755 index 0000000..dc6f96c --- /dev/null +++ b/src/app/shell/pipe.sh @@ -0,0 +1,6 @@ +#!/bin/bash +a=$1 +b=$2 +$a | $b + +# this is a pipe \ No newline at end of file diff --git a/src/app/shell/pipe2.sh b/src/app/shell/pipe2.sh new file mode 100755 index 0000000..826e21f --- /dev/null +++ b/src/app/shell/pipe2.sh @@ -0,0 +1,7 @@ +#!/bin/bash +a=$1 +b=$2 +c=$3 +$a | $b | $c + +# this is a double pipe \ No newline at end of file diff --git a/src/app/toFile.sh b/src/app/shell/toFile.sh similarity index 100% rename from src/app/toFile.sh rename to src/app/shell/toFile.sh