diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..b038222 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,15 @@ +.git + +.pristine + +.trash + +.recycle + +.backup + +.volumes + +web/ + +docs/ diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..82f9275 --- /dev/null +++ b/.gitignore @@ -0,0 +1,162 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. 
+#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/latest/usage/project/#working-with-version-control +.pdm.toml +.pdm-python +.pdm-build/ + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. 
+#.idea/ diff --git a/.recycle/test/.test.bash.swp b/.recycle/test/.test.bash.swp new file mode 100644 index 0000000..a03d15e Binary files /dev/null and b/.recycle/test/.test.bash.swp differ diff --git a/.recycle/test/test-complex.bash b/.recycle/test/test-complex.bash new file mode 100644 index 0000000..6900f18 --- /dev/null +++ b/.recycle/test/test-complex.bash @@ -0,0 +1,27 @@ +#!/bin/bash + +reset + +clear + +set -e + +set -x + +# Define the API endpoint URL with root and path +API_URL="http://aventador.embanet.online:5000/complex_process" + +# Data to send in JSON format +DATA='{"string": "test", "num1": 10, "num2": 20}' + +# Send POST request using curl with provided data +response=$(curl -s -X POST -H "Content-Type: application/json" -d "$DATA" $API_URL) + +# Check for successful response (exit code 0) and extract the result +if [[ $? -eq 0 ]]; then + result=$(echo $response | jq -r '.result') + echo "API responded successfully with result: $result" +else + echo "Error: API request failed!" + exit 1 +fi diff --git a/.recycle/test/test.bash b/.recycle/test/test.bash new file mode 100644 index 0000000..47d80ad --- /dev/null +++ b/.recycle/test/test.bash @@ -0,0 +1,27 @@ +#!/bin/bash + +reset + +clear + +set -e + +set -x + +# Define the API endpoint URL with root and path +API_URL="http://aventador.embanet.online:5000/process" + +# Data to send in JSON format +DATA='{"string": "test", "num1": 10, "num2": 20}' + +# Send POST request using curl with provided data +response=$(curl -s -X POST -H "Content-Type: application/json" -d "$DATA" $API_URL) + +# Check for successful response (exit code 0) and extract the result +if [[ $? -eq 0 ]]; then + result=$(echo $response | jq -r '.result') + echo "API responded successfully with result: $result" +else + echo "Error: API request failed!" 
+ exit 1 +fi diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..47484a1 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,11 @@ +FROM python:3-bookworm + +WORKDIR /app + +COPY . . + +RUN pip install --no-cache-dir -r requirements.txt + +EXPOSE 5000 + +CMD ["flask", "run", "--host=0.0.0.0"] diff --git a/Jenkinsfile b/Jenkinsfile new file mode 100644 index 0000000..691ebed --- /dev/null +++ b/Jenkinsfile @@ -0,0 +1,75 @@ +pipeline { + + agent none + + options { + + disableConcurrentBuilds(abortPrevious: true) + + buildDiscarder(logRotator(numToKeepStr: '1')) + } + + stages { + + stage('docker compose build') { + + agent { + + label "aventador" + + } + + steps { + + dir('.') { + + sh 'docker compose build' + + } + + } + + } + + stage('docker compose push') { + + agent { + + label "aventador" + + } + + steps { + + dir('.') { + + sh 'docker compose push' + + } + + } + + } + + stage('prune') { + + agent { + + label "aventador" + + } + + steps { + + dir('.') { + + sh 'docker system prune -a -f' + + } + + } + + } + +}} + diff --git a/LICENSE b/LICENSE deleted file mode 100644 index 0b8ae76..0000000 --- a/LICENSE +++ /dev/null @@ -1,5 +0,0 @@ -Copyright (C) YEAR by AUTHOR EMAIL - -Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted. - -THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
diff --git a/README.md b/README.md deleted file mode 100644 index 623bccf..0000000 --- a/README.md +++ /dev/null @@ -1,2 +0,0 @@ -# lenape-jobs-fetcher - diff --git a/app.bash b/app.bash new file mode 100755 index 0000000..26fab04 --- /dev/null +++ b/app.bash @@ -0,0 +1,17 @@ +#!/bin/bash + +##set -e + +##set -x + +reset + +clear + +echo "## launch > flask api" + +pkill flask + +flask run --host=0.0.0.0 + +echo "flask api started..." diff --git a/app.py b/app.py new file mode 100644 index 0000000..326f6dc --- /dev/null +++ b/app.py @@ -0,0 +1,36 @@ +from flask import Flask, request, jsonify +from flask_cors import CORS, cross_origin +import datetime + +import uuid + +from fetchjobs import writeFile, dowork, read + +app = Flask(__name__) + +CORS(app) + +@app.route('/') +def hello(): + + return 'Hello, Universe! With ❤️ from Software Shinobi (www.softwareshinobi.com)' + +@app.route('/jobs/load') +def fetchJobs(): + + thing = dowork() + + print("thing1: ",thing) + + thing2 = writeFile(thing) + + print("thing2: ",thing2) + + return thing + +@app.route('/jobs/') +def ddsd(): + + loaded = read() + + return loaded diff --git a/compose.bash b/compose.bash new file mode 100755 index 0000000..ff7d4f7 --- /dev/null +++ b/compose.bash @@ -0,0 +1,11 @@ +reset + +clear + +set -e + +set -x + +docker compose down --remove-orphans + +docker compose up --build -d diff --git a/compose.yaml b/compose.yaml new file mode 100644 index 0000000..194c410 --- /dev/null +++ b/compose.yaml @@ -0,0 +1,33 @@ +services: + + shinobi-jobs-fetcher-server: + + container_name: shinobi-jobs-fetcher-server + + image: softwareshinobi/shinobi-jobs-fetcher-server + + build: + + context: . 
+ + dockerfile: Dockerfile + + ports: + + - 8888:5000 + + shinobi-jobs-fetcher-web: + + container_name: shinobi-jobs-fetcher-web + + image: softwareshinobi/shinobi-jobs-fetcher-web + + build: + + context: web + + dockerfile: Dockerfile + + ports: + + - 8880:80 diff --git a/cover.png b/cover.png new file mode 100644 index 0000000..591606f Binary files /dev/null and b/cover.png differ diff --git a/fetchjobs.py b/fetchjobs.py new file mode 100644 index 0000000..4d6a5dc --- /dev/null +++ b/fetchjobs.py @@ -0,0 +1,73 @@ +import csv + +import json + +from datetime import datetime + +from jobspy import scrape_jobs + +def dowork(): + + jobs = scrape_jobs( + + site_name=["indeed", "linkedin", "zip_recruiter", "glassdoor", "google"], + + search_term="Devops", + + google_search_term="Remote Devops jobs near miami, florida", + + location="miami, fl", + + results_wanted=50, + + hours_old=72, # (only Linkedin/Indeed is hour specific, others round up to days old) + + country_indeed='USA', # only needed for indeed / glassdoor + + ) + + print(f"scraper seearch completed. 
found {len(jobs)} jobs.") + + print(jobs.head()) + + # Convert DataFrame to JSON string + json_string = jobs.to_json(orient='records') + + print("json",json_string) + + + # jobs.to_csv("shinob-jobs-report.csv", quoting=csv.QUOTE_NONNUMERIC, escapechar="\\", index=False, sep='|' ) + + return json_string + +def writeFile( variable_value): + + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + + filename = f"shinobi-jobs-report.json" + + with open(filename, "w") as f: + + f.write(str(variable_value)) + +def read(): + + filename = "shinobi-jobs-report.json" + + try: + + with open(filename, "r") as f: + + return f.read() + + except FileNotFoundError: + + print(f"Error: File '{filename}' not found.") + + return None + +#thing = dowork() + +#print("thing: ",thing) + +#writeFile(thing) \ No newline at end of file diff --git a/provision.bash b/provision.bash new file mode 100755 index 0000000..ba3ec06 --- /dev/null +++ b/provision.bash @@ -0,0 +1,37 @@ +#!/bin/bash + +#reset + +#clear +#set -e + +#set -x + +sudo apt update + +sudo apt install -y python3-pip + +sudo apt install -y python3-flask + +##sudo apt install -y python3.12-venv + +##python3 -m venv my_venv + +##source my_venv/bin/activate + +pip install -r requirements.txt + +# sudo pip install jobspy --break-system-packages + +## dev notes / we just install it twice. fuck it. i dont care to get cute. + +pip install Flask-CORS + +pip install -U python-jobspy + +#pip[p install -U python-jobspy --break-system-packages ## worked on jacques's garuda box 12/19 + + + + +##pip install flask diff --git a/readme.md b/readme.md new file mode 100644 index 0000000..38580e2 --- /dev/null +++ b/readme.md @@ -0,0 +1,41 @@ +# Shinobi Jobs Report + +This Flask application fetches jobs from various job boards and returns them in JSON format. 
+ + + +**Endpoints** + +* **/jobs/load** - This endpoint scrapes jobs from Indeed, LinkedIn, Zip Recruiter, Glassdoor, and Google for "Devops" positions in the Miami, Florida area, saves them to `shinobi-jobs-report.json`, and returns them as JSON. +* **/jobs/** - This endpoint returns the jobs data saved by the most recent `/jobs/load` call. + +**How to Use** + +1. Clone this repository. +2. Install the required libraries using `pip install -r requirements.txt`. +3. Run the application using `flask run`. +4. To fetch new jobs, make a GET request to `/jobs/load`. +5. To access the most recent jobs data, make a GET request to `/jobs/`. + +**Example Usage** + +``` +curl http://localhost:5000/jobs/load +``` + +This will return a JSON array containing one object per scraped job. + +**Explanation of the Code** + +* The `dowork` function scrapes jobs from the specified job boards using the `jobspy` library. +* The `writeFile` function writes the scraped jobs data to a JSON file. +* The `read` function reads the most recent jobs data from the JSON file. +* The `/jobs/load` endpoint calls the `dowork` function to scrape new jobs and then calls the `writeFile` function to save the data. It then returns the JSON data. +* The `/jobs/` endpoint calls the `read` function to retrieve the most recent jobs data from the JSON file and returns it. + +**Additional Notes** + +* This is a basic example of a Flask application. You can customize it to fit your specific needs. +* The `jobspy` library is not included in this repository. It is published on PyPI as `python-jobspy` and is listed in `requirements.txt`, so step 2 installs it; to install it separately, use `pip install python-jobspy`. + +I hope this helps! Let me know if you have any other questions. 
diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..36a284d --- /dev/null +++ b/requirements.txt @@ -0,0 +1,8 @@ +Flask==1.1.2 +Jinja2==2.11.2 +Flask-RESTful==0.3.8 +MarkupSafe==1.1.1 +itsdangerous==1.1.0 +Werkzeug==1.0.1 +Flask-CORS +python-jobspy \ No newline at end of file diff --git a/web/.dockerignore b/web/.dockerignore new file mode 100755 index 0000000..93c32d7 --- /dev/null +++ b/web/.dockerignore @@ -0,0 +1,13 @@ +.git + +.pristine + +.trash + +.recycle + +.backup + +.template + +.calendar diff --git a/web/Dockerfile b/web/Dockerfile new file mode 100755 index 0000000..331f233 --- /dev/null +++ b/web/Dockerfile @@ -0,0 +1,5 @@ +FROM nginx + +WORKDIR /usr/share/nginx/html/ + +COPY . . diff --git a/web/compose.bash b/web/compose.bash new file mode 100755 index 0000000..7f991e8 --- /dev/null +++ b/web/compose.bash @@ -0,0 +1,13 @@ +#!/bin/bash + +reset + +clear + +set -e + +set -x + +docker-compose down + +docker-compose up --build diff --git a/web/compose.yaml b/web/compose.yaml new file mode 100644 index 0000000..82affec --- /dev/null +++ b/web/compose.yaml @@ -0,0 +1,17 @@ +services: + + valorant-digital-workspace-web: + + container_name: valorant-digital-workspace-web + + image: linuxlinape/lenape-jobs-dashboard + + build: + + context: . + + dockerfile: Dockerfile + + ports: + + - 8080:80 diff --git a/web/favicon.ico b/web/favicon.ico new file mode 100755 index 0000000..d9e68c4 Binary files /dev/null and b/web/favicon.ico differ diff --git a/web/index.html b/web/index.html new file mode 100644 index 0000000..aa11e02 --- /dev/null +++ b/web/index.html @@ -0,0 +1,81 @@ + + + + + + + + + Recent Jobs from Internet + + + + + + + + + + + +
+ +

Recent Jobs from Internet

+ + + + + + + + + + + + + + + + + + +
source siteCompanyPositionLocationjob typeis remotedescription
+
// web/js/jobs.js
// Browser client for the jobs API: requests job records and renders them as
// rows of the `#leaderboard` table. Requires jQuery (`$`) to be loaded first.

/** Base URL of the jobs API (compose.yaml publishes the API container on host port 8888). */
const JOBS_API_BASE_URL = 'http://localhost:8888';

/** Time between automatic refreshes of the table, in milliseconds (two minutes). */
const REFRESH_INTERVAL_MS = 1000 * 120;

// Only wire up the page when running in a browser. In a browser this behaves
// exactly as before; elsewhere (e.g. Node) the functions below can be loaded
// without touching the DOM.
if (typeof window !== 'undefined') {
  $(document).ready(function () {
    fetch();
    setInterval(fetch, REFRESH_INTERVAL_MS);
  });
}

/**
 * Issue a GET request against the jobs API and render the JSON response.
 *
 * @param {string} path - API path appended to JOBS_API_BASE_URL, e.g. '/jobs/'.
 * @param {string} label - Name of the calling function, used in log output.
 */
function requestJobs(path, label) {
  console.debug(' -> :: ' + label + '()');

  $.ajax({
    type: 'GET',
    url: JOBS_API_BASE_URL + path,
    dataType: 'json',
    crossDomain: false,
    success: function (data) {
      profitleaderboard(data);
    },
    // jQuery calls this as error(jqXHR, textStatus, errorThrown).
    error: function (jqXHR, textStatus, errorThrown) {
      console.error('error fetching jobs from ' + path, textStatus, errorThrown);
    },
  });
}

/**
 * Load the most recently saved job records (`GET /jobs/`) and render them.
 *
 * NOTE(review): if this file is loaded as a classic script, this top-level
 * declaration shadows the browser's built-in `fetch` API for the page. The
 * name is kept because callers outside this file may depend on it.
 */
function fetch() {
  requestJobs('/jobs/', 'fetch');
}

/**
 * Trigger a fresh scrape on the server (`GET /jobs/load`) and render the result.
 */
function load() {
  requestJobs('/jobs/load', 'load');
}

/**
 * Convert a value to text that is safe to embed in HTML.
 *
 * @param {*} value - Any value; `null`/`undefined` become an empty string.
 * @returns {string} The value as a string with & < > " ' escaped.
 */
function escapeHtml(value) {
  if (value === null || value === undefined) {
    return '';
  }
  return String(value)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
}

/**
 * Build the table-body markup for a list of job records.
 *
 * Each job becomes one row with these cells, in order: 1-based position,
 * site, company, title, location, job_type, is_remote, description.
 *
 * NOTE(review): the row/cell wrappers were empty string literals in the
 * reviewed text; `<tr>`/`<td>` are used here because the result is inserted
 * into a `<tbody>`. Confirm no additional markup (for example a link around
 * the title) is expected.
 *
 * @param {Array<Object>} jobList - Job records; anything that is not an array yields ''.
 * @returns {string} Concatenated `<tr>` markup, one row per job.
 */
function buildJobRowsHtml(jobList) {
  const jobs = Array.isArray(jobList) ? jobList : [];
  let html = '';

  for (let i = 0; i < jobs.length; i++) {
    const job = jobs[i] || {};
    const cells = [
      i + 1,
      job.site,
      job.company,
      job.title,
      job.location,
      job.job_type,
      job.is_remote,
      job.description,
    ];

    html += '<tr>';
    for (let c = 0; c < cells.length; c++) {
      // Job fields come from scraped third-party listings, so every value is
      // escaped before it is placed in markup.
      html += '<td>' + escapeHtml(cells[c]) + '</td>';
    }
    html += '</tr>';
  }

  return html;
}

/**
 * Render job records into the body of the `#leaderboard` table, replacing
 * whatever rows were there before.
 *
 * @param {Array<Object>} jobList - Job records as returned by the API. A
 *   missing or non-array value clears the table instead of throwing.
 */
function profitleaderboard(jobList) {
  const jobs = Array.isArray(jobList) ? jobList : [];

  console.log('jobList / ', jobList);
  console.log('size / ' + jobs.length);

  $('#leaderboard > tbody').html(buildJobRowsHtml(jobs));
}