dev1/backend/server2.js
Josh 6578d3c856
All checks were successful
ci/woodpecker/manual/woodpecker Pipeline was successful
changed server2 user_profile.db to remove sqlite connection
2025-09-13 15:47:08 +00:00

2056 lines
69 KiB
JavaScript
Executable File
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/**************************************************
* server2.js - Reverted to SQLite version
**************************************************/
import express from 'express';
import axios from 'axios';
import helmet from 'helmet';
import dotenv from 'dotenv';
import xlsx from 'xlsx';
import path from 'path';
import { fileURLToPath } from 'url';
import { open } from 'sqlite';
import sqlite3 from 'sqlite3';
import pool from './config/mysqlPool.js'; // exports { query, execute, raw, ... }
import fs from 'fs';
import { readFile } from 'fs/promises'; // <-- add this
import chatFreeEndpoint from "./utils/chatFreeEndpoint.js";
import { OpenAI } from 'openai';
import rateLimit from 'express-rate-limit';
import authenticateUser from './utils/authenticateUser.js';
import { vectorSearch } from "./utils/vectorSearch.js";
import { initEncryption, verifyCanary } from './shared/crypto/encryption.js';
import sgMail from '@sendgrid/mail'; // npm i @sendgrid/mail
import crypto from 'crypto';
import cookieParser from 'cookie-parser';
import { v4 as uuid } from 'uuid';
import Fuse from 'fuse.js';
import { parse as csvParse } from 'csv-parse/sync';
// ES modules have no __filename/__dirname globals; derive them from import.meta.url.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
// Parent directory of the directory that holds this file.
const rootPath = path.resolve(__dirname, '..');
// Environment name picks which `.env.<env>` file is loaded; defaults to 'development'.
const env = process.env.NODE_ENV?.trim() || 'development';
const envPath = path.resolve(rootPath, `.env.${env}`);
dotenv.config({ path: envPath, override: false }); // don't clobber compose-injected env
// NOTE(review): ROOT_DIR resolves to the same directory as rootPath above.
const ROOT_DIR = path.resolve(__dirname, '..');
// Data files live in ./data next to this file; the two SQLite files live one level up.
const DATA_DIR = path.join(__dirname, 'data');
const CIP_TO_SOC_PATH = path.join(DATA_DIR, 'CIP_to_ONET_SOC.xlsx');
const INSTITUTION_DATA_PATH = path.join(DATA_DIR, 'Institution_data.json');
const SALARY_DB_PATH = path.join(ROOT_DIR, 'salary_info.db');
const USER_PROFILE_DB_PATH = path.join(ROOT_DIR, 'user_profile.db');
// NOTE(review): DB_POOL_SIZE is not referenced in the part of the file reviewed here — confirm it is still used.
const DB_POOL_SIZE = 6;
// Base URL for internal API calls, with any trailing slashes removed.
const API_BASE = (process.env.APTIVA_INTERNAL_API || 'http://server1:5000').replace(/\/+$/, '');
// Fail fast when a required data file is missing.
// user_profile.db is only opened by initDatabases() when NODE_ENV is not 'production'
// (production uses the MySQL pool), so it is only required outside production.
for (const p of [
  CIP_TO_SOC_PATH,
  INSTITUTION_DATA_PATH,
  SALARY_DB_PATH,
  ...(process.env.NODE_ENV !== 'production' ? [USER_PROFILE_DB_PATH] : []),
]) {
  if (!fs.existsSync(p)) {
    console.error(`FATAL Required data file not found → ${p}`);
    process.exit(1);
  }
}
// OpenAI SDK client; the API key is read from OPENAI_API_KEY.
const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });
// Rate limiter: at most 20 requests per one-hour window per key.
const chatLimiter = rateLimit({
windowMs: 60 * 60 * 1000,
max: 20,
// Key by req.user.id when req.user is populated, otherwise by client IP.
keyGenerator: req => req.user?.id || req.ip
});
// ── RUNTIME PROTECTION: outbound host allowlist (server2) ──
// Hostnames this process is allowed to call out to.
const OUTBOUND_ALLOW = new Set([
  'services.onetcenter.org', // O*NET
  'maps.googleapis.com',     // Google Distance
  'api.openai.com',          // Free chat (chatFreeEndpoint)
  'server2',
  'server1'
]);

// Guard global fetch (Node 20+): reject any call whose target host is not allow-listed.
// Relative/internal URLs (resolved against the placeholder base "http://local/") pass through.
const _fetch = globalThis.fetch;
globalThis.fetch = async (input, init) => {
  // fetch() accepts a string, a URL object, or a Request. Only Request carries `.url`;
  // a URL object must be read through `.href`, otherwise it would resolve to the
  // placeholder base and skip the allowlist entirely.
  let target;
  if (typeof input === 'string') {
    target = input;
  } else if (input instanceof URL) {
    target = input.href;
  } else {
    target = input?.url ?? String(input);
  }
  const u = new URL(target, 'http://local/');
  // allow relative/internal URLs (no hostname)
  if (!u.hostname || u.hostname === 'local') return _fetch(input, init);
  if (!OUTBOUND_ALLOW.has(u.hostname)) throw new Error(`blocked_outbound_host:${u.hostname}`);
  return _fetch(input, init);
};
// Guard axios: same host allowlist as the fetch guard, applied as a request interceptor.
axios.interceptors.request.use((requestConfig) => {
  let host;
  try {
    const base = requestConfig.baseURL ? requestConfig.baseURL : 'http://local/';
    host = new URL(requestConfig.url, base).hostname;
  } catch {
    // URL could not be parsed: leave the request untouched
    return requestConfig;
  }
  const isInternal = !host || host === 'local'; // internal/relative
  if (isInternal || OUTBOUND_ALLOW.has(host)) return requestConfig;
  return Promise.reject(new Error(`blocked_outbound_host:${host}`));
});
// ── helpers ─────────────────────────────────────────────────────────
/**
 * Canonical form of a title for comparisons: lower-cased, "&" spelled out as
 * " and ", en/em dashes turned into "-", whitespace runs collapsed, ends trimmed.
 */
const normTitle = (s = '') => {
  const lowered = String(s).toLowerCase();
  const ampersandsSpelled = lowered.replace(/\s*&\s*/g, ' and ');
  const dashesUnified = ampersandsSpelled.replace(/[–—]/g, '-'); // en/em dash → hyphen
  return dashesUnified.replace(/\s+/g, ' ').trim();
};
// Keep only the part of a SOC code before the first dot: "15-1252.00" → "15-1252"
const stripSoc = (s = '') => {
  const [base] = String(s).split('.');
  return base;
};
// 1) careers_with_ratings.json → CAREERS array plus a Fuse index over `title`.
//    A load failure is logged; CAREERS/careersFuse then keep whatever was assigned before the error.
let CAREERS = [];
let careersFuse = null;
try {
  const careersFile = path.join(DATA_DIR, 'careers_with_ratings.json');
  CAREERS = JSON.parse(fs.readFileSync(careersFile, 'utf8'));
  const fuseOptions = { keys: ['title'], threshold: 0.3, ignoreLocation: true };
  careersFuse = new Fuse(CAREERS, fuseOptions);
  console.log(`[data] careers_with_ratings loaded: ${CAREERS.length}`);
} catch (err) {
  console.error('[data] careers_with_ratings load failed:', err.message);
}
// Same normalisation steps as normTitle: lower-case, "&" → " and ", en/em dash → "-", collapse whitespace, trim.
const norm = (s = '') =>
  [
    [/\s*&\s*/g, ' and '],
    [/[–—]/g, '-'],
    [/\s+/g, ' '],
  ]
    .reduce((text, [pattern, replacement]) => text.replace(pattern, replacement), String(s).toLowerCase())
    .trim();
// 2) CIP institution mapping: first existing candidate file wins.
//    The file is either one JSON array or line-delimited JSON; unparsable or falsy lines are dropped.
let CIPMAP = [];
try {
  const candidateNames = [
    'cip_institution_mapping_new.json',
    'cip_institution_mapping_fixed.json',
    'cip_institution_mapping.json',
  ];
  const mapFile = candidateNames
    .map((name) => path.join(DATA_DIR, name))
    .find((file) => fs.existsSync(file));
  if (mapFile) {
    const text = fs.readFileSync(mapFile, 'utf8').trim();
    if (text.startsWith('[')) {
      CIPMAP = JSON.parse(text);
    } else {
      const rows = [];
      for (const line of text.split('\n')) {
        let parsed = null;
        try {
          parsed = JSON.parse(line);
        } catch {
          parsed = null;
        }
        if (parsed) rows.push(parsed);
      }
      CIPMAP = rows;
    }
    console.log(`[data] CIP map loaded: ${CIPMAP.length} rows`);
  }
} catch (err) {
  console.error('[data] CIP map load failed:', err.message);
}
// 3) IPEDS ic2023_ay.csv: parsed once at startup into row objects keyed by the header line.
//    On failure the error is logged and IPEDS stays empty.
let IPEDS = [];
try {
  const ipedsFile = path.join(DATA_DIR, 'ic2023_ay.csv');
  const csvText = fs.readFileSync(ipedsFile, 'utf8');
  const parseOptions = { columns: true, skip_empty_lines: true };
  IPEDS = csvParse(csvText, parseOptions);
  console.log(`[data] IPEDS ic2023_ay loaded: ${IPEDS.length} rows`);
} catch (err) {
  console.error('[data] IPEDS load failed:', err.message);
}
// Load institution data (kept for existing routes)
// Read synchronously at startup; there is no try/catch, so a missing or malformed file throws here.
const institutionData = JSON.parse(fs.readFileSync(INSTITUTION_DATA_PATH, 'utf8'));
// ── DEK + canary bootstrap (use raw pool to avoid DAO interception) ──
// Use pool.raw when the pool module exposes it, otherwise the pool object itself.
const sql = pool.raw || pool;
try {
// Order: initialise encryption, confirm the database answers, then verify the canary.
await initEncryption();
await sql.query('SELECT 1');
await verifyCanary(sql);
} catch (e) {
// Any failure in the sequence above is fatal: log it and exit with status 1.
console.error('FATAL during crypto/DB bootstrap:', e?.message || e);
process.exit(1);
}
// Create Express app
const app = express();
// Listening port: SERVER2_PORT when set, otherwise 5001.
const PORT = process.env.SERVER2_PORT || 5001;
app.use(cookieParser());
// Do not advertise Express in the X-Powered-By response header.
app.disable('x-powered-by');
app.set('trust proxy', 1);
// Parse JSON request bodies, capped at 1mb.
app.use(express.json({ limit: '1mb' }));
// --- Request ID + minimal audit log for /api/* ---
/**
 * Resolve the correlation id for a request.
 * A non-empty string `x-request-id` request header is returned as-is and no
 * response header is written. Otherwise a fresh id is generated, written to the
 * `X-Request-ID` response header, and returned.
 */
function getRequestId(req, res) {
  const incoming = req.headers['x-request-id'];
  const hasIncoming = typeof incoming === 'string' && incoming !== '';
  if (hasIncoming) return incoming; // from Nginx

  let generated = crypto?.randomUUID?.();
  if (!generated) {
    // fallback when randomUUID is unavailable: timestamp + short random suffix
    generated = `${Date.now().toString(36)}-${Math.random().toString(36).slice(2,8)}`;
  }
  res.setHeader('X-Request-ID', generated);
  return generated;
}
// ---- RUNTIME: enforce JSON on API writes (with narrow exceptions) ----
// Methods whose /api/ requests must declare a JSON content type.
const MUST_JSON = new Set(['POST','PUT','PATCH']);
// Paths excluded from the JSON requirement.
const EXEMPT_PATHS = [
  // server3
  /^\/api\/premium\/resume\/optimize$/, // multer (multipart/form-data)
  /^\/api\/premium\/stripe\/webhook$/,  // Stripe (express.raw)
  // add others if truly needed
];
// Reject non-JSON writes to /api/* with 415, except hot-reload paths and the exemptions above.
app.use((req, res, next) => {
  const needsJsonCheck =
    req.path.startsWith('/api/') &&
    !isHotReloadPath(req) &&
    MUST_JSON.has(req.method) &&
    !EXEMPT_PATHS.some((rx) => rx.test(req.path));
  if (!needsJsonCheck) return next();

  const contentType = req.headers['content-type'] || '';
  const isJson = contentType.toLowerCase().includes('application/json');
  if (!isJson) {
    return res.status(415).json({ error: 'unsupported_media_type' });
  }
  next();
});
// ---- RUNTIME: last-resort error sanitizer ----
// Error-handling middleware (4 arguments): maps a few known errors to clean responses and
// answers everything else with a generic 500 that exposes no internals.
// NOTE(review): Express only passes errors to error handlers registered AFTER the failing
// middleware/route; this handler is registered before the routes below — confirm its placement.
app.use((err, req, res, _next) => {
  // nothing to do once a response has started
  if (res.headersSent) return;

  const message = err?.message;

  if (err?.code === 'LIMIT_FILE_SIZE') {
    return res.status(413).json({ error: 'file_too_large', limit_mb: 10 });
  }
  if (message && String(message).startsWith('blocked_outbound_host:')) {
    return res.status(400).json({ error: 'blocked_outbound_host' });
  }
  if (message === 'unsupported_type') {
    return res.status(415).json({ error: 'unsupported_type' });
  }

  // anything else: log to stderr, reply generically
  console.error('[unhandled]', message || err);
  return res.status(500).json({ error: 'Server error' });
});
// Minimal audit log for /api/* requests: one JSON line per request, written when the response finishes.
// NOTE(review): a second, more detailed /api/* logger is registered further down, so each
// API request currently produces two log lines — confirm both are wanted.
app.use((req, res, next) => {
  if (!req.path.startsWith('/api/')) return next();

  const rid = getRequestId(req, res);
  const startedAt = process.hrtime.bigint();

  const logOnFinish = () => {
    const elapsedNs = process.hrtime.bigint() - startedAt;
    const durMs = Number(elapsedNs / 1_000_000n);
    const entry = {
      ts: new Date().toISOString(),
      rid,
      ip: req.ip || req.headers['x-forwarded-for'] || '',
      method: req.method,
      path: req.path,
      status: res.statusCode,
      dur_ms: durMs,
      bytes_sent: Number(res.getHeader('Content-Length') || 0),
      userId: req.userId || req.id || null
    };
    // logging must never break the response
    try { console.log(JSON.stringify(entry)); } catch {}
  };

  res.on('finish', logOnFinish);
  next();
});
// ---- RUNTIME: minimal audit logging (API only, redacted) ----
/**
 * Best available client address: req.ip, then the x-forwarded-for header,
 * then the socket's remote address, then ''.
 */
function pickIp(req) {
  if (req.ip) return req.ip;
  const forwarded = req.headers['x-forwarded-for'];
  if (forwarded) return forwarded;
  return req.socket?.remoteAddress || '';
}
/**
 * Shallow copy of a headers object without `authorization`, `cookie`
 * and `x-forwarded-for`. The input object is not modified.
 */
function redactHeaders(h) {
  const {
    authorization: _authorization,
    cookie: _cookie,
    'x-forwarded-for': _forwardedFor,
    ...safe
  } = h ?? {};
  return safe;
}
/**
 * Compact preview of a request body for logging.
 * Returns undefined for non-objects. For objects, looks at the first 12 keys only:
 * strings are kept (cut to 80 characters plus an ellipsis when longer), arrays become
 * "[array:<length>]", every other value is replaced by its `typeof` name.
 * NOTE(review): string values are still logged (up to 80 chars) — confirm that is
 * acceptable for fields that may hold personal data.
 */
function sampleBody(b) {
  if (!b || typeof b !== 'object') return undefined;

  const describe = (value) => {
    if (typeof value === 'string') {
      return value.length > 80 ? value.slice(0, 80) + '…' : value;
    }
    if (Array.isArray(value)) return `[array:${value.length}]`;
    return typeof value;
  };

  const preview = {};
  Object.keys(b)
    .slice(0, 12)
    .forEach((key) => {
      preview[key] = describe(b[key]);
    });
  return preview;
}
// Detailed audit log for /api/* requests: one JSON line per request, written on 'finish',
// with redacted headers and a sampled body.
app.use((req, res, next) => {
  if (!req.path.startsWith('/api/')) return next();

  // Correlation id. Prefer the upstream header; otherwise reuse an id that an earlier
  // middleware already put on the response, so every log line for this request and the
  // X-Request-ID response header agree. Only generate a new id as a last resort.
  const rid =
    req.headers['x-request-id'] ||
    res.getHeader('X-Request-ID') ||
    crypto.randomUUID?.() ||
    String(Date.now());
  res.setHeader('X-Request-ID', rid);

  const t0 = process.hrtime.bigint();

  // Request data that is already final when the request arrives.
  const ts = new Date().toISOString();
  const ip = pickIp(req);
  const ua = req.headers['user-agent'] || '';
  const hdr = redactHeaders(req.headers);
  const body = sampleBody(req.body);

  res.on('finish', () => {
    const durMs = Number((process.hrtime.bigint() - t0) / 1_000_000n);
    const out = {
      ts,
      rid,
      ip,
      method: req.method,
      path: req.path,
      // Read at finish time: this middleware runs before any route-level middleware, so a
      // user id attached later in the chain is not visible yet when the request arrives.
      userId: req.userId || req.id || null,
      ua,
      hdr,
      body,
      status: res.statusCode,
      dur_ms: durMs,
      bytes_sent: Number(res.getHeader('Content-Length') || 0)
    };
    // one line JSON per request; logging must never break the response
    try { console.log(JSON.stringify(out)); } catch {}
  });
  next();
});
// ---- RUNTIME: never cache API responses ----
// Mark every /api/* response as non-cacheable; other paths pass through untouched.
app.use((req, res, next) => {
  const isApi = req.path.startsWith('/api/');
  if (!isApi) return next();
  res.set('Cache-Control', 'no-store');
  res.set('Pragma', 'no-cache');
  res.set('Expires', '0');
  next();
});
// Process-level safety nets: both handlers only log, so the process keeps running.
// NOTE(review): Node's documentation advises against resuming normal operation after an
// 'uncaughtException' — confirm that log-and-continue is intended here.
process.on('unhandledRejection', (e) => console.error('[unhandledRejection]', e));
process.on('uncaughtException', (e) => console.error('[uncaughtException]', e));
// ---- RUNTIME PROTECTION: HPP guard (dedupe + cap arrays) ----
// HTTP parameter pollution guard.
// Top-level array values are capped at MAX_ARRAY items and stripped of '' / null / undefined.
// Query strings: a one-element array is collapsed to its single value (repeated-parameter case).
// JSON bodies: arrays stay arrays. Collapsing there changes the type the client sent on
// purpose — e.g. `{ socCodes: ['15-1252.00'] }` would reach the handler as a string and be
// treated as "no codes".
app.use((req, _res, next) => {
  // Bypass guard on hot reload routes to avoid slicing/false negatives
  if (isHotReloadPath(req)) return next();

  const MAX_ARRAY = 20; // keep stricter cap elsewhere

  const sanitize = (obj, collapseSingles) => {
    if (!obj || typeof obj !== 'object') return;
    for (const k of Object.keys(obj)) {
      const v = obj[k];
      if (!Array.isArray(v)) continue;
      const cleaned = v.slice(0, MAX_ARRAY).filter(x => x !== '' && x != null);
      obj[k] = collapseSingles && cleaned.length === 1 ? cleaned[0] : cleaned;
    }
  };

  sanitize(req.query, true);
  sanitize(req.body, false);
  next();
});
// ---- RUNTIME: reject request bodies on GET/HEAD ----
// GET/HEAD requests that declare a non-zero Content-Length are answered with 400
// (hot-reload paths are exempt).
app.use((req, res, next) => {
  if (isHotReloadPath(req)) return next();

  const isBodylessMethod = req.method === 'GET' || req.method === 'HEAD';
  const declaredLength = Number(req.headers['content-length'] || 0);
  if (isBodylessMethod && declaredLength > 0) {
    return res.status(400).json({ error: 'no_body_allowed' });
  }
  next();
});
/**
 * Path of the `dek.fpr` file that sits in the same directory as DEK_PATH,
 * or null when DEK_PATH is unset or blank.
 */
function fprPathFromEnv() {
  const dekPath = (process.env.DEK_PATH || '').trim();
  if (!dekPath) return null;
  const dekDir = path.dirname(dekPath);
  return path.join(dekDir, 'dek.fpr');
}
// 1) Liveness: answers "OK" as plain text whenever the process can serve requests.
app.get('/livez', (_req, res) => {
  return res.type('text').send('OK');
});
// 2) Readiness: "OK" only when encryption initialises and the canary verifies; otherwise 500 "FAIL".
app.get('/readyz', async (_req, res) => {
  let ready = true;
  try {
    await initEncryption();
    await verifyCanary(sql); // raw pool
  } catch (err) {
    console.error('[READYZ]', err.message);
    ready = false;
  }
  if (!ready) return res.status(500).type('text').send('FAIL');
  return res.type('text').send('OK');
});
/**
 * True for the request paths that several guards in this file skip:
 * anything under /api/onet/, /api/cip/, /api/projections/, any path starting
 * with /api/salary, and POST /api/job-zones. False for a missing request or path.
 */
const isHotReloadPath = (req) => {
  const requestPath = req?.path;
  if (!requestPath) return false;

  const hotPrefixes = ['/api/onet/', '/api/cip/', '/api/projections/', '/api/salary'];
  if (hotPrefixes.some((prefix) => requestPath.startsWith(prefix))) return true;

  return req.method === 'POST' && requestPath === '/api/job-zones';
};
// 3) Health: detailed JSON report. Runs three independent checks (crypto/DEK, DB ping, canary);
//    responds 200 when all three pass and 503 otherwise. Each failed check carries its error message.
app.get('/healthz', async (_req, res) => {
  const checks = {
    live:   { ok: true },
    crypto: { ok: false, fp: null },
    db:     { ok: false, ping_ms: null },
    canary: { ok: false }
  };
  const report = {
    service: process.env.npm_package_name || 'server2',
    version: process.env.IMG_TAG || null,
    uptime_s: Math.floor(process.uptime()),
    now: new Date().toISOString(),
    checks
  };

  // crypto / DEK — the fingerprint file is optional; a read failure leaves fp as null
  try {
    await initEncryption();
    checks.crypto.ok = true;
    const fingerprintPath = fprPathFromEnv();
    if (fingerprintPath) {
      try {
        const fingerprint = await readFile(fingerprintPath, 'utf8');
        checks.crypto.fp = fingerprint.trim();
      } catch {}
    }
  } catch (err) {
    checks.crypto.error = err.message;
  }

  // DB ping (timed, raw pool)
  const pingStartedAt = Date.now();
  try {
    await sql.query('SELECT 1');
    checks.db.ok = true;
    checks.db.ping_ms = Date.now() - pingStartedAt;
  } catch (err) {
    checks.db.error = err.message;
  }

  // canary (raw pool)
  try {
    await verifyCanary(sql);
    checks.canary.ok = true;
  } catch (err) {
    checks.canary.error = err.message;
  }

  const allPassed = [checks.crypto, checks.db, checks.canary].every((check) => check.ok);
  return res.status(allPassed ? 200 : 503).json(report);
});
// ── Support mail config (quotesafe) ────────────────────────────────
// SendGrid key for support mail: trimmed, with any wrapping single/double quotes removed.
const SENDGRID_KEY = (process.env.SUPPORT_SENDGRID_API_KEY || '')
  .trim()
  .replace(/^['"]+|['"]+$/g, ''); // strip leading/trailing quotes if GCP injects them
// Without a key the mail client is left unconfigured and a warning is logged.
if (!SENDGRID_KEY) {
  console.warn('[support] SUPPORT_SENDGRID_API_KEY missing/empty; support email disabled');
} else {
  sgMail.setApiKey(SENDGRID_KEY);
}
// Small in-memory dedupe: (userId|subject|message) hash → expiresAt
// Entries live in this process only; isDuplicateAndRemember() prunes expired ones on each call.
const supportDedupe = new Map();
const DEDUPE_TTL_MS = 10 * 60 * 1000; // 10 minutes
/** Lower-case a value, collapse every whitespace run to one space, and trim the ends. */
function normalize(s = '') {
  const lowered = String(s).toLowerCase();
  const singleSpaced = lowered.replace(/\s+/g, ' ');
  return singleSpaced.trim();
}
/**
 * Dedupe key for a support message: hex SHA-256 of
 * "<userId>|<normalized subject>|<normalized message>".
 */
function makeKey(userId, subject, message) {
  const subjectPart = normalize(subject);
  const messagePart = normalize(message);
  const material = `${userId}|${subjectPart}|${messagePart}`;
  return crypto.createHash('sha256').update(material).digest('hex');
}
/**
 * Report whether `key` was already recorded within the TTL window.
 * Expired entries are dropped first; an unseen key is recorded with a fresh expiry
 * and false is returned, a known key returns true and keeps its existing expiry.
 */
function isDuplicateAndRemember(key) {
  const now = Date.now();

  // prune expired
  supportDedupe.forEach((expiresAt, storedKey) => {
    if (expiresAt <= now) supportDedupe.delete(storedKey);
  });

  const alreadySeen = supportDedupe.has(key);
  if (!alreadySeen) {
    supportDedupe.set(key, now + DEDUPE_TTL_MS);
  }
  return alreadySeen;
}
// Burst limiter: one request per 30-second window per key.
const supportBurstLimiter = rateLimit({
windowMs: 30 * 1000, // 1 every 30s
max: 1,
standardHeaders: true,
legacyHeaders: false,
// Key by req.user.id when req.user is populated, otherwise by client IP.
keyGenerator: (req) => req.user?.id || req.ip
});
// Daily limiter: three requests per 24-hour window per key (same keying as the burst limiter).
const supportDailyLimiter = rateLimit({
windowMs: 24 * 60 * 60 * 1000, // per day
max: 3, // at most 3 per day
standardHeaders: true,
legacyHeaders: false,
keyGenerator: (req) => req.user?.id || req.ip
});
/**************************************************
* DB connections (SQLite)
**************************************************/
// Handle for salary_info.db; assigned by initDatabases().
let dbSqlite;
// Salary fast path: prepared stmt + tiny LRU cache
// NOTE(review): the cache's eviction logic is not in the part of the file reviewed here — confirm it honours SALARY_CACHE_MAX.
let SALARY_STMT = null;
const SALARY_CACHE = new Map(); // key: `${occ}|${area}` -> result
const SALARY_CACHE_MAX = 512;
// Handle for user_profile.db outside production; initDatabases() sets it to null in production.
let userProfileDb;
/**
 * Open the SQLite handles this server uses and prepare the salary lookup statement.
 * Assigns the module-level dbSqlite, SALARY_STMT and userProfileDb.
 * Any failure is logged and terminates the process with exit status 1.
 */
async function initDatabases() {
try {
// Open salary DB as an immutable, read-only URI so SQLite never touches -wal/-shm/-journal.
// Requires OPEN_URI flag in addition to OPEN_READONLY.
const SALARY_DB_URI = `file:${SALARY_DB_PATH}?immutable=1&mode=ro`;
dbSqlite = await open({
filename: SALARY_DB_URI,
driver : sqlite3.Database,
mode : sqlite3.OPEN_READONLY | sqlite3.OPEN_URI
});
console.log('✅ Connected to salary_info.db (immutable, ro)');
// Read-only safe PRAGMAs (no write state). journal_mode/synchronous removed.
await dbSqlite.exec(`
PRAGMA busy_timeout=4000;
PRAGMA temp_store=MEMORY;
`);
// One prepared statement: regional (param) + national in a single scan
// It has seven placeholders, in order: the area title five times (one per regional
// percentile column), then the occupation code, then the area title once more.
SALARY_STMT = await dbSqlite.prepare(`
SELECT
MAX(CASE WHEN AREA_TITLE = ? THEN A_PCT10 END) AS regional_PCT10,
MAX(CASE WHEN AREA_TITLE = ? THEN A_PCT25 END) AS regional_PCT25,
MAX(CASE WHEN AREA_TITLE = ? THEN A_MEDIAN END) AS regional_MEDIAN,
MAX(CASE WHEN AREA_TITLE = ? THEN A_PCT75 END) AS regional_PCT75,
MAX(CASE WHEN AREA_TITLE = ? THEN A_PCT90 END) AS regional_PCT90,
MAX(CASE WHEN AREA_TITLE = 'U.S.' THEN A_PCT10 END) AS national_PCT10,
MAX(CASE WHEN AREA_TITLE = 'U.S.' THEN A_PCT25 END) AS national_PCT25,
MAX(CASE WHEN AREA_TITLE = 'U.S.' THEN A_MEDIAN END) AS national_MEDIAN,
MAX(CASE WHEN AREA_TITLE = 'U.S.' THEN A_PCT75 END) AS national_PCT75,
MAX(CASE WHEN AREA_TITLE = 'U.S.' THEN A_PCT90 END) AS national_PCT90
FROM salary_data
WHERE OCC_CODE = ?
AND (AREA_TITLE = ? OR AREA_TITLE = 'U.S.')
`);
// In prod, user_profile is MySQL; do not try to open a SQLite file here.
// Leave dev behavior untouched: only open SQLite if clearly running non-prod.
if (process.env.NODE_ENV !== 'production') {
userProfileDb = await open({
filename: USER_PROFILE_DB_PATH,
driver : sqlite3.Database
});
console.log('✅ Connected to user_profile.db (sqlite, dev)');
} else {
// NOTE(review): helpers that call userProfileDb.get/run directly will throw on this
// null handle in production — confirm they are not reachable there.
userProfileDb = null;
console.log(' user_profile via MySQL pool (no SQLite open in prod)');
}
} catch (err) {
console.error('❌ DB init failed →', err);
process.exit(1);
}
}
// Top-level await: the rest of the module does not run until the handles are ready.
await initDatabases();
/* ──────────────────────────────────────────────────────────────
* SECURITY, CORS, JSON Body
* ────────────────────────────────────────────────────────────── */
/* 1 — Require critical env var up-front */
// An unset or empty CORS_ALLOWED_ORIGINS aborts startup with exit status 1.
if (!process.env.CORS_ALLOWED_ORIGINS) {
console.error('FATAL CORS_ALLOWED_ORIGINS is not set');
process.exit(1);
}
/* 2 — Build allow-list from env (comma-separated) */
// Entries are trimmed and empty entries are dropped.
const allowedOrigins = process.env.CORS_ALLOWED_ORIGINS
.split(',')
.map(o => o.trim())
.filter(Boolean);
/* 3 — Security headers */
// helmet with its Content-Security-Policy and Cross-Origin-Embedder-Policy features switched off.
app.use(
helmet({
contentSecurityPolicy: false,
crossOriginEmbedderPolicy: false,
})
);
/* 4 — Strict CORS: only exact origins from env */
// Strict CORS. Requests without an Origin header pass straight through; an Origin that is not
// an exact entry of allowedOrigins gets an empty 403; an allowed Origin is echoed back with
// credentials enabled, and OPTIONS preflights end here with 204.
app.use((req, res, next) => {
  const origin = req.headers.origin || '';

  // No Origin header (same-origin, curl, server->server) → skip CORS and allow
  if (!origin) return next();

  // Exact match against env list only (scheme + host), no soft allow
  const originAllowed = allowedOrigins.includes(origin);
  if (!originAllowed) {
    return res.status(403).end();
  }

  const corsHeaders = [
    ['Access-Control-Allow-Origin', origin],
    ['Access-Control-Allow-Credentials', 'true'],
    [
      'Access-Control-Allow-Headers',
      'Authorization, Content-Type, Accept, Origin, X-Requested-With, Access-Control-Allow-Methods'
    ],
    ['Access-Control-Allow-Methods', 'GET, POST, OPTIONS'],
  ];
  for (const [name, value] of corsHeaders) {
    res.setHeader(name, value);
  }

  if (req.method === 'OPTIONS') return res.status(204).end();
  next();
});
/* 5 — JSON parsing & static assets */
// NOTE(review): express.json({ limit: '1mb' }) is already registered right after the app is
// created, so this second parser looks redundant; no static-asset middleware is registered
// here despite the heading — confirm before removing or renaming.
app.use(express.json());
/* 6 — No-op pass-through (kept for completeness) */
app.use((req, res, next) => next());
// ──────────────────────────────── Data endpoints ───────────────────────────────
/**************************************************
* BULK limited-data computation (single call)
* - Input: { socCodes: [full SOCs], state, area }
* - Output: { [fullSoc]: { job_zone, limitedData } }
**************************************************/
// POST /api/suggestions/limited-data
// Body: { socCodes, state, area }. Responds with { [fullSoc]: { job_zone, limitedData } }.
// limitedData is true unless the SOC has a CIP mapping, an O*NET description or task list,
// projections, and salary data. An empty/missing socCodes yields {}; failures yield 500.
app.post('/api/suggestions/limited-data', async (req, res) => {
  try {
    // Upstream middleware may collapse a one-element array to a bare string, so a request
    // for a single SOC can arrive as a string. Accept both shapes instead of treating the
    // string as "no codes".
    const rawSocCodes = req.body?.socCodes;
    let socCodes = [];
    if (Array.isArray(rawSocCodes)) {
      socCodes = rawSocCodes;
    } else if (typeof rawSocCodes === 'string' && rawSocCodes.trim()) {
      socCodes = [rawSocCodes];
    }
    const stateIn = String(req.body?.state || '');
    const area = String(req.body?.area || '');
    if (!socCodes.length) return res.json({});

    const fullState = fullStateFrom(stateIn);

    // Pre-dedupe base SOCs for base-scoped work (projections, salary, job_zone)
    const bases = [...new Set(socCodes.map(baseSocOf))];

    // Precompute base-scoped booleans/zones
    const projMap = new Map();
    const salMap = new Map();
    const zoneMap = new Map();
    await Promise.all(bases.map(async (b) => {
      const [hasProj, hasSal, zone] = await Promise.all([
        Promise.resolve(projectionsExist(b, fullState)),
        salaryExist(b, area),
        getJobZone(b)
      ]);
      projMap.set(b, !!hasProj);
      salMap.set(b, !!hasSal);
      zoneMap.set(b, zone ?? null);
    }));

    // Build per-full-SOC answers (CIP + O*NET desc presence)
    const out = {};
    for (const fullSoc of socCodes) {
      const base = baseSocOf(fullSoc);
      const hasCip = hasCipForFullSoc(fullSoc);

      // O*NET presence (from our tiny cache table). If unknown → treat as false for now
      // (conservative; first modal open will populate and future runs will be correct).
      let hasDesc = false;
      let hasTasks = false;
      try {
        const p = await onetDescPresence(fullSoc);
        if (p) { hasDesc = !!p.has_desc; hasTasks = !!p.has_tasks; }
      } catch {
        // best-effort lookup: a failure here just leaves both flags false
      }

      const hasProj = !!projMap.get(base);
      const hasSal = !!salMap.get(base);
      const job_zone = zoneMap.get(base) ?? null;
      const limitedData = !(hasCip && (hasDesc || hasTasks) && hasProj && hasSal);
      out[fullSoc] = { job_zone, limitedData };
    }
    return res.json(out);
  } catch (e) {
    console.error('[limited-data bulk]', e?.message || e);
    return res.status(500).json({ error: 'failed' });
  }
});
// /api/data/cip-institution-map → backend/data/cip_institution_mapping_new.json (or fallback)
// GET /api/data/cip-institution-map — serves the first existing mapping file as JSON,
// or 404 with the list of file names that were tried.
app.get('/api/data/cip-institution-map', (req, res) => {
  const candidates = [
    'cip_institution_mapping_new.json',
    'cip_institution_mapping_fixed.json',
    'cip_institution_mapping.json'
  ].map((fileName) => path.join(DATA_DIR, fileName));

  const found = candidates.find((filePath) => fs.existsSync(filePath));
  if (found) {
    res.type('application/json');
    res.sendFile(found);
    return;
  }

  const tried = candidates.map((filePath) => path.basename(filePath));
  return res.status(404).json({ error: 'CIP institution map not found', tried });
});
// /api/data/ic2023 → backend/data/ic2023_ay.csv
// GET /api/data/ic2023 — serves ic2023_ay.csv as text/csv, or 404 when the file is not readable.
app.get('/api/data/ic2023', (req, res) => {
  const csvPath = path.join(DATA_DIR, 'ic2023_ay.csv');
  fs.access(csvPath, fs.constants.R_OK, (accessErr) => {
    if (accessErr) {
      return res.status(404).json({ error: 'ic2023_ay.csv not found' });
    }
    res.type('text/csv');
    res.sendFile(csvPath);
  });
});
// GET /api/data/career-clusters — serves career_clusters.json, or 404 when the file is not readable.
app.get('/api/data/career-clusters', (req, res) => {
  const clustersPath = path.join(DATA_DIR, 'career_clusters.json');
  fs.access(clustersPath, fs.constants.R_OK, (accessErr) => {
    if (accessErr) {
      return res.status(404).json({ error: 'career_clusters.json not found' });
    }
    res.type('application/json');
    res.sendFile(clustersPath);
  });
});
/**************************************************
* Load CIP->SOC mapping
**************************************************/
/** Read the first worksheet of the CIP→SOC workbook and return its rows as objects. */
function loadMapping() {
  const workbook = xlsx.readFile(CIP_TO_SOC_PATH);
  const firstSheetName = workbook.SheetNames[0];
  const firstSheet = workbook.Sheets[firstSheetName];
  return xlsx.utils.sheet_to_json(firstSheet); // socToCipMapping array
}
// Rows of the CIP→SOC workbook, loaded once at startup; an empty sheet is only logged.
const socToCipMapping = loadMapping();
if (socToCipMapping.length === 0) {
  console.error('SOC to CIP mapping data is empty.');
}
// O(1) CIP index: FULL SOC -> CIP code (first match wins).
// Rows without a SOC code or without a CIP code are not indexed.
const CIP_BY_SOC = new Map();
socToCipMapping.forEach((entry) => {
  const socCode = String(entry['O*NET-SOC 2019 Code'] || '').trim();
  const cipCode = entry['2020 CIP Code'];
  if (!socCode || !cipCode) return;
  if (!CIP_BY_SOC.has(socCode)) {
    CIP_BY_SOC.set(socCode, cipCode);
  }
});
/**************************************************
* Load single JSON with all states + US
* Replaces old GA-only approach
**************************************************/
// economicproj.json: one file holding the projection rows for every area.
// A read/parse failure is logged and leaves allProjections as assigned before the error.
const singleProjFile = path.join(DATA_DIR, 'economicproj.json');
let allProjections = [];
try {
  const projectionsJson = fs.readFileSync(singleProjFile, 'utf8');
  allProjections = JSON.parse(projectionsJson);
  console.log(`Loaded ${allProjections.length} rows from economicproj.json`);
} catch (loadError) {
  console.error('Error reading economicproj.json:', loadError);
}
// O(1) projections index: key = `${occ}|${areaLower}`.
// Rows missing an occupation code or an area name are skipped; a later row with the
// same key replaces an earlier one.
const PROJ_BY_KEY = new Map();
for (const row of allProjections) {
  const occCode = String(row['Occupation Code'] || '').trim();
  const areaKey = String(row['Area Name'] || '').trim().toLowerCase();
  if (occCode && areaKey) {
    const summary = {
      area          : row['Area Name'],
      baseYear      : row['Base Year'],
      base          : row['Base'],
      projectedYear : row['Projected Year'],
      projection    : row['Projection'],
      change        : row['Change'],
      percentChange : row['Percent Change'],
      annualOpenings: row['Average Annual Openings'],
      occupationName: row['Occupation Name'],
    };
    PROJ_BY_KEY.set(`${occCode}|${areaKey}`, summary);
  }
}
//AI At Risk helpers
/**
 * Fetch the stored AI-risk analysis row for a SOC code.
 * Resolves to the row, or null when no row exists.
 * NOTE(review): reads through userProfileDb, which initDatabases() leaves null in
 * production — confirm this helper is not reachable there.
 */
async function getRiskAnalysisFromDB(socCode) {
  const found = await userProfileDb.get(
    `SELECT * FROM ai_risk_analysis WHERE soc_code = ?`,
    [socCode]
  );
  if (!found) return null;
  return found;
}
/* ──────────────────────────────────────────────────────────────
* Helpers used by /api/onet/submit_answers for limited_data
* - Uses ONLY local sources you already load:
* • socToCipMapping (Excel → in-memory array)
* • allProjections (economicproj.json → in-memory array)
* • dbSqlite (salary_info.db → SQLite handle)
* - No external HTTP calls. No logging of SOCs.
* ────────────────────────────────────────────────────────────── */
// Base SOC = everything before the first dot: "15-1252.01" → "15-1252"
const baseSocOf = (fullSoc = '') => {
  const text = String(fullSoc);
  const dotAt = text.indexOf('.');
  return dotAt === -1 ? text : text.slice(0, dotAt);
};
/**
 * Expand a two-letter US state/DC abbreviation (any case, surrounding spaces ignored)
 * to its full name. Falsy input gives ''. Anything that is not a known abbreviation
 * is returned exactly as passed in.
 */
const fullStateFrom = (s = '') => {
  if (!s) return '';

  const STATE_NAMES = {
    AL:'Alabama', AK:'Alaska', AZ:'Arizona', AR:'Arkansas', CA:'California', CO:'Colorado',
    CT:'Connecticut', DE:'Delaware', DC:'District of Columbia', FL:'Florida', GA:'Georgia',
    HI:'Hawaii', ID:'Idaho', IL:'Illinois', IN:'Indiana', IA:'Iowa', KS:'Kansas',
    KY:'Kentucky', LA:'Louisiana', ME:'Maine', MD:'Maryland', MA:'Massachusetts',
    MI:'Michigan', MN:'Minnesota', MS:'Mississippi', MO:'Missouri', MT:'Montana',
    NE:'Nebraska', NV:'Nevada', NH:'New Hampshire', NJ:'New Jersey', NM:'New Mexico',
    NY:'New York', NC:'North Carolina', ND:'North Dakota', OH:'Ohio', OK:'Oklahoma',
    OR:'Oregon', PA:'Pennsylvania', RI:'Rhode Island', SC:'South Carolina',
    SD:'South Dakota', TN:'Tennessee', TX:'Texas', UT:'Utah', VT:'Vermont',
    VA:'Virginia', WA:'Washington', WV:'West Virginia', WI:'Wisconsin', WY:'Wyoming'
  };

  const abbreviation = String(s).trim().toUpperCase();
  const fullName = STATE_NAMES[abbreviation];
  return fullName ? fullName : s; // already full name → return as-is
};
/**
 * CIP presence from the loaded Excel mapping: true when any row's
 * 'O*NET-SOC 2019 Code' (trimmed) equals the given full SOC (trimmed).
 */
const hasCipForFullSoc = (fullSoc = '') => {
  const target = String(fullSoc).trim();
  return socToCipMapping.some(
    (entry) => String(entry['O*NET-SOC 2019 Code'] || '').trim() === target
  );
};
/**
 * Projections presence from economicproj.json.
 * True if a row exists for the base SOC in the requested state (full name, matched
 * case-insensitively) OR in "United States".
 *
 * Looks the answer up in PROJ_BY_KEY (keyed `${occ}|${areaLower}`) instead of scanning
 * every projection row twice per call.
 *
 * @param {string} baseSoc   Base SOC code, e.g. "15-1252".
 * @param {string} fullState Full state name; empty/undefined checks only the national row.
 * @returns {boolean}
 */
function projectionsExist(baseSoc, fullState) {
  const code = String(baseSoc).trim();
  const wantState = (fullState || '').trim().toLowerCase();
  if (wantState && PROJ_BY_KEY.has(`${code}|${wantState}`)) return true;
  return PROJ_BY_KEY.has(`${code}|united states`);
}
/**
 * Salary presence from salary_info.db.
 * Resolves true when a row exists for the base SOC in the given area; when no area is
 * given or no regional row is found, falls back to the 'U.S.' row.
 */
async function salaryExist(baseSoc, area) {
  const occCode = String(baseSoc).trim();
  const areaTitle = String(area || '').trim();

  if (areaTitle) {
    const regionalRow = await dbSqlite.get(
      `SELECT 1 FROM salary_data WHERE OCC_CODE = ? AND AREA_TITLE = ? LIMIT 1`,
      [occCode, areaTitle]
    );
    if (regionalRow) return true;
  }

  const nationalRow = await dbSqlite.get(
    `SELECT 1 FROM salary_data WHERE OCC_CODE = ? AND AREA_TITLE = 'U.S.' LIMIT 1`,
    [occCode]
  );
  return Boolean(nationalRow);
}
/**
 * Compute limited_data for one career row.
 * Resolves false only when the full SOC has a CIP mapping AND projections exist for its
 * base SOC AND salary data exists for its base SOC; true otherwise. A state that does not
 * resolve to a name is treated as 'United States'.
 */
async function computeLimitedFor(fullSoc, stateAbbrevOrName, areaTitle) {
  const baseSoc = baseSocOf(fullSoc);
  const stateName = fullStateFrom(stateAbbrevOrName) || 'United States';

  const cipOk = hasCipForFullSoc(fullSoc);
  const projectionsOk = projectionsExist(baseSoc, stateName);
  const salaryOk = await salaryExist(baseSoc, areaTitle);

  const hasEverything = cipOk && projectionsOk && salaryOk;
  return !hasEverything;
}
// Helper to upsert a row
/**
 * Insert or replace the ai_risk_analysis row for a SOC code.
 * Blank incoming jobDescription/tasks keep the values of an existing row; empty
 * careerName/riskLevel/reasoning also fall back to the existing row, then to ''.
 * NOTE(review): writes through userProfileDb, which initDatabases() leaves null in
 * production — confirm this helper is not reachable there.
 * NOTE(review): `.trim()` is called on jobDescription and tasks, so both are assumed to be
 * strings (or null/undefined) — confirm callers never pass arrays.
 */
async function storeRiskAnalysisInDB({
socCode,
careerName,
jobDescription,
tasks,
riskLevel,
reasoning
}) {
// 1) get existing row if any
const existing = await userProfileDb.get(
`SELECT * FROM ai_risk_analysis WHERE soc_code = ?`,
[socCode]
);
let finalJobDesc = jobDescription ?? "";
let finalTasks = tasks ?? "";
// 2) If existing row and the new jobDescription is blank => keep existing
if (existing) {
if (!jobDescription?.trim()) {
finalJobDesc = existing.job_description;
}
if (!tasks?.trim()) {
finalTasks = existing.tasks;
}
}
// This local `sql` (the statement text) shadows the module-level `sql` pool handle inside this function.
// INSERT OR REPLACE rewrites the whole row, so created_at is set to the current timestamp on every call.
const sql = `
INSERT OR REPLACE INTO ai_risk_analysis (
soc_code,
career_name,
job_description,
tasks,
risk_level,
reasoning,
created_at
) VALUES (?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP)
`;
await userProfileDb.run(sql, [
socCode,
careerName || existing?.career_name || '',
finalJobDesc || '',
finalTasks || '',
riskLevel || existing?.risk_level || '',
reasoning || existing?.reasoning || ''
]);
}
/**************************************************
* O*Net routes, CIP routes, distance routes, etc.
**************************************************/
// O*Net interest questions
// GET /api/onet/questions?start=&end=
// Fetches Interest Profiler questions from O*NET in pages of at most 12, following the
// "next" link of each response until there is none, and returns them as { questions }.
app.get('/api/onet/questions', async (req, res) => {
  const { start, end } = req.query;
  if (!start || !end) {
    return res.status(400).json({ error: 'Start and end parameters are required' });
  }

  try {
    const questions = [];
    let pageStart = parseInt(start, 10);
    let rangeEnd = parseInt(end, 10);

    while (pageStart <= rangeEnd) {
      const pageEnd = Math.min(rangeEnd, pageStart + 11);
      const pageUrl = `https://services.onetcenter.org/ws/mnm/interestprofiler/questions?start=${pageStart}&end=${pageEnd}`;
      const response = await axios.get(pageUrl, {
        auth: {
          username: process.env.ONET_USERNAME,
          password: process.env.ONET_PASSWORD,
        },
      });

      const pageQuestions = response.data.question;
      if (Array.isArray(pageQuestions)) {
        questions.push(...pageQuestions);
      }

      // the next page's range comes from the response's own "next" link
      const nextLink = response.data.link?.find((link) => link.rel === 'next');
      if (!nextLink) break;
      const nextParams = new URLSearchParams(nextLink.href.split('?')[1]);
      pageStart = parseInt(nextParams.get('start'), 10);
      rangeEnd = parseInt(nextParams.get('end'), 10);
    }

    res.status(200).json({ questions });
  } catch (error) {
    console.error('Error fetching O*Net questions:', error.message);
    res.status(500).json({ error: 'Failed to fetch O*Net questions' });
  }
});
// geocode
/**
 * Geocode a US ZIP code through the Google Geocoding API.
 *
 * @param {string} zipCode ZIP code to look up (restricted to country:US).
 * @returns {Promise<{lat: number, lng: number} | null>} Location of the first result, or
 *   null when the API key is missing, the lookup fails, or there are no results.
 */
async function geocodeZipCode(zipCode) {
  const apiKey = process.env.GOOGLE_MAPS_API_KEY;
  if (!apiKey) {
    // Without a key the request can only be rejected upstream; skip the call instead of
    // sending "key=undefined".
    console.error('Google Maps API Key missing');
    return null;
  }
  try {
    const geocodeUrl = `https://maps.googleapis.com/maps/api/geocode/json?address=${encodeURIComponent(
      zipCode
    )}&components=country:US&key=${apiKey}`;
    const response = await axios.get(geocodeUrl);
    if (response.data.status === 'OK' && response.data.results.length > 0) {
      return response.data.results[0].geometry.location;
    }
    throw new Error('Geocoding failed');
  } catch (error) {
    console.error('Error geocoding ZIP code:', error.message);
    return null;
  }
}
/** @aiTool {
"name": "getDistanceInMiles",
"description": "Return driving distance and duration between the user ZIP and destination(s)",
"parameters": {
"type": "object",
"properties": {
"userZipcode": { "type": "string" },
"destinations": { "type": "string", "description": "Pipe-separated lat,lng pairs or addresses" }
},
"required": ["userZipcode", "destinations"]
},
"pages": ["EducationalProgramsPage", "LoanRepayment"],
"write": false
} */
// Distance
// POST /api/maps/distance
// Body: { userZipcode, destinations }. Geocodes the ZIP, asks the Google Distance Matrix API
// for the first origin/destination pair, and responds with { distance, duration } as text.
// 400: missing parameters or ZIP that cannot be geocoded. 404: Google returned no usable
// route for the pair. 500: upstream/API failure.
app.post('/api/maps/distance', async (req, res) => {
  // `?? {}` keeps a missing body on the 400 path instead of throwing before the try block
  const { userZipcode, destinations } = req.body ?? {};
  if (!userZipcode || !destinations) {
    console.error('Missing required parameters:', { userZipcode, destinations });
    return res.status(400).json({ error: 'User ZIP code and destination are required.' });
  }
  try {
    const googleMapsApiKey = process.env.GOOGLE_MAPS_API_KEY;
    const userLocation = await geocodeZipCode(userZipcode);
    if (!userLocation) {
      return res.status(400).json({ error: 'Unable to geocode user ZIP code.' });
    }
    const origins = `${userLocation.lat},${userLocation.lng}`;
    const distanceUrl = `https://maps.googleapis.com/maps/api/distancematrix/json?origins=${origins}&destinations=${encodeURIComponent(
      destinations
    )}&units=imperial&key=${googleMapsApiKey}`;
    const distanceResponse = await axios.get(distanceUrl);
    if (distanceResponse.data.status !== 'OK') {
      return res.status(500).json({ error: 'Error fetching distance from Google Maps API' });
    }
    // A top-level "OK" does not guarantee a route: an element that could not be resolved
    // (e.g. no route, unknown place) carries no distance/duration.
    const element = distanceResponse.data.rows?.[0]?.elements?.[0];
    if (!element?.distance || !element?.duration) {
      return res.status(404).json({ error: 'No route found for the requested destination.' });
    }
    res.json({ distance: element.distance.text, duration: element.duration.text });
  } catch (error) {
    console.error('Error during distance calculation:', error);
    res.status(500).json({ error: 'Internal server error', details: error.message });
  }
});
// ONet submission
/**
 * POST /api/onet/submit_answers
 *
 * Body: { answers } - a 60-character answer string for the O*NET Interest
 * Profiler. Fetches matching careers and the interest results from O*NET,
 * filters the careers, and (when the caller sent an Authorization header)
 * forwards the answers plus the derived code to `${API_BASE}/api/user-profile`.
 *
 * Responses:
 *   200 { careers, riaSecScores }
 *   400 - answers missing, not a string, or not exactly 60 characters
 *   500 - an O*NET request failed
 */
app.post('/api/onet/submit_answers', async (req, res) => {
  console.log('POST /api/onet/submit_answers hit');
  const { answers } = req.body || {};
  // Require a string: an array of length 60 would pass a bare length check
  // and then be interpolated into the URL as a comma-joined list.
  if (typeof answers !== 'string' || answers.length !== 60) {
    console.error('Invalid answers:', answers);
    return res.status(400).json({ error: 'Answers must be 60 chars long.' });
  }
  try {
    const encodedAnswers = encodeURIComponent(answers);
    const careerUrl = `https://services.onetcenter.org/ws/mnm/interestprofiler/careers?answers=${encodedAnswers}&start=1&end=1000`;
    const resultsUrl = `https://services.onetcenter.org/ws/mnm/interestprofiler/results?answers=${encodedAnswers}`;
    // O*NET calls → Basic Auth only
    const onetRequestOptions = {
      auth: {
        username: process.env.ONET_USERNAME,
        password: process.env.ONET_PASSWORD,
      },
      headers: { Accept: 'application/json' },
    };
    // The two lookups are independent, so issue them concurrently.
    const [careerResponse, resultsResponse] = await Promise.all([
      axios.get(careerUrl, onetRequestOptions),
      axios.get(resultsUrl, onetRequestOptions),
    ]);
    const careerSuggestions = careerResponse.data.career || [];
    const riaSecScores = resultsResponse.data.result || [];
    const filtered = filterHigherEducationCareers(careerSuggestions);
    const riasecCode = convertToRiasecCode(riaSecScores);
    // Pass the caller's Bearer straight through to server1 (if present)
    const bearer = req.headers.authorization; // e.g. "Bearer eyJ..."
    if (bearer) {
      try {
        await axios.post(
          `${API_BASE}/api/user-profile`,
          {
            interest_inventory_answers: answers,
            riasec: riasecCode,
          },
          { headers: { Authorization: bearer } }
        );
      } catch (err) {
        console.error(
          'Error storing RIASEC in user_profile =>',
          err.response?.data || err.message
        );
        // non-fatal for the O*NET response
      }
    }
    res.status(200).json({
      careers: filtered,
      riaSecScores,
    });
  } catch (error) {
    console.error('Error in ONet API:', error.response?.data || error.message);
    res.status(500).json({
      error: 'Failed to fetch data from ONet',
      details: error.response?.data || error.message,
    });
  }
});
/**
 * Drop careers whose `education` value is one of the three lowest levels and
 * project the remaining ones down to the fields the client needs.
 *
 * @param {Array<object>} careers - Career entries carrying an `education` field.
 * @returns {Array<{href, fit, code, title, tags}>} New objects, in input order.
 */
function filterHigherEducationCareers(careers) {
  const excludedLevels = ['No formal education', 'High school', 'Some college, no degree'];
  const isAboveExcluded = ({ education }) => !excludedLevels.includes(education);
  const toSummary = ({ href, fit, code, title, tags }) => ({ href, fit, code, title, tags });
  return careers.filter(isAboveExcluded).map(toSummary);
}
/**
 * Build a letter code from interest-area scores.
 *
 * Items are arranged in the fixed Realistic → Investigative → Artistic →
 * Social → Enterprising → Conventional order (not by score), and the first
 * letter of each item's `area` is concatenated, upper-cased.
 *
 * @param {Array<{area: string}>} riaSecScores - Score entries; not mutated.
 * @returns {string} e.g. "RIASEC" when all six areas are present.
 */
function convertToRiasecCode(riaSecScores) {
  const rank = {
    Realistic: 0,
    Investigative: 1,
    Artistic: 2,
    Social: 3,
    Enterprising: 4,
    Conventional: 5,
  };
  const byCanonicalOrder = (left, right) => rank[left.area] - rank[right.area];
  // Sort a copy so the caller's array keeps its original order.
  const ordered = [...riaSecScores].sort(byCanonicalOrder);
  let code = '';
  for (const entry of ordered) {
    code += entry.area[0].toUpperCase();
  }
  return code;
}
// ONet career details
/**
 * GET /api/onet/career-details/:socCode
 *
 * Proxies the O*NET "careers/{code}" resource and returns its JSON unchanged.
 *
 * Responses: 200 (upstream JSON), 400 (missing code), 500 (upstream failure).
 */
app.get('/api/onet/career-details/:socCode', async (req, res) => {
  const { socCode } = req.params;
  if (!socCode) {
    return res.status(400).json({ error: 'SOC code is required' });
  }
  try {
    // Encode the path segment: the route param is caller-controlled, and a
    // value containing "/" or ".." must not be able to redirect this
    // credentialed request to a different O*NET resource.
    const response = await axios.get(
      `https://services.onetcenter.org/ws/mnm/careers/${encodeURIComponent(socCode)}`,
      {
        auth: {
          username: process.env.ONET_USERNAME,
          password: process.env.ONET_PASSWORD,
        },
        headers: { Accept: 'application/json' },
      }
    );
    res.status(200).json(response.data);
  } catch (err) {
    // Log status + message only; the full axios error object carries the
    // request config, including the Basic Auth credentials.
    console.error('Error fetching career details:', err.response?.status ?? '', err.message);
    res.status(500).json({ error: 'Failed to fetch career details' });
  }
});
// ONet career description
/**
 * GET /api/onet/career-description/:socCode
 *
 * Fetches the O*NET "careers/{code}" resource and returns a reduced shape.
 *
 * Responses:
 *   200 { description, tasks } - placeholders are substituted when absent
 *   400 - missing code
 *   404 - upstream payload has no `title`
 *   500 - upstream failure
 */
app.get('/api/onet/career-description/:socCode', async (req, res) => {
  const { socCode } = req.params;
  if (!socCode) {
    return res.status(400).json({ error: 'SOC Code is required' });
  }
  try {
    // Encode the caller-controlled path segment so it cannot alter which
    // O*NET resource this credentialed request targets.
    const response = await axios.get(
      `https://services.onetcenter.org/ws/mnm/careers/${encodeURIComponent(socCode)}`,
      {
        auth: {
          username: process.env.ONET_USERNAME,
          password: process.env.ONET_PASSWORD,
        },
        headers: { Accept: 'application/json' },
      }
    );
    if (response.data && response.data.title) {
      const { what_they_do, on_the_job } = response.data;
      const tasks = on_the_job?.task || [];
      return res.json({
        description: what_they_do || 'No description available',
        tasks: tasks.length ? tasks : ['No tasks available'],
      });
    }
    return res.status(404).json({ error: 'Career not found for SOC code' });
  } catch (error) {
    // Log status + message only; the full axios error object carries the
    // request config, including the Basic Auth credentials.
    console.error('Error in career-description route:', error.response?.status ?? '', error.message);
    res.status(500).json({ error: 'Failed to fetch career description' });
  }
});
// CIP route
/**
 * GET /api/cip/:socCode
 *
 * Looks the trimmed SOC code up in CIP_BY_SOC.
 * Responses: 200 { cipCode } or 404 when the lookup yields nothing.
 */
app.get('/api/cip/:socCode', (req, res) => {
  const lookupKey = String(req.params.socCode || '').trim();
  const cipCode = CIP_BY_SOC.get(lookupKey);
  if (!cipCode) {
    return res.status(404).json({ error: 'CIP code not found' });
  }
  return res.json({ cipCode });
});
/** @aiTool {
"name": "getSchoolsForCIPs",
"description": "Return a list of schools whose CIP codes match the supplied prefixes in the given state",
"parameters": {
"type": "object",
"properties": {
"cipCodes": {
"type": "string",
"description": "Comma-separated CIP prefixes, e.g. \"1101,1103\""
},
"state": {
"type": "string",
"description": "Two-letter state abbreviation, e.g. \"GA\""
}
},
"required": ["cipCodes", "state"]
},
"pages": ["EducationalProgramsPage"],
"write": false
} */
/**************************************************
* Single schools / tuition / etc. routes
**************************************************/
/**
 * GET /api/schools?cipCodes=1101,1103
 *
 * Returns the rows of `institutionData` whose CIPCODE (first dot removed,
 * first four characters) starts with any supplied prefix, de-duplicated by
 * UNITID (falling back to INSTNM).
 *
 * NOTE(review): the 400 message mentions `state`, but this route does not
 * read or filter on it — confirm whether a state filter is intended.
 *
 * Responses: 200 (array of rows), 400 (no usable cipCodes), 500 (data error).
 */
app.get('/api/schools', (req, res) => {
  // 1) Read `cipCodes` from query (comma-separated string)
  const { cipCodes } = req.query;
  if (!cipCodes) {
    return res
      .status(400)
      .json({ error: 'cipCodes (comma-separated) and state are required.' });
  }
  try {
    // 2) "1101,1103,1104" => ["1101","1103","1104"]. String() also covers a
    //    repeated query parameter, which arrives as an array.
    const cipArray = String(cipCodes)
      .split(',')
      .map((c) => c.trim())
      .filter(Boolean);
    if (cipArray.length === 0) {
      return res.status(400).json({ error: 'No valid CIP codes were provided.' });
    }
    // 3) Keep any school whose CIP code matches ANY requested prefix.
    const filtered = institutionData.filter((s) => {
      const rawCip = s['CIPCODE'];
      // Rows without a CIP code cannot match; skipping them avoids a
      // TypeError that would otherwise fail the whole request.
      if (rawCip == null) return false;
      const scip = rawCip.toString().replace('.', '').slice(0, 4);
      return cipArray.some((cip) => scip.startsWith(cip));
    });
    // 4) Deduplicate, keeping the first row seen per school.
    const uniqueMap = new Map();
    for (const school of filtered) {
      const key = school.UNITID || school.INSTNM; // pick your unique field
      if (!uniqueMap.has(key)) {
        uniqueMap.set(key, school);
      }
    }
    const deduped = Array.from(uniqueMap.values());
    console.log('Unique schools found:', deduped.length);
    res.json(deduped);
  } catch (err) {
    console.error('Error reading Institution data:', err.message);
    res.status(500).json({ error: 'Failed to load schools data.' });
  }
});
/** @aiTool {
"name": "getTuitionForCIPs",
"description": "Return in-state / out-state tuition rows for schools matching CIP prefixes in a given state",
"parameters": {
"type": "object",
"properties": {
"cipCodes": { "type": "string", "description": "Comma-separated prefixes, e.g. \"1101,1103\"" },
"state": { "type": "string", "description": "Two-letter state code, e.g. \"GA\"" }
},
"required": ["cipCodes", "state"]
},
"pages": ["EducationalProgramsPage", "LoanRepayment"],
"write": false
} */
// tuition
/**
 * GET /api/tuition?cipCodes=1101,1103&state=GA
 *
 * Returns the rows of `institutionData` that match any CIP prefix AND the
 * given state (case-insensitive), de-duplicated by UNITID (falling back to
 * INSTNM).
 *
 * Responses: 200 (array of rows), 400 (missing/empty params), 500 (data error).
 */
app.get('/api/tuition', (req, res) => {
  const { cipCodes, state } = req.query;
  if (!cipCodes || !state) {
    return res.status(400).json({ error: 'cipCodes and state are required.' });
  }
  try {
    // String() also covers repeated query parameters, which arrive as arrays.
    const cipArray = String(cipCodes)
      .split(',')
      .map((c) => c.trim())
      .filter(Boolean);
    if (!cipArray.length) {
      return res.status(400).json({ error: 'No valid CIP codes.' });
    }
    // Normalise the requested state once instead of once per row.
    const wantedState = String(state).toUpperCase().trim();
    const filtered = institutionData.filter((school) => {
      const rawCip = school['CIPCODE'];
      const rawState = school['State'];
      // Rows missing either field cannot match; skipping them avoids a
      // TypeError that would otherwise fail the whole request.
      if (rawCip == null || rawState == null) return false;
      const cval = rawCip.toString().replace(/\./g, '').slice(0, 4);
      const sVal = String(rawState).toUpperCase().trim();
      return sVal === wantedState && cipArray.some((cip) => cval.startsWith(cip));
    });
    // Deduplicate, keeping the first row seen per school.
    const uniqueMap = new Map();
    for (const school of filtered) {
      const key = school.UNITID || school.INSTNM; // or something else unique
      if (!uniqueMap.has(key)) {
        uniqueMap.set(key, school);
      }
    }
    const deduped = Array.from(uniqueMap.values());
    console.log('Filtered Tuition Data Count:', deduped.length);
    res.json(deduped);
  } catch (err) {
    console.error('Error reading tuition data:', err.message);
    res.status(500).json({ error: 'Failed to load tuition data.' });
  }
});
/** @aiTool {
"name": "getEconomicProjections",
"description": "Return state and national employment projections for a SOC code",
"parameters": {
"type": "object",
"properties": {
"socCode": { "type": "string" },
"state": { "type": "string", "description": "Optional state abbreviation" }
},
"required": ["socCode"]
},
"pages": ["CareerExplorer"],
"write": false
} */
/**************************************************
* SINGLE route for projections from economicproj.json
**************************************************/
/**
 * GET /api/projections/:socCode?state=<area>
 *
 * Looks up two rows in PROJ_BY_KEY, keyed as "<soc>|<lower-cased area>":
 * one for the requested area (defaulting to "United States") and one for
 * "united states".
 *
 * Responses: 200 { state, national } (either may be null), 404 when both miss.
 */
app.get('/api/projections/:socCode', (req, res) => {
  const occCode = String(req.params.socCode).trim();
  const requestedArea = req.query.state || 'United States';
  const areaKey = String(requestedArea).trim().toLowerCase();

  const stateRow = PROJ_BY_KEY.get(`${occCode}|${areaKey}`) || null;
  const nationalRow = PROJ_BY_KEY.get(`${occCode}|united states`) || null;

  if (stateRow === null && nationalRow === null) {
    return res.status(404).json({ error: 'No projections found for this SOC + area.' });
  }
  return res.json({ state: stateRow, national: nationalRow });
});
/** @aiTool {
"name": "getSalaryData",
"description": "Return residential area and national salary percentiles for a SOC code",
"parameters": {
"type": "object",
"properties": {
"socCode": { "type": "string" },
"area": { "type": "string", "description": "User's residential area" }
},
"required": ["socCode"]
},
"pages": ["CareerExplorer"],
"write": false
} */
/**************************************************
* Salary route
**************************************************/
/**
 * GET /api/salary?socCode=<code>&area=<name>
 *
 * Runs the prepared SALARY_STMT for the SOC code / area pair and returns the
 * regional and national percentile columns. Successful payloads are cached
 * in SALARY_CACHE under "<socCode>|<area>".
 *
 * Responses:
 *   200 { regional, national } - absent values are omitted from the JSON
 *   400 - socCode missing
 *   404 - no regional and no national values
 *   500 - query failure
 */
app.get('/api/salary', async (req, res) => {
  const { socCode, area } = req.query;
  if (!socCode) {
    return res.status(400).json({ error: 'SOC Code is required' });
  }
  try {
    const keyArea = String(area || ''); // allow empty → national only
    const cacheKey = `${socCode}|${keyArea}`;
    const cached = SALARY_CACHE.get(cacheKey);
    if (cached) {
      // Re-insert on a hit so the entry moves to the back of the iteration
      // order; otherwise eviction below is FIFO rather than least-recently-used.
      SALARY_CACHE.delete(cacheKey);
      SALARY_CACHE.set(cacheKey, cached);
      return res.json(cached);
    }
    // Bind regional placeholders (five times) + occ + area
    const row = await SALARY_STMT.get(
      keyArea, keyArea, keyArea, keyArea, keyArea, socCode, keyArea
    );
    const percentileFields = ['PCT10', 'PCT25', 'MEDIAN', 'PCT75', 'PCT90'];
    // Collect "<prefix>_<FIELD>" columns; null becomes undefined so the key
    // is dropped when the payload is serialised.
    const pickPercentiles = (prefix) =>
      Object.fromEntries(
        percentileFields.map((field) => {
          const column = `${prefix}_${field}`;
          return [column, row?.[column] ?? undefined];
        })
      );
    const regional = pickPercentiles('regional');
    const national = pickPercentiles('national');
    // If both are empty, 404 to match old behavior
    if (
      Object.values(regional).every((v) => v == null) &&
      Object.values(national).every((v) => v == null)
    ) {
      return res.status(404).json({ error: 'No salary data found' });
    }
    const payload = { regional, national };
    // Tiny LRU: once over SALARY_CACHE_MAX entries, evict the oldest key.
    SALARY_CACHE.set(cacheKey, payload);
    if (SALARY_CACHE.size > SALARY_CACHE_MAX) {
      const oldestKey = SALARY_CACHE.keys().next().value;
      SALARY_CACHE.delete(oldestKey);
    }
    res.json(payload);
  } catch (error) {
    console.error('Error executing salary query:', error.message);
    res.status(500).json({ error: 'Failed to fetch salary data' });
  }
});
/**************************************************
* job-zones route
**************************************************/
/**
 * POST /api/job-zones
 *
 * Body: { socCodes: string[] }.
 * Normalises each code (dots removed, hyphen inserted into a bare six-character
 * code, truncated to seven characters), queries `salary_data`, and returns a
 * map of OCC_CODE → { job_zone, limited_data }. `limited_data` is 1 when any
 * of the median / 10th / 25th / 75th percentile columns is empty, "#" or "*".
 *
 * Responses: 200 (mapping), 400 (socCodes missing or empty), 500 (query failure).
 */
app.post('/api/job-zones', async (req, res) => {
  const { socCodes } = req.body || {};
  if (!socCodes || !Array.isArray(socCodes) || socCodes.length === 0) {
    return res.status(400).json({ error: 'SOC Codes are required.' });
  }
  try {
    // Format them. Entries are coerced to strings first so that a numeric or
    // null element does not throw on .trim() and fail the whole batch.
    const formattedSocCodes = socCodes.map((code) => {
      let cleaned = String(code ?? '').trim().replace(/\./g, '');
      if (!cleaned.includes('-') && cleaned.length === 6) {
        cleaned = cleaned.slice(0, 2) + '-' + cleaned.slice(2);
      }
      return cleaned.slice(0, 7);
    });
    // One "?" per code keeps the IN (...) list fully parameterised.
    const placeholders = formattedSocCodes.map(() => '?').join(',');
    const q = `
      SELECT OCC_CODE,
             JOB_ZONE,
             A_MEDIAN,
             A_PCT10,
             A_PCT25,
             A_PCT75
      FROM salary_data
      WHERE OCC_CODE IN (${placeholders})
    `;
    const rows = await dbSqlite.all(q, formattedSocCodes);
    console.log('Salary Data Query Results:', rows);
    const jobZoneMapping = rows.reduce((acc, row) => {
      const isMissing = [row.A_MEDIAN, row.A_PCT10, row.A_PCT25, row.A_PCT75].some(
        (v) => !v || v === '#' || v === '*'
      );
      acc[row.OCC_CODE] = {
        job_zone: row.JOB_ZONE,
        limited_data: isMissing ? 1 : 0,
      };
      return acc;
    }, {});
    console.log('Job Zone & Limited Data:', jobZoneMapping);
    res.json(jobZoneMapping);
  } catch (error) {
    console.error('Error fetching job zones:', error);
    res.status(500).json({ error: 'Failed to fetch job zones.' });
  }
});
/**************************************************
* O*NET Skills route
**************************************************/
/**
 * GET /api/skills/:socCode
 *
 * Fetches the O*NET "careers/{code}/skills" resource and flattens its
 * group[].element[] structure into a single list.
 *
 * Responses:
 *   200 { skills: [{ groupName, skillId, skillName }] }
 *   400 - missing code
 *   500 - upstream failure
 */
app.get('/api/skills/:socCode', async (req, res) => {
  const { socCode } = req.params;
  if (!socCode) {
    return res.status(400).json({ error: 'SOC code is required' });
  }
  console.log(`Fetching O*NET skills for SOC Code: ${socCode}`);
  try {
    // 1) Build the O*NET API URL. The path segment is encoded so a
    //    caller-supplied value cannot change which resource is requested.
    const onetUrl = `https://services.onetcenter.org/ws/mnm/careers/${encodeURIComponent(socCode)}/skills`;
    // 2) Call O*NET with Basic Auth
    const response = await axios.get(onetUrl, {
      auth: {
        username: process.env.ONET_USERNAME,
        password: process.env.ONET_PASSWORD,
      },
      headers: { Accept: 'application/json' },
    });
    const data = response.data || {};
    const groups = data.group || [];
    const skills = [];
    // Flatten out the group[].element[] into a single skills array
    groups.forEach((groupItem) => {
      const groupName = groupItem?.title?.name || 'Unknown Group';
      if (Array.isArray(groupItem.element)) {
        groupItem.element.forEach((elem) => {
          skills.push({
            groupName,
            skillId: elem.id,
            skillName: elem.name,
          });
        });
      }
    });
    res.json({ skills });
  } catch (error) {
    console.error('Error fetching O*NET skills:', error.message);
    if (error.response) {
      console.error('O*NET error status:', error.response.status);
      console.error('O*NET error data:', error.response.data);
    } else if (error.request) {
      // Do not dump error.request itself: the request object includes the
      // outgoing headers, i.e. the Basic Auth credentials.
      console.error('No response received from O*NET.');
    } else {
      console.error('Request setup error:', error.message);
    }
    return res.status(500).json({ error: 'Failed to fetch O*NET skills' });
  }
});
/***************************************************
* AI RISK ASSESSMENT ENDPOINT READ
****************************************************/
/**
 * GET /api/ai-risk/:socCode
 *
 * Reads one stored risk analysis through getRiskAnalysisFromDB and maps its
 * snake_case columns to the camelCase response shape.
 *
 * Responses: 200 (analysis), 404 (no row), 500 (lookup failure).
 */
app.get('/api/ai-risk/:socCode', async (req, res) => {
  const { socCode } = req.params;
  try {
    const record = await getRiskAnalysisFromDB(socCode);
    if (!record) {
      return res.status(404).json({ error: 'Not found' });
    }
    const {
      soc_code,
      career_name,
      job_description,
      tasks,
      risk_level,
      reasoning,
      created_at,
    } = record;
    res.json({
      socCode: soc_code,
      careerName: career_name,
      jobDescription: job_description,
      tasks,
      riskLevel: risk_level,
      reasoning,
      created_at,
    });
  } catch (err) {
    console.error('Error fetching AI risk:', err);
    res.status(500).json({ error: 'Internal server error' });
  }
});
/***************************************************
* AI RISK ASSESSMENT ENDPOINT WRITE
****************************************************/
/**
 * POST /api/ai-risk
 *
 * Body: { socCode, careerName, jobDescription, tasks, riskLevel, reasoning }.
 * Hands the analysis to storeRiskAnalysisInDB.
 *
 * Responses: 201 (stored), 400 (socCode missing), 500 (any failure).
 */
app.post('/api/ai-risk', async (req, res) => {
  try {
    const input = req.body;
    if (!input.socCode) {
      return res.status(400).json({ error: 'socCode is required' });
    }
    // Store / upsert row
    // NOTE(review): `vectorSearch` is the imported helper function, passed
    // through alongside the data fields — confirm storeRiskAnalysisInDB
    // actually expects it.
    const analysis = {
      socCode: input.socCode,
      careerName: input.careerName,
      jobDescription: input.jobDescription,
      tasks: input.tasks,
      riskLevel: input.riskLevel,
      vectorSearch,
      reasoning: input.reasoning,
    };
    await storeRiskAnalysisInDB(analysis);
    res.status(201).json({ message: 'AI Risk Analysis stored successfully' });
  } catch (err) {
    console.error('Error storing AI risk data:', err);
    res.status(500).json({ error: 'Failed to store AI risk data.' });
  }
});
// Wire up the free-chat feature implemented in ./utils/chatFreeEndpoint.js,
// handing it the Express app plus the dependencies it needs.
// NOTE(review): presumably this is what registers /api/chat/free, which the
// support-thread stream route calls — confirm in the helper.
chatFreeEndpoint(app, {
openai,
authenticateUser, // or omit if you don't need auth yet
chatLimiter,
userProfileDb
});
/**************************************************
 * Support email endpoint
 * - Rate limited (burst and daily limits) to prevent abuse;
 *   returns 429 Too Many Requests if limits are exceeded
 * - Deduplicates repeated submissions for 10 minutes
 **************************************************/
/**
 * POST /api/support  (authenticated, burst- and daily-rate-limited)
 *
 * Body: { subject?, category?, message }.
 * Resolves the caller's account email (token → user_profile lookup →
 * server1 profile API), validates the input, drops duplicates, and sends the
 * message to the support inbox through SendGrid.
 *
 * Responses:
 *   200 { ok: true }                - sent
 *   202 { ok: true, deduped: true } - identical message already seen
 *   400 - no email on file, invalid category, or message too short
 *   401 - no user id on the request
 *   503 - mail is not configured
 *   500 - unexpected failure (including a SendGrid error)
 */
app.post(
  '/api/support',
  authenticateUser, // logged-in only
  supportBurstLimiter,
  supportDailyLimiter,
  async (req, res) => {
    // Escape the characters that are significant in HTML so user-supplied
    // text cannot inject markup into the HTML part of the email.
    const htmlEntities = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
    const escapeHtml = (value) => String(value).replace(/[&<>"']/g, (ch) => htmlEntities[ch]);

    try {
      const user = req.user || {};
      const userId = user.id || user.user_id || user.sub; // depends on your token
      if (!userId) {
        return res.status(401).json({ error: 'Auth required' });
      }

      // Prefer token email; fall back to DB; last resort: server1
      let accountEmail = user.email || user.mail || null;
      if (!accountEmail) {
        try {
          const row = await userProfileDb.get(
            'SELECT email FROM user_profile WHERE id = ?',
            [userId]
          );
          accountEmail = row?.email || null;
        } catch (lookupErr) {
          // Best-effort: record why the lookup failed, then try server1.
          console.warn('[support] user_profile email lookup failed:', lookupErr?.message || lookupErr);
        }
      }
      // If still missing, fetch from server1 using the caller's session
      if (!accountEmail) {
        try {
          const r = await fetch('http://server1:5000/api/user-profile?fields=email', {
            method: 'GET',
            headers: {
              // forward caller's auth — either cookie (HttpOnly session) or bearer
              'Authorization': req.headers.authorization || '',
              'Cookie': req.headers.cookie || ''
            }
          });
          if (r.ok && (r.headers.get('content-type') || '').includes('application/json')) {
            const j = await r.json();
            accountEmail = j?.email || null;
          }
        } catch (fetchErr) {
          // Best-effort; falls through to the 400 below if still null.
          console.warn('[support] server1 email lookup failed:', fetchErr?.message || fetchErr);
        }
      }
      if (!accountEmail) {
        return res.status(400).json({ error: 'No email on file for this user' });
      }

      const { subject = '', category = 'general', message = '' } = req.body || {};
      // Basic validation. `?? ''` covers an explicit null, which the
      // destructuring defaults above do not replace.
      const allowedCats = new Set(['general', 'billing', 'technical', 'data', 'ux']);
      const cat = String(category);
      const subj = String(subject ?? '').slice(0, 120).trim();
      const body = String(message ?? '').trim();
      if (!allowedCats.has(cat)) {
        return res.status(400).json({ error: 'Invalid category' });
      }
      if (body.length < 5) {
        return res.status(400).json({ error: 'Message too short' });
      }

      // Require mail config BEFORE the dedupe check: the dedupe helper
      // remembers the key, so checking it first would make a retry after a
      // 503 look like a duplicate even though nothing was ever sent.
      const FROM = 'support@aptivaai.com';
      const TO = 'support@aptivaai.com';
      if (!SENDGRID_KEY) {
        return res.status(503).json({ error: 'Support email not configured' });
      }

      // Dedupe
      // NOTE(review): the key stays remembered if sgMail.send() fails below;
      // consider forgetting it on failure if the helper allows.
      const key = makeKey(userId, subj || '(no subject)', body);
      if (isDuplicateAndRemember(key)) {
        return res.status(202).json({ ok: true, deduped: true });
      }

      const humanSubject =
        `[Support • ${cat}] ${subj || '(no subject)'} — user ${userId}`;
      const textBody = [
        `User: ${userId}`,
        `Email: ${accountEmail}`,
        `Category: ${cat}`,
        body,
      ].join('\n');

      await sgMail.send({
        to: TO,
        from: FROM,
        replyTo: accountEmail,
        subject: humanSubject,
        text: textBody,
        html: `<pre style="font-family: ui-monospace, Menlo, monospace; white-space: pre-wrap">${escapeHtml(textBody)}</pre>`,
        categories: ['support', cat]
      });
      return res.status(200).json({ ok: true });
    } catch (err) {
      console.error('[support] error:', err?.message || err);
      return res.status(500).json({ error: 'Failed to send support message' });
    }
  }
);
/* ----------------- Support bot chat (server2) ----------------- */
/* CREATE thread */
/**
 * POST /api/chat/threads  (authenticated)
 *
 * Body: { title? }. Creates a "support" chat thread owned by the caller.
 * Responds with { id, title }; database failures are passed to the Express
 * error handler.
 */
app.post('/api/chat/threads', authenticateUser, async (req, res, next) => {
  try {
    const userId = req.user.id;
    const id = uuid();
    // String() guards against a non-string title (e.g. a number), which has
    // no .slice(); the title is capped at 200 characters.
    const title = String(req.body?.title || 'Support chat').slice(0, 200);
    await pool.query(
      'INSERT INTO ai_chat_threads (id,user_id,bot_type,title) VALUES (?,?, "support", ?)',
      [id, userId, title]
    );
    res.json({ id, title });
  } catch (err) {
    // Forward explicitly: a rejected promise from an async handler is not
    // routed to the error middleware by Express 4.
    next(err);
  }
});
/* LIST threads */
/**
 * GET /api/chat/threads  (authenticated)
 *
 * Lists the caller's "support" threads, most recently updated first, capped
 * at 50. Responds with { threads }; database failures are passed to the
 * Express error handler.
 */
app.get('/api/chat/threads', authenticateUser, async (req, res, next) => {
  try {
    const [rows] = await pool.query(
      'SELECT id,title,updated_at FROM ai_chat_threads WHERE user_id=? AND bot_type="support" ORDER BY updated_at DESC LIMIT 50',
      [req.user.id]
    );
    res.json({ threads: rows });
  } catch (err) {
    // Forward explicitly: a rejected promise from an async handler is not
    // routed to the error middleware by Express 4.
    next(err);
  }
});
/* GET thread + messages */
/**
 * GET /api/chat/threads/:id  (authenticated)
 *
 * Verifies the thread belongs to the caller, then returns up to 200 of its
 * messages in ascending id order.
 *
 * Responses: 200 { messages }, 404 { error: 'not_found' }; database failures
 * are passed to the Express error handler.
 */
app.get('/api/chat/threads/:id', authenticateUser, async (req, res, next) => {
  try {
    const { id } = req.params;
    // Ownership check: the thread must exist for this user and bot type.
    const [[t]] = await pool.query(
      'SELECT id FROM ai_chat_threads WHERE id=? AND user_id=? AND bot_type="support"',
      [id, req.user.id]
    );
    if (!t) return res.status(404).json({ error: 'not_found' });
    const [msgs] = await pool.query(
      'SELECT role,content,created_at FROM ai_chat_messages WHERE thread_id=? ORDER BY id ASC LIMIT 200',
      [id]
    );
    res.json({ messages: msgs });
  } catch (err) {
    // Forward explicitly: a rejected promise from an async handler is not
    // routed to the error middleware by Express 4.
    next(err);
  }
});
/* STREAM reply via local /api/chat/free */
/**
 * POST /api/chat/threads/:id/stream  (authenticated)
 *
 * Body: { prompt, pageContext?, snapshot? }.
 * Stores the user's prompt, forwards it with recent thread history to the
 * local /api/chat/free endpoint, relays the reply to the client line by line,
 * and finally stores the assembled assistant reply.
 *
 * Responses:
 *   200 text/event-stream - newline-delimited reply lines
 *   400 { error: 'empty' }           - prompt missing, blank, or not a string
 *   404 { error: 'not_found' }       - thread not owned by the caller
 *   502 { error: 'upstream_failed' } - free-chat endpoint unreachable or not OK
 * Database failures before streaming starts go to the Express error handler.
 */
app.post('/api/chat/threads/:id/stream', authenticateUser, async (req, res, next) => {
  const { id } = req.params;
  const userId = req.user.id;
  const { prompt = '', pageContext = '', snapshot = null } = req.body || {};
  if (typeof prompt !== 'string' || !prompt.trim()) {
    return res.status(400).json({ error: 'empty' });
  }

  let history;
  try {
    const [[t]] = await pool.query(
      'SELECT id FROM ai_chat_threads WHERE id=? AND user_id=? AND bot_type="support"',
      [id, userId]
    );
    if (!t) return res.status(404).json({ error: 'not_found' });
    // save user msg
    await pool.query(
      'INSERT INTO ai_chat_messages (thread_id,user_id,role,content) VALUES (?,?, "user", ?)',
      [id, userId, prompt]
    );
    // Small history for context: take the NEWEST 40 rows, then restore
    // chronological order. Ordering ascending with LIMIT 40 would return the
    // oldest 40 and drop the latest turns once a thread grows past that.
    const [recent] = await pool.query(
      'SELECT role,content FROM ai_chat_messages WHERE thread_id=? ORDER BY id DESC LIMIT 40',
      [id]
    );
    history = [...recent].reverse();
  } catch (err) {
    // Nothing has been sent yet, so the error middleware can still respond.
    return next(err);
  }

  // call local free-chat (server2 hosts /api/chat/free)
  let internal;
  try {
    internal = await fetch('http://server2:5001/api/chat/free', {
      method: 'POST',
      headers: {
        'Content-Type' : 'application/json',
        'Accept' : 'text/event-stream',
        'Authorization': req.headers.authorization || '',
        'Cookie' : req.headers.cookie || ''
      },
      body: JSON.stringify({ prompt, pageContext, snapshot, chatHistory: history })
    });
  } catch (err) {
    console.error('[support stream] upstream request failed:', err?.message || err);
    return res.status(502).json({ error: 'upstream_failed' });
  }
  if (!internal.ok || !internal.body) {
    return res.status(502).json({ error: 'upstream_failed' });
  }

  // SSE-ish newline stream (matches your ChatDrawer reader)
  res.writeHead(200, {
    'Content-Type' : 'text/event-stream; charset=utf-8',
    'Cache-Control' : 'no-cache',
    'Connection' : 'keep-alive',
    'X-Accel-Buffering': 'no'
  });
  res.flushHeaders?.();

  const reader = internal.body.getReader();
  const decoder = new TextDecoder();
  let buf = '';
  let assistant = '';
  // Relay one line to the client and accumulate it for persistence.
  const push = (line) => {
    assistant += line + '\n';
    res.write(line + '\n'); // write strings, no await
  };

  try {
    while (true) {
      const { value, done } = await reader.read();
      if (done) break;
      if (!value) continue;
      buf += decoder.decode(value, { stream: true });
      let nl;
      while ((nl = buf.indexOf('\n')) !== -1) {
        const line = buf.slice(0, nl).trim();
        buf = buf.slice(nl + 1);
        if (line) push(line);
      }
    }
    // Flush any bytes the decoder is still holding from a split character.
    buf += decoder.decode();
    if (buf.trim()) push(buf.trim());
  } catch (err) {
    console.error('[support stream]', err);
    res.write('Sorry — error occurred\n');
  }

  // Persist the assistant reply. Headers are already sent, so a failure
  // here is only logged; `finally` guarantees the response is closed.
  try {
    if (assistant.trim()) {
      await pool.query(
        'INSERT INTO ai_chat_messages (thread_id,user_id,role,content) VALUES (?,?, "assistant", ?)',
        [id, userId, assistant.trim()]
      );
      await pool.query('UPDATE ai_chat_threads SET updated_at=CURRENT_TIMESTAMP WHERE id=?', [id]);
    }
  } catch (err) {
    console.error('[support stream] failed to persist assistant reply:', err?.message || err);
  } finally {
    res.end();
  }
});
// GET /api/careers/search?query=<text>&limit=15
/**
 * GET /api/careers/search?query=<text>&limit=15
 *
 * Returns up to `limit` careers (1–50, default 15): an exact normalised-title
 * match first when there is one, followed by fuzzy hits from `careersFuse`,
 * de-duplicated by normalised title. Responds with [] when the query is
 * empty or the index is unavailable.
 */
app.get('/api/careers/search', (req, res) => {
  const q = String(req.query.query || '').trim();
  const limit = Math.min(50, Math.max(1, Number(req.query.limit) || 15));
  if (!q || !careersFuse) return res.json([]);
  // exact match first (auto-commit parity)
  const nq = normTitle(q);
  const exact = CAREERS.find((c) => normTitle(c.title) === nq);
  // fuse results for partials
  const hits = careersFuse.search(q, { limit: Math.max(limit, 25) }).map((h) => h.item);
  // de-dupe by normalized title; put exact first if present
  const seen = new Set();
  const out = [];
  if (exact) {
    seen.add(normTitle(exact.title));
    out.push(exact);
  }
  for (const c of hits) {
    // Check the cap BEFORE adding: the exact match already occupies a slot,
    // so checking only after a push could return limit + 1 items.
    if (out.length >= limit) break;
    const key = normTitle(c.title);
    if (seen.has(key)) continue;
    seen.add(key);
    out.push(c);
  }
  // return minimal shape; include cip_code mirror for legacy callers
  res.json(out.map((c) => ({
    title : c.title,
    soc_code : c.soc_code,
    cip_codes : c.cip_codes,
    cip_code : c.cip_codes, // ← backward-compat (some callers expect singular)
    limited_data : c.limited_data,
    ratings : c.ratings,
  })));
});
// GET /api/careers/resolve?title=<exact>
/**
 * GET /api/careers/resolve?title=<exact>
 *
 * Finds the career whose normalised title equals the normalised query title.
 * Responses: 200 (career summary), 400 (title missing), 404 (no match).
 */
app.get('/api/careers/resolve', (req, res) => {
  const requestedTitle = String(req.query.title || '').trim();
  if (!requestedTitle) {
    return res.status(400).json({ error: 'title required' });
  }
  const wanted = normTitle(requestedTitle);
  const career = CAREERS.find((entry) => normTitle(entry.title) === wanted);
  if (!career) {
    return res.status(404).json({ error: 'not_found' });
  }
  const { title, soc_code, cip_codes, limited_data, ratings } = career;
  res.json({
    title,
    soc_code,
    cip_codes,
    cip_code: cip_codes, // ← mirror for legacy
    limited_data,
    ratings,
  });
});
/**
 * GET /api/careers/by-soc?soc=<code>
 *
 * Finds a career by SOC code. An exact match is preferred; failing that, the
 * codes are compared with any trailing ".NN" suffix removed, so "11-1011.00"
 * and "11-1011" resolve to the same entry.
 *
 * Responses: 200 { soc_code, title, cip_codes, ratings }, 400, 404.
 */
app.get('/api/careers/by-soc', (req, res) => {
  const raw = String(req.query.soc || '').trim();
  if (!raw) return res.status(400).json({ error: 'soc required' });
  // tolerate .00 — strip a trailing ".<digits>" detail suffix
  const stripSuffix = (code) => String(code ?? '').replace(/\.\d+$/, '');
  const base = stripSuffix(raw);
  const match =
    CAREERS.find((c) => c.soc_code === raw) ||
    CAREERS.find((c) => stripSuffix(c.soc_code) === base) ||
    null;
  if (!match) return res.status(404).json({ error: 'not_found' });
  res.json({
    soc_code : match.soc_code,
    title : match.title,
    cip_codes : match.cip_codes || [],
    ratings : match.ratings || {}
  });
});
// GET /api/schools/suggest?query=<text>&limit=10
/**
 * GET /api/schools/suggest?query=<text>&limit=10
 *
 * Case-insensitive substring search over CIPMAP institution names. Returns
 * up to `limit` (1–20, default 10) distinct names as [{ name, unitId }], in
 * the order they first appear. Responds with [] for an empty query.
 */
app.get('/api/schools/suggest', (req, res) => {
  const needle = String(req.query.query || '').trim().toLowerCase();
  const maxResults = Math.min(20, Math.max(1, Number(req.query.limit) || 10));
  if (!needle) return res.json([]);

  // Keyed by name so each school is suggested once; Map keeps first-seen order.
  const suggestions = new Map();
  for (const row of CIPMAP) {
    const name = (row.INSTNM || '').trim();
    if (!name) continue;
    if (suggestions.has(name) || !name.toLowerCase().includes(needle)) continue;
    suggestions.set(name, { name, unitId: row.UNITID || null });
    if (suggestions.size >= maxResults) break;
  }
  res.json([...suggestions.values()]);
});
// GET /api/programs/suggest?school=<name>&query=<text>&limit=10
/**
 * GET /api/programs/suggest?school=<name>&query=<text>&limit=10
 *
 * For the school whose name matches exactly (case-insensitive), returns up to
 * `limit` (1–30, default 10) distinct program descriptions containing the
 * query text, as [{ program }]. Responds with [] when either input is empty.
 */
app.get('/api/programs/suggest', (req, res) => {
  const schoolName = String(req.query.school || '').trim().toLowerCase();
  const needle = String(req.query.query || '').trim().toLowerCase();
  const maxResults = Math.min(30, Math.max(1, Number(req.query.limit) || 10));
  if (!schoolName || !needle) return res.json([]);

  // Set keeps first-seen order while rejecting repeated descriptions.
  const programs = new Set();
  for (const row of CIPMAP) {
    const rowSchool = (row.INSTNM || '').trim().toLowerCase();
    const description = (row.CIPDESC || '').trim();
    if (!description || rowSchool !== schoolName) continue;
    if (programs.has(description) || !description.toLowerCase().includes(needle)) continue;
    programs.add(description);
    if (programs.size >= maxResults) break;
  }
  res.json(Array.from(programs, (program) => ({ program })));
});
// GET /api/programs/types?school=<name>&program=<exact>
/**
 * GET /api/programs/types?school=<name>&program=<exact>
 *
 * Collects the distinct CREDDESC values of CIPMAP rows matching the school
 * (case-insensitive) and the program description (exact).
 * Responses: 200 { types }, 400 when either parameter is missing.
 */
app.get('/api/programs/types', (req, res) => {
  const schoolName = String(req.query.school || '').trim().toLowerCase();
  const programName = String(req.query.program || '').trim();
  if (!schoolName || !programName) {
    return res.status(400).json({ error: 'school and program required' });
  }

  const credentialTypes = new Set();
  for (const row of CIPMAP) {
    if ((row.INSTNM || '').trim().toLowerCase() !== schoolName) continue;
    if ((row.CIPDESC || '').trim() !== programName) continue;
    if (row.CREDDESC) credentialTypes.add(row.CREDDESC);
  }
  res.json({ types: Array.from(credentialTypes) });
});
// GET /api/tuition/estimate?unitId=...&programType=...&inState=0|1&inDistrict=0|1&creditHoursPerYear=NN
/**
 * GET /api/tuition/estimate?unitId=...&programType=...&inState=0|1&inDistrict=0|1&creditHoursPerYear=NN
 *
 * Picks the HRCHGn / TUITIONn column pair of the matching IPEDS row:
 * n = 1, 2, 3 (in-district, in-state, otherwise) for non-graduate program
 * types and n = 5, 6, 7 for graduate ones. The estimate is the per-credit
 * charge times the credit hours when those are between 1 and 23 and a
 * per-credit charge exists; otherwise it is the full-year tuition.
 *
 * Responses: 200 (estimate payload), 400 (unitId missing), 404 (unknown unitId).
 */
app.get('/api/tuition/estimate', (req, res) => {
  const asFlag = (value) => (Number(value || 0) ? 1 : 0);

  const unitId = String(req.query.unitId || '').trim();
  const programType = String(req.query.programType || '').trim();
  const inState = asFlag(req.query.inState);
  const inDistrict = asFlag(req.query.inDistrict);
  const creditHours = Math.max(0, Number(req.query.creditHoursPerYear || 0));

  if (!unitId) return res.status(400).json({ error: 'unitId required' });
  const row = IPEDS.find((entry) => String(entry.UNITID) === unitId);
  if (!row) return res.status(404).json({ error: 'unitId not found' });

  const graduateTypes = new Set([
    "Master's Degree",
    "Doctoral Degree",
    "Graduate/Professional Certificate",
    "First Professional Degree",
  ]);
  // Column suffix = level base (1 non-graduate, 5 graduate) + residency offset.
  const levelBase = graduateTypes.has(programType) ? 5 : 1;
  let residencyOffset = 2;
  if (inDistrict) residencyOffset = 0;
  else if (inState) residencyOffset = 1;
  const suffix = levelBase + residencyOffset;

  const perCredit = Number(row[`HRCHG${suffix}`] || 0);
  const fullYear = Number(row[`TUITION${suffix}`] || 0);
  const billByCredit = creditHours && creditHours < 24 && perCredit;
  const estimate = billByCredit ? perCredit * creditHours : fullYear;

  res.json({
    unitId,
    programType,
    inState: !!inState,
    inDistrict: !!inDistrict,
    creditHoursPerYear: creditHours,
    estimate: Math.round(estimate)
  });
});
/**
 * Final error-handling middleware.
 *
 * Does nothing if a response has already started. Otherwise logs the error
 * message tagged with a request reference and replies 500 with that same
 * reference. The reference is the incoming x-request-id header, else the
 * X-Request-ID response header, else whatever getRequestId returns.
 * The unused fourth parameter is kept: Express identifies error handlers by
 * their four-argument signature.
 */
app.use(function handleUnhandledError(err, req, res, _next) {
  if (res.headersSent) {
    return;
  }
  const reference =
    req.headers['x-request-id'] || res.get('X-Request-ID') || getRequestId(req, res);
  const detail = err?.message || err;
  console.error(`[ref ${reference}]`, detail);
  // map known cases if you have them; otherwise generic:
  return res.status(500).json({ error: 'Server error', ref: reference });
});
/**************************************************
* Start the Express server
**************************************************/
// Begin accepting connections on PORT and announce it once listening.
app.listen(PORT, function onListening() {
  console.log(`Server running on port ${PORT}`);
});