// STEPIK_SESSIONID= deno run --allow-net=stepik.org --allow-env=STEPIK_SESSIONID test/stepik-scraper.ts 579 > ignored/cse100.html
//
// Downloads the syllabus of one Stepik course (ID given as the first CLI
// argument) and prints a single HTML document to stdout: a table of contents
// followed by the text steps of every lesson.
//
// NOTE(review): the HTML tags inside the string literals below were missing
// from the reviewed copy of this file and have been reconstructed from the
// surrounding structure (nested ordered lists, headings, a separator between
// steps). Confirm the exact elements/attributes against the intended output.

const ROOT = 'https://stepik.org'
const [courseId] = Deno.args
if (!courseId) {
  // Without an ID the syllabus URL would be /course/undefined/syllabus.
  throw new Error('Missing course ID argument (e.g. 579)')
}
const cookie = `sessionid=${Deno.env.get('STEPIK_SESSIONID')}`

/**
 * Groups the items of `array` by the key that `getKey` returns for each item.
 *
 * @param array Items to group; iteration order is preserved within a group.
 * @param getKey Computes the group key of an item.
 * @returns An object mapping each key to the items that produced it.
 */
function partition<T> (
  array: T[],
  getKey: (value: T) => string | number
): Record<string | number, T[]> {
  const result: Record<string | number, T[]> = {}
  for (const item of array) {
    const key = getKey(item)
    const group = (result[key] ??= [])
    group.push(item)
  }
  return result
}

/** Data embedded in the syllabus page as `__stepik_shoebox__`. */
type StepikShoebox = {
  courses: {
    id: number
    summary: string
    instructors: number[]
    certificate: string
    requirements: string
    description: string // HTML
    sections: number[]
    title: string
    slug: string
  }[]
  'course-review-summaries': unknown[]
  stepics: unknown[]
  users: unknown[]
  profiles: unknown[]
}

type Section = {
  id: number
  course: number
  units: number[]
  position: number
  title: string
  slug: string
}

type Unit = {
  id: number
  section: number
  lesson: number
  position: number
}

type Lesson = {
  id: number
  steps: number[]
  title: string
  slug: string
}

type Step = {
  id: number
  lesson: number
  position: number
  block: {
    name: string // either 'text' or the type of answer for the problem
    text: string
  }
}

const syllabusResponse = await fetch(`${ROOT}/course/${courseId}/syllabus`, {
  headers: { cookie }
})
const syllabusHtml = await syllabusResponse.text()
const shoeboxJson = syllabusHtml.match(
  /__stepik_shoebox__ = JSON\.parse\('(.+)'\)/
)?.[1]
if (shoeboxJson === undefined) {
  // Previously this fell through to JSON.parse('"undefined"') and failed with
  // an unrelated-looking SyntaxError.
  throw new Error(
    `Could not find __stepik_shoebox__ in the syllabus page (HTTP ${syllabusResponse.status})`
  )
}
// The capture is the body of a quoted string literal in the page's script.
// The inner parse decodes that literal's escapes; the outer parse reads the
// JSON document it contains.
const initData: StepikShoebox = JSON.parse(JSON.parse(`"${shoeboxJson}"`))
const [course] = initData.courses
if (!course) {
  throw new Error('The syllabus page did not contain any course data')
}

/**
 * Fetches the objects with the given IDs from `${ROOT}/api/${path}`.
 *
 * @param path API collection name; also the key of the result array in the
 * response body.
 * @param ids IDs to request, sent as repeated `ids[]` query parameters.
 * @returns The array stored under `path` in the JSON response.
 * @throws Error with message `<status> HTTP error` when the response is not OK.
 */
async function getApi<T> (path: string, ids: number[]): Promise<T[]> {
  if (ids.length === 0) {
    // An empty list would produce a request with no `ids[]` filter at all.
    return []
  }
  const query = new URLSearchParams(ids.map(id => ['ids[]', String(id)]))
  const response = await fetch(`${ROOT}/api/${path}?${query}`, {
    headers: { cookie }
  })
  if (!response.ok) {
    throw new Error(`${response.status} HTTP error`)
  }
  const json = await response.json()
  return json[path]
}

const sections = await getApi<Section>('sections', course.sections)
const units = await getApi<Unit>(
  'units',
  sections.flatMap(section => section.units)
)
const unitIdMap: Record<number, Unit> = Object.fromEntries(
  units.map(unit => [unit.id, unit])
)
/** NOT in order! Do not iterate over this. */
const lessons = await getApi<Lesson>(
  'lessons',
  units.map(unit => unit.lesson)
)
const lessonIdMap: Record<number, Lesson> = Object.fromEntries(
  lessons.map(lesson => [lesson.id, lesson])
)
// Maps the lesson ID of each section's first unit to that section, so the
// section heading can be printed right before its first lesson.
const firstLessonMap: Partial<Record<number, Section>> = Object.fromEntries(
  sections
    // A section without units has no first lesson to attach a heading to.
    .filter(section => section.units.length > 0)
    .map(section => [unitIdMap[section.units[0]].lesson, section])
)

// Document head. NOTE(review): reconstructed markup — TODO confirm.
console.log('<!DOCTYPE html>')
console.log('<html lang="en"><head><meta charset="UTF-8" />')
console.log(`<title>${course.summary}</title>`)
console.log('</head><body>')
console.log(`<h1>${course.title}</h1>`)

// Table of contents: one list item per section, each with a nested list of
// its lessons in the order given by `section.units`.
console.log('<ol>')
for (const section of sections) {
  console.log('<li>')
  console.log(`${section.title}`)
  console.log('<ol>')
  for (const unitId of section.units) {
    const lesson = lessonIdMap[unitIdMap[unitId].lesson]
    console.log('<li>')
    console.log(`${lesson.title}`)
    console.log('</li>')
  }
  console.log('</ol>')
  console.log('</li>')
}
console.log('</ol>')

// Body: every lesson's text steps, with a section heading before the first
// lesson of each section.
for (const unit of units) {
  const lesson = lessonIdMap[unit.lesson]
  const section = firstLessonMap[lesson.id]
  if (section) {
    console.log(`<h2>${section.title}</h2>`)
  }
  console.log(`<h3>${lesson.title}</h3>`)
  const steps = await getApi<Step>('steps', lesson.steps)
  steps.sort((a, b) => a.position - b.position)
  let first = true
  for (const step of steps) {
    // Only text steps are printed; other block types are skipped.
    if (step.block.name === 'text') {
      if (first) {
        first = false
      } else {
        // Separator between consecutive text steps of the same lesson.
        console.log('<hr />')
      }
      console.log(step.block.text)
    }
  }
}
console.log('</body></html>')